function olscl(X,y, group=group)
XXfactored = cholesky(X'*X)
β̂ = XXfactored \ X'*y
V = olsclvar(X, y - X*β̂, XXfactored, group=group)
return(β̂, V)
end
function olsclvar(X, ϵ, XXf; group=axes(X)[1])
n, k = size(X)
groups=unique(group)
G = length(groups)
iXX = inv(XXf)
@views @inbounds Vr = G/(G-k)*iXX*
sum(X[group.==g,:]'*ϵ[group.==g]*ϵ[group.==g]'*X[group.==g,:] for g in groups)*iXX
return(Vr)
end
function simcluster(n,k,G; β=ones(k), σ = x->1, ρ=0.7)
X = randn(n,k)
group = rand(1:G,n)
for g ∈ 1:G # ensure all groups included
if sum(group.==g)==0
group[g]=g
end
end
X[:,1] .+= (group.>(G/2))
u = randn(G)
ϵ = (ρ*u[group] + sqrt(1-ρ^2)*randn(n)).*mapslices(σ, X, dims=[2])
y = X*β + ϵ
return(X,y, group)
end
function simsizecluster(n,k,G,S; β=ones(k), σ=x->1, ρ=0.7)
z = zeros(S,2)
for s ∈ 1:S
X,y,group = simcluster(n,k,G,β=β,σ=σ, ρ=ρ)
β̂, Vcr = olscl(X,y, group)
_, Vr, _ = ols(X,y)
z[s,1] = (β̂[1] - β[1])/sqrt(Vcr[1,1])
z[s,2] = (β̂[1] - β[1])/sqrt(Vr[1,1])
end
p = hcat(cdf.(Normal(),z), cdf.(TDist(G-k),z))
return(p, z)
end
function makeplotcl(p,z,n,g; u = range(0,1,length=100))
plt = Plot()
plt.layout = Config()
plt.layout.title="N=$n, G=$g"
plt.layout.yaxis.title.text="x - P(asymptotic p-value < x)"
plt.layout.xaxis.title.text="x"
plt(x=u, y=(u .- (x->mean(p[:,1].<=x)).(u)), name="Clustered (Normal Distribution)")
plt(x=u, y=(u .- (x->mean(p[:,2].<=x)).(u)), name="Heteroskedasticity Robust")
plt(x=u, y=(u .- (x->mean(p[:,3].<=x)).(u)), name="Clustered (t-Distribution)")
return(plt())
end
N = [200, 200, 5000, 5000]
G = [10, 100, 10, 100]
k = 2
for (n,g) ∈ zip(N,G)
if !isfile("size_cluster_$(n)_$g.html")
fig = makeplotcl(simsizecluster(n,k,g,10_000)...,n,g)
PlotlyLight.save(fig, "size_cluster_$(n)_$g.html")
end
end