Mixed_imp - Department of Preventive Medicine

advertisement
## Code for Imputing Mixed Data
## author: Irene Helenowski, Ph.D. (i-helenowski@northwestern.edu)
## function for assigning binary values based on quantiles of underlying normally distributed
## values.
bind.fxn=function(x){
l.x=length(x)
l.x2=l.x/2
y=x[1:l.x2]
z=x[(l.x2+1):l.x]
p0=1-mean(y, na.rm=T)
q.p0=quantile(z,p0, na.rm=T)
z2=rep(NA,l.x2); z.o=as.numeric(na.omit(z))
ind.0=as.numeric(na.omit((1:l.x2)[y==0]))
ind.1=as.numeric(na.omit((1:l.x2)[y==1]))
z2[ind.0]=z.o[z.o<q.p0]
z2[ind.1]=z.o[z.o>q.p0]
z2
}
## function for imputing mixed data
mv.mix.fxn=function(y,n,b, n2.init, cc, maxits=100, m.imp, j.simul){
APPENDIX (continued)
## determining conditional probabilities in binary variables that
## will be used to compute quantiles of the underlying normal variables.
y1=y[,1:n]
y2=y[,((n+1):(n+b))]
## determining entry positions of missing values
m.ind=apply(is.na(y2),1,sum)
m.ind2=(1:dim(y2)[1])[m.ind>0]
y2.m=y2[m.ind2,]
t.y2=table(y2[,1],y2[,2])
p.y2=c(t.y2[1]/sum(t.y2[1,]),t.y2[2]/sum(t.y2[2,]),
t.y2[1]/sum(t.y2[,1]), t.y2/sum(t.y2[,2]))
## computing eCDF values for continuous data
e1a=mv.ecdf(y1)
e1a[e1a==1]=round(1-10^-5,20)
e1a[e1a==0]=round(10^-5,20)
## obtaining N(0,1) values based on computed eCDF values
n1=qnorm(e1a)
## reordering values of underlying normal variables to correspond to the binary variables
n2=apply(n2.init,2,bind.fxn)
n2.1=n2[,1]; n2.2=n2[,2]
q.y2=c(quantile(n2.2[y2[,1]==0], p.y2[1], na.rm=T),
quantile(n2.2[y2[,1]==1], p.y2[2], na.rm=T),
quantile(n2.1[y2[,2]==0], p.y2, na.rm=T),
quantile(n2.1[y2[,2]==1], p.y2[4], na.rm=T))
k=dim(y)[2]; n=dim(y)[1]
## setting up matrices to store imputed data, their pairwise correlations,
## means of imputed continuous variables, and proportions of imputed binary variables
imp.yds=matrix(0,n,k*m*j)
imp.cor2=matrix(0,j.simul,60)
imp.m1=matrix(0,j.simul,10)
imp.m2=matrix(0,j.simul,10)
imp.p3=matrix(0,j.simul,10)
imp.p4=matrix(0,j.simul,10)
APPENDIX (continued)
for (j in 1:j.simul){
for (m in 1:m.imp){
## setting conditions for re-iteration
abd.cc=numeric(6); i=0
while (sum(abd.cc)<6 & i < maxits) {
i=i+1
## imputing normal data underlying original continuous and binary variables
## using the R norm package
z.n=cbind(n1,n2)
s <- prelim.norm(as.matrix(z.n)) #do preliminary manipulations
thetahat <- em.norm(s, showits=FALSE) #find the mle
rngseed(sample(10:1000,1)) #set random number generator seed
z.imp <- imp.norm(s,thetahat,as.matrix(z.n)) #impute missing data
pz.imp=pnorm(z.imp)
## back-transforming imputed normal data onto the scale of the original continuous
## data via the Barton and Schruben (1993) method
x1=dim(y1)[2]
na.test=apply(is.na(y1), 2, sum)
na.test2=(1:x1)[as.numeric(na.test)>0]
x2=matrix(na.test2,length(na.test2),1)
y.imp1.add=apply(x2,1,bart.k,test.data=y1, u.c=pz.imp, u.msg=as.matrix(e1a))
## back-transforming imputed normal data to binary data via quantiles
z.imp2=z.imp[m.ind2,((n+1):(n+b))]
y2.2=y2[m.ind2,]
yz.imp2=cbind(y2.2, z.imp2)
y.imp2.add=t(apply(yz.imp2,1,bin2,q.y2=q.y2))
y.imp2=y2
y.imp2[m.ind2,]=y.imp2.add
y.imp=cbind(y.imp1, y.imp2)
## computing correlations of imputed data
imp.cor.m=cor(y.imp,use='pairwise.complete.obs')
APPENDIX (continued)
imp.cor=l.mat(imp.cor.m)[l.mat(imp.cor.m)!=0]
abd=abs(imp.cor-init.cor)
dc=sign(imp.cor-init.cor)
abd.cc=(abd<cc)
## printing the ith iteration, mth imputation, jth simulation run, and how many pairwise
## correlations meet the convergence criteria
print(paste("ith iteration: ",format(i), " --*-- mth imputation: ",format(m),
" --*-- jth simulation run: ",format(j),
" --*-- close pairs: ",format(sum(abd.cc)),
" ** ",format(sign(dc[1])),
" ** ",format(sign(dc[4])),
" ** ",format(sign(dc[5])),
sep = ""))
}
## storing imputed data correlations and proportions involving imputed binary data
imp.yds[((j-1)*m.imp+(m-1)*dim(y)[2]+1):((j-1)*m.imp+(m-1)*dim(y)[2]+dim(y)[2])
ik.dim=length(imp.cor)
imp.cor2[j,(((m-1)*k.dim+1):(k.dim*m))]=imp.cor
imp.m1[j,m]=mean(y.imp[,1])
imp.m2[j,m]=mean(y.imp[,2])
imp.p3[j,m]=mean(y.imp[,3])
imp.p4[j,m]=mean(y.imp[,4])
}
}
## outputting results
print(list(imp.yds, imp.cor2, imp.m1, imp.m2, imp.p3,imp.p4))
}
Download