## Code for Imputing Mixed Data ## author: Irene Helenowski, Ph.D. (i-helenowski@northwestern.edu) ## function for assigning binary values based on quantiles of underlying normally distributed ## values. bind.fxn=function(x){ l.x=length(x) l.x2=l.x/2 y=x[1:l.x2] z=x[(l.x2+1):l.x] p0=1-mean(y, na.rm=T) q.p0=quantile(z,p0, na.rm=T) z2=rep(NA,l.x2); z.o=as.numeric(na.omit(z)) ind.0=as.numeric(na.omit((1:l.x2)[y==0])) ind.1=as.numeric(na.omit((1:l.x2)[y==1])) z2[ind.0]=z.o[z.o<q.p0] z2[ind.1]=z.o[z.o>q.p0] z2 } ## function for imputing mixed data mv.mix.fxn=function(y,n,b, n2.init, cc, maxits=100, m.imp, j.simul){ APPENDIX (continued) ## determining conditional probabilities in binary variables that ## will be used to compute quantiles of the underlying normal variables. y1=y[,1:n] y2=y[,((n+1):(n+b))] ## determining entry positions of missing values m.ind=apply(is.na(y2),1,sum) m.ind2=(1:dim(y2)[1])[m.ind>0] y2.m=y2[m.ind2,] t.y2=table(y2[,1],y2[,2]) p.y2=c(t.y2[1]/sum(t.y2[1,]),t.y2[2]/sum(t.y2[2,]), t.y2[1]/sum(t.y2[,1]), t.y2/sum(t.y2[,2])) ## computing eCDF values for continuous data e1a=mv.ecdf(y1) e1a[e1a==1]=round(1-10^-5,20) e1a[e1a==0]=round(10^-5,20) ## obtaining N(0,1) values based on computed eCDF values n1=qnorm(e1a) ## reordering values of underlying normal variables to correspond to the binary variables n2=apply(n2.init,2,bind.fxn) n2.1=n2[,1]; n2.2=n2[,2] q.y2=c(quantile(n2.2[y2[,1]==0], p.y2[1], na.rm=T), quantile(n2.2[y2[,1]==1], p.y2[2], na.rm=T), quantile(n2.1[y2[,2]==0], p.y2, na.rm=T), quantile(n2.1[y2[,2]==1], p.y2[4], na.rm=T)) k=dim(y)[2]; n=dim(y)[1] ## setting up matrices to store imputed data, their pairwise correlations, ## means of imputed continuous variables, and proportions of imputed binary variables imp.yds=matrix(0,n,k*m*j) imp.cor2=matrix(0,j.simul,60) imp.m1=matrix(0,j.simul,10) imp.m2=matrix(0,j.simul,10) imp.p3=matrix(0,j.simul,10) imp.p4=matrix(0,j.simul,10) APPENDIX (continued) for (j in 1:j.simul){ for (m in 1:m.imp){ ## setting conditions for re-iteration abd.cc=numeric(6); i=0 while (sum(abd.cc)<6 & i < maxits) { i=i+1 ## imputing normal data underlying original continuous and binary variables ## using the R norm package z.n=cbind(n1,n2) s <- prelim.norm(as.matrix(z.n)) #do preliminary manipulations thetahat <- em.norm(s, showits=FALSE) #find the mle rngseed(sample(10:1000,1)) #set random number generator seed z.imp <- imp.norm(s,thetahat,as.matrix(z.n)) #impute missing data pz.imp=pnorm(z.imp) ## back-transforming imputed normal data onto the scale of the original continuous ## data via the Barton and Schruben (1993) method x1=dim(y1)[2] na.test=apply(is.na(y1), 2, sum) na.test2=(1:x1)[as.numeric(na.test)>0] x2=matrix(na.test2,length(na.test2),1) y.imp1.add=apply(x2,1,bart.k,test.data=y1, u.c=pz.imp, u.msg=as.matrix(e1a)) ## back-transforming imputed normal data to binary data via quantiles z.imp2=z.imp[m.ind2,((n+1):(n+b))] y2.2=y2[m.ind2,] yz.imp2=cbind(y2.2, z.imp2) y.imp2.add=t(apply(yz.imp2,1,bin2,q.y2=q.y2)) y.imp2=y2 y.imp2[m.ind2,]=y.imp2.add y.imp=cbind(y.imp1, y.imp2) ## computing correlations of imputed data imp.cor.m=cor(y.imp,use='pairwise.complete.obs') APPENDIX (continued) imp.cor=l.mat(imp.cor.m)[l.mat(imp.cor.m)!=0] abd=abs(imp.cor-init.cor) dc=sign(imp.cor-init.cor) abd.cc=(abd<cc) ## printing the ith iteration, mth imputation, jth simulation run, and how many pairwise ## correlations meet the convergence criteria print(paste("ith iteration: ",format(i), " --*-- mth imputation: ",format(m), " --*-- jth simulation run: ",format(j), " --*-- close pairs: ",format(sum(abd.cc)), " ** ",format(sign(dc[1])), " ** ",format(sign(dc[4])), " ** ",format(sign(dc[5])), sep = "")) } ## storing imputed data correlations and proportions involving imputed binary data imp.yds[((j-1)*m.imp+(m-1)*dim(y)[2]+1):((j-1)*m.imp+(m-1)*dim(y)[2]+dim(y)[2]) ik.dim=length(imp.cor) imp.cor2[j,(((m-1)*k.dim+1):(k.dim*m))]=imp.cor imp.m1[j,m]=mean(y.imp[,1]) imp.m2[j,m]=mean(y.imp[,2]) imp.p3[j,m]=mean(y.imp[,3]) imp.p4[j,m]=mean(y.imp[,4]) } } ## outputting results print(list(imp.yds, imp.cor2, imp.m1, imp.m2, imp.p3,imp.p4)) }