Web-based Supplementary Materials for "Testing for independence in J x K contingency tables with complex sample survey data" by Stuart R. Lipsitz, Garrett M. Fitzmaurice, Debajyoti Sinha, Nathanael Hevelone, Edward Giovannucci, and Jim C. Hu. Web Appendix A Description of Supplementary Materials The supplementary materials contain an illustrative example (Web Appendix B) and a SAS macro (Web Appendix C) for the methods developed in the Biometrics article: Lipsitz SR, Fitzmaurice GM, Sinha D, Hevelone N, Giovannucci E, Hu JC. Testing for independence in J x K contingency tables with complex sample survey data. To make it easier to see how the SAS macro is used, we have included a fictitious dataset with two categorical variables, denoted 'x' and 'y', each with 3 levels (note that, for confidentiality reasons, we cannot include the real data example from the paper). The data are contained within the SAS commands for the illustrative example (Web Appendix B); that is, the data are read directly into SAS and there is no separate SAS dataset. If there are any questions about the macro, please contact Stu Lipsitz at the following e-mail address: slipsitz@partners.org Notes: 1) As discussed above, the data are read directly into SAS within a data step (there is no separate SAS dataset). 2) The SAS macro can be saved as a file, say 'survey_freq_tests_sas_macro.txt', and included using the following command: %inc 'c:\survey_freq_tests_sas_macro.txt'; (where 'survey_freq_tests_sas_macro.txt' is assumed to be on the C: drive; alternatively, you can point to whatever location you have placed the file in). 3) The two variables (row and column variables) can be character or numeric. 4) The macro call and the available options are straightforward, and are displayed in the example. Web Appendix B Illustrative Example *******************************************************************************************************; * Illustrative example for the methods developed in the Biometrics article: *; * Lipsitz SR, Fitzmaurice GM, Sinha D, Hevelone N, Giovannucci E, Hu JC. 
Testing *;
* for independence in J x K contingency tables with complex sample survey data. *;
*******************************************************************************************************;

/* NOTE(review): the data step below is kept token-for-token as it appears in   */
/* the extracted text. The PDF extraction spilled the columns of the data       */
/* table: the INPUT statement is split around DATALINES, and the values are     */
/* no longer one record (strata clus y x wt) per line. The original row order   */
/* of the last block of y/x values cannot be determined from this text, so the  */
/* step has NOT been reconstructed here. Restore it from the published          */
/* supplement before running the example.                                       */
data freq_ex; input strata clus y datalines; 1 1 3 2 1 1 3 1 1 1 3 3 1 1 2 3 1 1 3 2 1 1 2 2 1 1 3 3 1 1 2 2 1 1 3 1 1 1 3 1 1 2 2 1 1 2 3 1 1 2 2 1 1 2 3 3 1 2 2 1 1 2 2 3 1 2 3 3 1 2 2 2 1 2 3 3 1 2 3 1 1 2 1 2 1 3 3 3 1 3 1 1 1 3 2 1 1 3 3 2 1 3 2 1 1 3 3 2 1 3 3 2 1 3 2 2 1 3 3 2 1 3 1 1 1 3 3 2 1 3 1 2 1 4 3 2 1 4 2 1 1 4 1 2 1 4 3 1 1 4 3 3 1 4 2 1 1 4 3 1 1 4 2 2 1 4 2 2 1 4 3 3 1 4 2 2 x wt ; 99 125 610 153 211 127 115 98 110 6718 113 147 527 155 143 84 171 217 114 583 312 571 381 285 170 359 86 147 173 206 345 7013 312 99 327 144 427 293 154 177 279 101 93 171 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 10 2 2 3 3 3 3 2 1 3 2 3 3 3 2 3 3 3 3 2 2 3 3 2 2 2 3 3 2 3 3 3 3 2 3 2 2 2 3 3 3 3 3 2 3 1 3 3 3 2 2 3 3 3 2 3 1 3 2 100 2 266 1 431 3 135 3 155 1 110 3 85 1 185 2 260 1 90 3 227 1 101 2 85 1 103 3 578 1 105 2 256 2 93 3 87 1 315 3 189 3 82 3 93 3 301 3 83 3 95 3 81 3 890 2 95 3 437 2 125 2 168 2 100 3 125 1 81 2 82 3 122 3 80 3 1574 1 857 1 114 1 1168 2 234 3 424 2 185 3 94 2 84 1 215 1 112 3 283 2 80 1 124 1 188 3 122 1 3722 2 214 2 959 2 2 2 2 2 2 2 2 2 2 10 10 10 10 10 10 10 10 10 10 2 2 3 2 2 2 3 3 3 1 1 1 3 2 2 1 3 1 2 3 610 828 870 124 281 770 242 107 277 126 ;

options nocenter;

/* Weighted two-way table of x by y (uses the most recently created dataset). */
proc freq;
  tables x*y / nocol nopercent;
  weight wt;
run;

/* Load the macro definition, then run both tests on the example data. */
%inc 'c:\survey_freq_tests_sas_macro.txt';

%survfreq(data=freq_ex, row=x, col=y, stratum=strata, cluster=clus, weight=wt);

Web Appendix C SAS Macro

%macro survfreq(data=,row=Surgery_,col=,stratum=,cluster=,den_df=,weight=);
  /*------------------------------------------------------------------------
    survfreq

    Computes a Wald F-statistic and a Score F-statistic (with p-values) for
    a J x K table of &row by &col from complex survey data, and prints one
    summary row containing both.

    Parameters
      data=     input dataset
      row=      row variable (character or numeric)
                NOTE(review): the default Surgery_ looks like a leftover from
                another analysis. It is kept so the interface is unchanged,
                but callers should pass row= explicitly.
      col=      column variable (character or numeric)
      stratum=  stratum variable, passed to the STRATA statements
      cluster=  cluster variable, passed to the CLUSTER statements
      weight=   sampling weight variable, passed to the WEIGHT statements
      den_df=   optional; when non-empty it replaces the computed denominator
                degrees of freedom (clusters - strata - num_df + 1)

    Side effects
      Closes the HTML and LISTING ODS destinations while it runs and reopens
      them before printing. Creates and then deletes the datasets COV0,
      DATAZ, ESTI0, INF, N_STRAT, N_CLUS, ORD_LOG_, PARS__, V_I_TB, V_TB,
      RESULTS__, WALD__ and SCORE__. Resets the title to a blank.
  ------------------------------------------------------------------------*/

  /* Keep loop indices and counters local so that same-named macro
     variables in the calling environment are never overwritten. */
  %local j k nrow ncol nobs nstrat nclus;

  ods html close;
  ods listing close;

  /* Keep only the analysis variables and drop observations whose row or
     column value is missing (a period for numeric, blank for character). */
  data ord_log_(keep=&row &col &stratum &cluster &weight);
    set &data;
    zzzz1 = trim(left(&row));
    if zzzz1 ne '.';
    if zzzz1 ne ' ';
    zzzz2 = trim(left(&col));
    if zzzz2 ne '.';
    if zzzz2 ne ' ';
  run;

  /* Recode the row variable to consecutive integers 1..nrow (sort order). */
  proc sort data=ord_log_;
    by &row;
  run;

  data ord_log_(drop=&row);
    set ord_log_;
    by &row;
    if first.&row then ord_row + 1;
    /* Last value written is the number of distinct row levels. */
    call symputx('nrow', ord_row);
  run;

  /* Recode the column variable to 1..ncol and record the number of
     observations that remain after the missing-value filter. */
  proc sort data=ord_log_;
    by &col;
  run;

  data ord_log_(drop=&col);
    set ord_log_;
    by &col;
    if first.&col then ord_col + 1;
    call symputx('ncol', ord_col);
    call symputx('nobs', _n_);
  run;

  /* Number of strata = number of rows in the one-way table of strata. */
  proc freq data=ord_log_;
    tables &stratum / list out=n_strat;
  run;

  data n_strat;
    set n_strat;
    call symputx('nstrat', _n_);
  run;

  /* Number of clusters = number of stratum-by-cluster combinations. */
  proc freq data=ord_log_;
    tables &stratum*&cluster / list out=n_clus;
  run;

  data n_clus;
    set n_clus;
    call symputx('nclus', _n_);
  run;

  proc datasets nolist;
    delete n_clus;
  run;
  quit;

  /* Expand every observation into ncol-1 records: record k carries the
     indicator yyyyy = (ord_col = k) and the label level__ = k. */
  data dataz;
    set ord_log_;
    %do k = 1 %to &ncol-1;
      yyyyy = 0;
      if ord_col = &k then yyyyy = 1;
      level__ = &k;
      output;
    %end;
  run;

  /* Wald Statistic */
  proc surveyreg data=dataz;
    weight &weight;
    strata &stratum;
    cluster &cluster;
    class ord_row level__;
    model yyyyy = level__ ord_row*level__ / noint covb solution XPX;
    ods output ParameterEstimates=pars__;
    ods output CovB=Cov0(drop=Parameter);
    ods output XPX=inf(drop=Parameter);
  run;

  /* Keep the parameters that have a non-missing t value and remember their
     original positions so the matching rows/columns can be picked out of
     the covariance and X'X matrices. */
  data pars__(keep=Estimate rows_col_);
    set pars__;
    rows_col_ = _n_;
    if tValue > .;
  run;

  proc iml worksize=500;
    reset nolog;

    USE pars__;
    READ ALL INTO pars__;
    beta = pars__[,1];
    rows_col_ = (pars__[,2])`;

    USE inf;
    READ ALL INTO inf;
    inf = inf[rows_col_, rows_col_];

    USE Cov0;
    READ ALL INTO Cov0;
    Cov0 = Cov0[rows_col_, rows_col_];

    /* The last (nrow-1)*(ncol-1) estimates are the ones being tested. */
    beta = beta[&ncol:&nrow#(&ncol-1),];

    /* Add k*lambda*inv(inf) to the covariance, where lambda = p/(nobs-p)
       and k = max(1, trace(Cov0*inf)/p). */
    p = &nrow#(&ncol-1);
    lambda = p/(&nobs - p);
    tr_QG = (1/p)#trace(Cov0*inf);
    if tr_QG >= 1 then k = tr_QG;
    if tr_QG < 1 then k = 1;
    Var0 = Cov0 + k#lambda#inv(inf);

    /* tr = [0 | I] selects the covariance block of the tested estimates. */
    tr = j((&nrow-1)#(&ncol-1), (&ncol-1), 0) || I((&nrow-1)#(&ncol-1));
    V_0 = tr*Var0*tr`;

    num_df = (&nrow-1)#(&ncol-1);
    den_df = &nclus - &nstrat - num_df + 1;
    /* A value supplied through den_df= overrides the computed one. */
    %if %length(&den_df) %then %do;
      den_df = &den_df;
    %end;

    F = beta`*inv(V_0)*beta #den_df/((&nclus - &nstrat)#num_df);
    p = 1 - probF(F, num_df, den_df);

    coln = {'F_wald'} || {'num_df'} || {'den_df'} || {'p_wald'};
    out = F || num_df || den_df || p;
    create wald__ from out [colname=coln];
    append from out;
    close wald__;
  quit;

  /* Score Statistic */

  /* Indicator variables for row levels 1..nrow-1. */
  data ord_log_;
    set ord_log_;
    %do j = 1 %to &nrow-1;
      __row&j = (ord_row = &j);
    %end;
  run;

  /* Weighted intercept-only generalized logit fit; estimates go to esti0. */
  proc logistic data=ord_log_
      outest=esti0(drop=_LINK_ _TYPE_ _STATUS_ _NAME_ _LNLIKE_ _ESTTYPE_);
    weight &weight;
    model ord_col = / link=glogit;
  run;

  /* Append zero starting values for every row-indicator coefficient. */
  data esti0;
    set esti0;
    %do k = 1 %to &ncol-1;
      %do j = 1 %to &nrow-1;
        __row&j._&k = 0;
      %end;
    %end;
  run;

  /* MAXITER=0 keeps the estimates at the INEST= values, so the covariance
     matrices below are evaluated at those values: once with the survey
     design (V_TB) and once without it (V_I_TB). */
  proc surveylogistic data=ord_log_ inest=esti0;
    weight &weight;
    strata &stratum;
    cluster &cluster;
    model ord_col =
      %do j = 1 %to &nrow-1;
        __row&j
      %end;
      / link=glogit TECH= NEWTON MAXITER= 0 ITPRINT COVB;
    ods output CovB=V_TB(drop=Parameter);
  run;

  proc logistic data=ord_log_ inest=esti0;
    weight &weight;
    model ord_col =
      %do j = 1 %to &nrow-1;
        __row&j
      %end;
      / link=glogit MAXITER= 0 COVB;
    ods output CovB=V_I_TB(drop=Parameter);
  run;

  proc iml worksize=500;
    reset nolog;

    USE V_TB;
    READ ALL INTO V_TB;
    USE V_I_TB;
    READ ALL INTO V_I_TB;
    UUSQ = inv(V_I_TB)*V_TB*inv(V_I_TB);

    USE pars__;
    READ ALL INTO pars__;
    beta = pars__[,1];
    rows_col_ = (pars__[,2])`;

    USE inf;
    READ ALL INTO inf;
    inf = inf[rows_col_, rows_col_];

    /* Same computation as the Wald step, with the covariance rebuilt from
       the two logistic covariance matrices. */
    Cov0 = inv(inf)*UUSQ*inv(inf);
    beta = beta[&ncol:&nrow#(&ncol-1),];

    p = &nrow#(&ncol-1);
    lambda = p/(&nobs - p);
    tr_QG = (1/p)#trace(Cov0*inf);
    if tr_QG >= 1 then k = tr_QG;
    if tr_QG < 1 then k = 1;
    Var0 = Cov0 + k#lambda#inv(inf);

    tr = j((&nrow-1)#(&ncol-1), (&ncol-1), 0) || I((&nrow-1)#(&ncol-1));
    V_0 = tr*Var0*tr`;

    num_df = (&nrow-1)#(&ncol-1);
    den_df = &nclus - &nstrat - num_df + 1;
    %if %length(&den_df) %then %do;
      den_df = &den_df;
    %end;

    F = beta`*inv(V_0)*beta #den_df/((&nclus - &nstrat)#num_df);
    p = 1 - probF(F, num_df, den_df);

    coln = {'F_score'} || {'p_score'};
    out = F || p;
    create score__ from out [colname=coln];
    append from out;
    close score__;
  quit;

  /* One-row summary holding both tests. */
  data results__;
    merge wald__ score__;
    row_variable = "&row";
    column_variable = "&col";
  run;

  ods html;
  ods listing;

  proc print noobs data=results__ split='*';
    var row_variable column_variable num_df den_df F_wald p_wald F_score p_score;
    label row_variable="Row*Variable";
    label column_variable="Column*Variable";
    label num_df="Numerator*DF";
    label den_df="Denominator*DF";
    label F_wald="Wald*F-statistic";
    label p_wald="Wald*P-value";
    label F_score="Score*F-statistic";
    label p_score="Score*P-value";
  run;

  /* Remove every working dataset the macro created. */
  proc datasets nolist;
    delete COV0 DATAZ ESTI0 INF N_STRAT ORD_LOG_ PARS__ V_I_TB V_TB results__ wald__ score__;
  run;
  quit;

  title ' ';
%mend survfreq;