Web-based Supplementary Materials for Testing for independence

advertisement
Web-based Supplementary Materials for Testing for independence in J x K contingency tables with
complex sample survey data by Stuart R. Lipsitz, Garrett M. Fitzmaurice, Debajyoti Sinha,
Nathanael Hevelone, Edward Giovannucci, and Jim C. Hu.
Web Appendix A
Description of Supplementary Materials
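The supplementary materials contain an illustrative example (Web Appendix B) and a SAS macro
(Web Appendix C) for the methods developed in the Biometrics article "Testing for independence in
J x K contingency tables with complex sample survey data" by Lipsitz, Fitzmaurice, Sinha, Hevelone,
Giovannucci, and Hu.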
For ease of understanding of how to implement the SAS macro, we have included a fictitious dataset
with two categorical variables, denoted 'x' and 'y', each with 3 levels (note, for confidentiality
reasons, we cannot include the real data example from the paper).
The data are embedded in the SAS commands for the illustrative example (Web Appendix B); that is,
they are read directly into SAS with a DATALINES statement rather than from a separate SAS dataset file.
If there are any questions about the macro, please contact Stu Lipsitz at the following e-mail
address:
slipsitz@partners.org
Notes:
1) As discussed above, the data are read directly into SAS within a datastep (there is no SAS dataset).
2) The SAS macro can be saved as a file, say 'survey_freq_tests_sas_macro.txt', and included using
the following command:
%inc 'c:\survey_freq_tests_sas_macro.txt';
(where 'survey_freq_tests_sas_macro.txt' is assumed to be on the c-drive; alternatively, you can
point to whatever location you have placed the file).
3) The two variables (row and column variables) can be character or numeric.
4) The macro calls and options available are straightforward, and are displayed in the example.
Web Appendix B
Illustrative Example
*******************************************************************************************************;
* Illustrative example for the methods developed in the Biometrics article:                            ;
*                                                                                                      ;
* Lipsitz SR, Fitzmaurice GM, Sinha D, Hevelone N, Giovannucci E, Hu JC. Testing                       ;
* for independence in J x K contingency tables with complex sample survey data.                        ;
*******************************************************************************************************;
/* Fictitious example data: one record per sampled unit, read in-stream.     */
/*   strata : sampling stratum identifier                                    */
/*   clus   : cluster identifier within stratum                              */
/*   y      : column variable of the table (3 levels)                        */
/*   x      : row variable of the table (3 levels)                           */
/*   wt     : sampling weight                                                */
/* NOTE(review): the published listing was column-spilled by text            */
/* extraction; the 111 records below were reassembled in INPUT order         */
/* (strata clus y x wt). Confirm against the original supplement.            */
data freq_ex;
   input strata clus y x wt;
   datalines;
1 1 3 2 99
1 1 3 1 125
1 1 3 3 610
1 1 2 3 153
1 1 3 2 211
1 1 2 2 127
1 1 3 3 115
1 1 2 2 98
1 1 3 1 110
1 1 3 1 6718
1 2 2 1 113
1 2 3 1 147
1 2 2 1 527
1 2 3 3 155
1 2 2 1 143
1 2 2 3 84
1 2 3 3 171
1 2 2 2 217
1 2 3 3 114
1 2 3 1 583
1 2 1 2 312
1 3 3 3 571
1 3 1 1 381
1 3 2 1 285
1 3 3 2 170
1 3 2 1 359
1 3 3 2 86
1 3 3 2 147
1 3 2 2 173
1 3 3 2 206
1 3 1 1 345
1 3 3 2 7013
1 3 1 2 312
1 4 3 2 99
1 4 2 1 327
1 4 1 2 144
1 4 3 1 427
1 4 3 3 293
1 4 2 1 154
1 4 3 1 177
1 4 2 2 279
1 4 2 2 101
1 4 3 3 93
1 4 2 2 171
1 5 2 2 100
1 5 2 2 266
1 5 3 1 431
1 5 3 3 135
1 5 3 3 155
1 5 3 1 110
1 5 2 3 85
1 5 1 1 185
1 5 3 2 260
1 5 2 1 90
1 5 3 3 227
2 6 3 1 101
2 6 3 2 85
2 6 2 1 103
2 6 3 3 578
2 6 3 1 105
2 6 3 2 256
2 6 3 2 93
2 6 2 3 87
2 6 2 1 315
2 6 3 3 189
2 6 3 3 82
2 6 2 3 93
2 7 2 3 301
2 7 2 3 83
2 7 3 3 95
2 7 3 3 81
2 7 2 3 890
2 7 3 2 95
2 7 3 3 437
2 7 3 2 125
2 7 3 2 168
2 7 2 2 100
2 7 3 3 125
2 8 2 1 81
2 8 2 2 82
2 8 2 3 122
2 8 3 3 80
2 8 3 3 1574
2 8 3 1 857
2 8 3 1 114
2 8 3 1 1168
2 8 2 2 234
2 8 3 3 424
2 9 1 2 185
2 9 3 3 94
2 9 3 2 84
2 9 3 1 215
2 9 2 1 112
2 9 2 3 283
2 9 3 2 80
2 9 3 1 124
2 9 3 1 188
2 9 2 3 122
2 9 3 1 3722
2 9 1 2 214
2 10 3 2 959
2 10 2 1 610
2 10 2 1 828
2 10 3 3 870
2 10 2 2 124
2 10 2 2 281
2 10 2 1 770
2 10 3 3 242
2 10 3 1 107
2 10 3 2 277
2 10 1 3 126
;
/* Left-justify procedure output rather than centering it. */
options nocenter;

/* Weighted x-by-y cross-tabulation of the example data, without column */
/* percentages or overall cell percentages.                             */
proc freq data=freq_ex;
   weight wt;
   tables x*y / nocol nopercent;
run;

/* Load the macro definition from disk, then run the survey-based tests */
/* of independence on the same table.                                   */
%include 'c:\survey_freq_tests_sas_macro.txt';
%survfreq(data=freq_ex, row=x, col=y, stratum=strata, cluster=clus, weight=wt);
Web Appendix C
SAS Macro
/*----------------------------------------------------------------------------
  SURVFREQ: Wald and score F-tests of independence between a row and a column
  variable in a J x K table built from complex sample survey data.

  Parameters
    data=     input dataset
    row=      row variable (character or numeric)
              NOTE(review): the default Surgery_ looks like a leftover from
              the authors' own analysis; it is kept only so existing calls
              behave as before. Always pass row= explicitly.
    col=      column variable (character or numeric)
    stratum=  stratum variable
    cluster=  cluster variable
    weight=   sampling weight variable
    den_df=   optional denominator degrees of freedom; when blank the macro
              uses (clusters - strata - numerator df + 1)

  Output
    Prints one row with numerator DF, denominator DF, and the Wald and score
    F-statistics with their p-values. All intermediate WORK datasets created
    by the macro are deleted before it returns.

  Side effects
    Closes the HTML and LISTING ODS destinations while it runs, reopens both
    before printing, and sets TITLE1 to a blank at the end.
 ----------------------------------------------------------------------------*/
%macro survfreq(data=,row=Surgery_,col=,stratum=,cluster=,den_df=,weight=);

   /* Keep loop indices and derived counts out of the caller's symbol tables. */
   %local j k nrow ncol nobs nstrat nclus;

   /* Suppress displayed output from the intermediate procedures. */
   ods html close;
   ods listing close;

   /* Keep only the analysis variables and drop records whose row or column */
   /* value is missing (a numeric missing renders as '.', character as ' '). */
   data ord_log_(keep=&row &col &stratum &cluster &weight);
      set &data;
      zzzz1 = trim(left(&row));
      if zzzz1 ne '.';
      if zzzz1 ne ' ';
      zzzz2 = trim(left(&col));
      if zzzz2 ne '.';
      if zzzz2 ne ' ';
   run;

   /* Recode the row variable to consecutive integers 1..nrow. */
   proc sort data=ord_log_;
      by &row;
   run;

   data ord_log_(drop=&row);
      set ord_log_;
      by &row;
      if first.&row then ord_row+1;
      call symputx('nrow', ord_row);   /* last record leaves the level count */
   run;

   /* Recode the column variable to 1..ncol and record the number of rows kept. */
   proc sort data=ord_log_;
      by &col;
   run;

   data ord_log_(drop=&col);
      set ord_log_;
      by &col;
      if first.&col then ord_col+1;
      call symputx('ncol', ord_col);
      call symputx('nobs', _n_);
   run;

   /* Count distinct strata. */
   proc freq data=ord_log_;
      tables &stratum / list out=n_strat;
   run;

   data n_strat;
      set n_strat;
      call symputx('nstrat', _n_);
   run;

   /* Count distinct stratum-by-cluster combinations. */
   proc freq data=ord_log_;
      tables &stratum*&cluster / list out=n_clus;
   run;

   data n_clus;
      set n_clus;
      call symputx('nclus', _n_);
   run;

   proc datasets nolist;
      delete n_clus;
   run;
   quit;

   /* Stack ncol-1 copies of every record; copy k carries level__ = k and */
   /* yyyyy = 1 when the record falls in column level k, otherwise 0.      */
   data dataz;
      set ord_log_;
      %do k=1 %to &ncol-1;
         yyyyy=0;
         if ord_col=&k then yyyyy=1;
         level__=&k;
         output;
      %end;
   run;

   /* Wald Statistic */
   proc surveyreg data=dataz;
      weight &weight;
      strata &stratum;
      cluster &cluster;
      class ord_row level__;
      model yyyyy = level__ ord_row*level__ / noint covb solution XPX;
      ods output ParameterEstimates=pars__;
      ods output CovB=Cov0(drop=Parameter);
      ods output XPX=inf(drop=Parameter);
   run;

   /* Keep estimates that have a non-missing t value, remembering each one's */
   /* original position so the matching rows/columns of CovB and XPX can be  */
   /* selected in IML.                                                       */
   data pars__(keep=Estimate rows_col_);
      set pars__;
      rows_col_=_n_;
      if tValue > .;
   run;

   proc iml worksize=500;
      reset nolog;

      USE pars__;
      READ ALL INTO pars__;
      beta = pars__[,1];
      rows_col_ = (pars__[,2])`;

      USE inf;
      READ ALL INTO inf;
      inf = inf[rows_col_,rows_col_];

      USE Cov0;
      READ ALL INTO Cov0;
      Cov0 = Cov0[rows_col_,rows_col_];

      /* Drop the first ncol-1 entries (the level__ main effects). */
      beta = beta[&ncol:&nrow#(&ncol-1),];

      /* Inflate the covariance by k*lambda*inv(inf), with k floored at 1. */
      p = &nrow#(&ncol-1);
      lambda = p/(&nobs - p);
      tr_QG = (1/p)#trace(Cov0*inf);
      if tr_QG >= 1 then k = tr_QG;
      if tr_QG < 1 then k = 1;
      Var0 = Cov0 + k#lambda#inv(inf);

      /* [0 | I] picks out the block of Var0 that corresponds to beta. */
      tr = j((&nrow-1)#(&ncol-1),(&ncol-1),0) || I((&nrow-1)#(&ncol-1));
      V_0 = tr*Var0*tr`;

      num_df = (&nrow-1)#(&ncol-1);
      den_df = &nclus - &nstrat - num_df + 1;
      %if %length(&den_df) %then %do;
         den_df = &den_df;   /* caller-supplied override */
      %end;

      F = beta`*inv(V_0)*beta #den_df/((&nclus - &nstrat)#num_df);
      p = 1-probF(F,num_df,den_df);

      coln = {'F_wald'} || {'num_df'} || {'den_df'} || {'p_wald'};
      out = F || num_df || den_df || p;
      create wald__ from out [colname=coln];
      append from out;
      close wald__;
   quit;

   /* Score Statistic */

   /* Indicator variables for the first nrow-1 row levels. */
   data ord_log_;
      set ord_log_;
      %do j=1 %to &nrow-1;
         __row&j=(ord_row=&j);
      %end;
   run;

   /* Intercept-only generalized logit fit; its estimates seed the later fits. */
   proc logistic data=ord_log_ outest=esti0(drop=_LINK_ _TYPE_ _STATUS_ _NAME_
                                            _LNLIKE_ _ESTTYPE_);
      weight &weight;
      model ord_col = / link=glogit;
   run;

   /* Add a zero starting value for every row-indicator coefficient. */
   data esti0;
      set esti0;
      %do k=1 %to &ncol-1;
         %do j=1 %to &nrow-1;
            __row&j._&k=0;
         %end;
      %end;
   run;

   /* With MAXITER=0 both fits stay at the supplied starting values, so the */
   /* covariance matrices are evaluated there.                               */
   proc surveylogistic data=ord_log_ inest=esti0;
      weight &weight;
      strata &stratum;
      cluster &cluster;
      model ord_col =
         %do j=1 %to &nrow-1;
            __row&j
         %end;
         / link=glogit TECH= NEWTON MAXITER= 0 ITPRINT COVB;
      ods output CovB=V_TB(drop=Parameter);
   run;

   proc logistic data=ord_log_ inest=esti0;
      weight &weight;
      model ord_col =
         %do j=1 %to &nrow-1;
            __row&j
         %end;
         / link=glogit MAXITER= 0 COVB;
      ods output CovB=V_I_TB(drop=Parameter);
   run;

   proc iml worksize=500;
      reset nolog;

      USE V_TB;
      READ ALL INTO V_TB;
      USE V_I_TB;
      READ ALL INTO V_I_TB;
      UUSQ = inv(V_I_TB)*V_TB*inv(V_I_TB);

      /* Reuses the estimates and X'X matrix saved by the Wald step. */
      USE pars__;
      READ ALL INTO pars__;
      beta = pars__[,1];
      rows_col_ = (pars__[,2])`;

      USE inf;
      READ ALL INTO inf;
      inf = inf[rows_col_,rows_col_];

      Cov0 = inv(inf)*UUSQ*inv(inf);
      beta = beta[&ncol:&nrow#(&ncol-1),];

      p = &nrow#(&ncol-1);
      lambda = p/(&nobs - p);
      tr_QG = (1/p)#trace(Cov0*inf);
      if tr_QG >= 1 then k = tr_QG;
      if tr_QG < 1 then k = 1;
      Var0 = Cov0 + k#lambda#inv(inf);

      tr = j((&nrow-1)#(&ncol-1),(&ncol-1),0) || I((&nrow-1)#(&ncol-1));
      V_0 = tr*Var0*tr`;

      num_df = (&nrow-1)#(&ncol-1);
      den_df = &nclus - &nstrat - num_df + 1;
      %if %length(&den_df) %then %do;
         den_df = &den_df;   /* caller-supplied override */
      %end;

      F = beta`*inv(V_0)*beta #den_df/((&nclus - &nstrat)#num_df);
      p = 1-probF(F,num_df,den_df);

      coln = {'F_score'} || {'p_score'};
      out = F || p;
      create score__ from out [colname=coln];
      append from out;
      close score__;
   quit;

   /* Combine both tests into a single printable row. */
   data results__;
      merge wald__ score__;
      row_variable="&row"; column_variable="&col";
   run;

   ods html;
   ods listing;

   proc print noobs data=results__ split='*';
      var row_variable column_variable num_df den_df F_wald p_wald F_score p_score;
      label row_variable="Row*Variable";
      label column_variable="Column*Variable";
      label num_df="Numerator*DF";
      label den_df="Denominator*DF";
      label F_wald="Wald*F-statistic";
      label p_wald="Wald*P-value";
      label F_score="Score*F-statistic";
      label p_score="Score*P-value";
   run;

   /* Remove every intermediate dataset the macro created. */
   proc datasets nolist;
      delete COV0 DATAZ ESTI0 INF N_STRAT ORD_LOG_ PARS__ V_I_TB V_TB
             results__ wald__ score__;
   run;
   quit;

   title ' ';
%mend survfreq;
Download