/* This program uses PROC GENMOD in SAS to fit a series of log-linear models to the counts in a 3-dimensional contingency table /* It is stored as edp.sas */ */ 2 2 1 194 1 1 2 2 1 2 169 1 2 2 2 2 2 2 18 run; data set1; proc format; input a b c x; value edp 1 = 'EDP' label a = "User status" 2 = 'no EDP'; b = "Hospital size" value size 1 = 'Large' c = "Length of stay"; 2 = 'Small'; cards; value stay 1 = 'Short' 1 1 1 428 2 = 'Long'; 2 1 1 271 1 2 1 run; 39 802 801 /* /* 55 Fit the complete independence model */ Fit a cond. independence model */ proc genmod data=set1 order=internal; class a b c; proc genmod data=set1 order=internal; model x = a b c a*c b*c / class a b c; dist = poisson model x = a b c / dist = poisson maxit=50 link=log link=log covb type1 type3 obstats; format a edp. b size. c stay.; run; maxit=50 covb itprint obstats ; make 'parmest' out=est1; /* make 'obstats' out=obstat1; format a edp. b size. c stay.; run; Fit a joint independence model */ proc genmod data=set1 order=internal; class a b c; proc print data=est1; model x = a b c a*b / dist = poisson run; link=log maxit=50 covb type1 type3 obstats ; proc print data=obstat1; run; format a edp. b size. c stay.; 803 run; 804 The GENMOD Procedure /* Model Information Fit a no three factor interaction Data Set Distribution Link Function Dependent Variable Observations Used model */ proc genmod data=set1 order=internal; class a b c; WORK.SET1 Poisson Log x 8 model x = a b c a*c b*c / dist = poisson link=log maxit=50 covb Class Level Information type1 type3 obstats ; format a edp. b size. 
c stay.; Class run; Levels a b c 2 2 2 Values EDP no EDP Large Small Short Long 806 805 Criteria For Assessing Goodness Of Fit Criterion Iteration History For Parameter Estimates Iter Ridge Log Likelihood 0 0 5101.07543 1 0 5116.96084 2 0 5117.2192 3 0 5117.2193 4 0 5117.2193 Prm1 Prm4 3.7311096 1.0632877 3.4221535 1.2578693 3.3718945 1.2934595 3.3708544 1.2942394 3.3708544 1.2942394 Prm2 Prm6 -0.136807 1.2012649 -0.209612 1.3207348 -0.218447 1.3398346 -0.218553 1.3401645 -0.218553 1.3401645 807 Deviance Scaled Deviance Pearson Chi-Square Scaled Pearson X2 Log Likelihood DF Value Value/DF 4 4 4 4 247.7444 247.7444 238.4443 238.4443 5117.2193 61.9361 61.9361 59.6111 59.6111 Last Evaluation Of The Negative Of The Gradient and Hessian Prm1 Prm2 Grad. 0.0000995 Prm1 1176.0001 Prm2 524.00005 Prm4 923.00001 Prm6 932.00006 0.00005 524.00005 524.00005 411.26872 415.27894 Prm4 0.0000105 923.00001 411.26872 923.00001 731.49319 Prm6 0.000059 932.00006 415.27894 731.49319 932.00006 808 Estimated Covariance Matrix Prm1 Prm1 0.007884 Prm2 -0.001534 Prm4 -0.003953 Prm6 -0.004098 Prm2 Prm4 -0.001534 0.003442 8.24E-19 1.21E-18 Prm6 -0.003953 -0.004098 8.24E-19 1.21E-18 0.005036 4.07E-19 4.07E-19 0.005171 Analysis Of Parameter Estimates Parameter Standard DF Estimate Error Intercept a EDP a no EDP b Large b Small c Short c Long Scale 1 1 0 1 0 1 0 0 3.3709 -0.2186 0.0000 1.2942 0.0000 1.3402 0.0000 1.0000 0.0888 0.0587 0.0000 0.0710 0.0000 0.0719 0.0000 0.0000 Wald 95% Conf. 
Limits 3.196 3.545 -0.333 -0.104 0.000 0.000 1.155 1.433 0.000 0.000 1.199 1.481 0.000 0.000 1.000 1.000 Obs 1 2 3 4 5 6 7 8 x 428 271 39 194 55 169 2 18 a b c Pred Xbeta EDP no EDP EDP no EDP EDP no EDP EDP no EDP Large Large Small Small Large Large Small Small Short Short Short Short Long Long Long Long 325.937 405.556 89.342 111.165 85.332 106.176 23.390 29.103 5.78670 6.00526 4.49247 4.71102 4.44654 4.66509 3.15230 3.37085 809 Obs 1 2 3 4 5 6 7 8 Obs 1 2 3 4 5 6 7 8 Std Hesswgt 0.0486 0.0446 0.0723 0.0697 0.0734 0.0708 0.0909 0.0888 325.937 405.555 89.341 111.165 85.331 106.175 23.3898 29.1033 Lower Upper Resraw 296.310 371.607 77.531 96.970 73.896 92.416 19.574 24.455 Resdev Stresdev 5.3911462 -7.114062 -6.00237 7.0981945 -3.514212 5.6084885 -5.739604 -2.215714 11.254763 -16.18638 -8.225868 10.466935 -4.781762 8.2022624 -6.389847 -2.524141 358.527 442.606 102.951 127.439 98.537 121.984 27.950 34.636 102.06 -134.55 -50.34 82.8346 -30.331 62.824 -21.389 -11.103 Streschi Reschi 5.653 -6.681 -5.325 7.856 -3.283 6.097 -4.465 -2.058 Reslik 11.801971 11.67868 -15.20229 -15.39729 -7.298918 -7.80619 11.585074 11.08486 -4.467829 -4.64003 8.9167023 8.59006 -4.923823 -6.13402 -2.344679 -2.48411 810 # This is an illustration of using # the glm function in S-plus to fit # loglinear models to analyze the # associations among hospital size, # use of edp, and length of stay. 
# This file is stored as edp.ssc
#
# NOTE(review): this part of the file was recovered from a two-column
# slide extraction in which the left and right columns were interleaved
# token by token.  The statements below are the de-interleaved program.
# Bracketed numbers are the slide markers found in the extracted text.
#
# Components of the fitted glm objects are referred to by their full
# names (fitted.values, weights, df.residual, coefficients).  The
# abbreviated forms ($fit, $weight, $df, $coef) depend on partial
# matching by `$`; in R, `$df` matches both df.residual and df.null and
# therefore yields NULL, which breaks every goftests() call.

# First define a function to
# compute Pearson chi-squared
# and deviance tests and p-values
# [811]

# goftests: print the Pearson chi-squared and deviance (G^2)
# goodness-of-fit statistics with their chi-squared p-values.
#
#   x  = observed counts
#   m1 = expected counts under H0
#   m2 = expected counts under HA
#   df = degrees of freedom
#
# The function is called for its printed output (six cat() lines).
# The calls further down pass the fitted means as m1 and the observed
# counts as m2, i.e. they test each model against the general
# alternative.
goftests <- function(x, m1, m2, df)
{
  # A tiny constant is added to both sets of expected counts,
  # presumably so that the ratios below stay defined when a count
  # is exactly zero.
  m1 <- m1 + .00000000000000001
  m2 <- m2 + .00000000000000001

  # Compute Pearson chi-squared and its p-value
  x2p <- sum(((m1 - m2)^2) / m1)
  pvalp <- 1 - pchisq(x2p, df)

  # Compute the G^2 statistic and its p-value
  g2 <- 2 * sum(x * (log(m2 / m1)))
  pvalg <- 1 - pchisq(g2, df)

  # NOTE(review): the extraction collapsed runs of blanks, so any
  # column padding inside these labels could not be recovered.
  cat("\n", " Pearson test = ", round(x2p, 2))
  cat("\n", " Degrees of freedom = ", df)
  cat("\n", " p-value = ", round(pvalp, 2))
  cat("\n", " Deviance test = ", round(g2, 2))
  cat("\n", " df = ", df)
  cat("\n", " p-value = ", round(pvalg, 2), "\n")
}
# [812 813]

# The data are entered directly
# into the program code
edpg <- cbind(
  expand.grid(
    edp = c("nonuser", "user"),
    size = c("Small", "Large"),
    stay = c("long", "short")),
  Fr = c(18, 2, 169, 55, 194, 39, 271, 428))

# Print the data
edpg

# Use the glm function to fit
# the complete independence model.
# Use the family=poisson option when
# fitting a log-linear model even when
# you have multinomial data
options(contrasts = c("contr.treatment", "contr.poly"))
edp1 <- glm(Fr ~ edp + size + stay,
            family = poisson, data = edpg,
            maxit = 20, epsilon = .000001,
            x = TRUE, trace = TRUE)

# Print some results
summary(edp1, correlation = FALSE)
# [814 815]

# Print the estimated means for
# the independence model
edp1$fitted.values

# Test the fit of the model against the
# general alternative
goftests(edpg$Fr, edp1$fitted.values, edpg$Fr,
         edp1$df.residual)

# The estimates of the parameters are
# stored in edp1$coefficients
edp1$coefficients

# Compute the covariance matrix for the
# large sample normal approximation to
# the distribution of the parameter
# estimates: the inverse of X' W X, with X the model matrix kept by
# x = TRUE and W the diagonal matrix of the fit's weights component.
edp1$cov <- solve(t(edp1$x) %*%
                  diag(edp1$weights) %*% edp1$x)
edp1$cov

# Produce an analysis of deviance table
anova(edp1, test = "Chisq")
# [816 817]

# Now fit a joint independence model
# and print some results
edp2 <- glm(Fr ~ edp + size + stay +
              edp*size, family = poisson,
            data = edpg, maxit = 20,
            epsilon = .000001, x = TRUE,
            trace = TRUE)
summary(edp2, correlation = FALSE)
goftests(edpg$Fr, edp2$fitted.values, edpg$Fr,
         edp2$df.residual)
edp2$cov <- solve(t(edp2$x) %*%
                  diag(edp2$weights) %*% edp2$x)
edp2$cov

# Now fit a conditional independence
# model and print some results
edp3 <- glm(Fr ~ edp + size + stay +
              edp*size + size*stay,
            family = poisson, data = edpg,
            maxit = 20, epsilon = .000001,
            x = TRUE, trace = TRUE)
summary(edp3, correlation = FALSE)
goftests(edpg$Fr, edp3$fitted.values, edpg$Fr,
         edp3$df.residual)
edp3$cov <- solve(t(edp3$x) %*%
                  diag(edp3$weights) %*% edp3$x)
edp3$cov
# [818 819]

# Now fit a no three factor interaction
# model and print some results
edp4 <- glm(Fr ~ edp*size + size*stay + edp*stay,
            family = poisson, data = edpg,
            maxit = 20, epsilon = .000001,
            x = TRUE, trace = TRUE)
summary(edp4, correlation = FALSE)
goftests(edpg$Fr, edp4$fitted.values, edpg$Fr,
         edp4$df.residual)
edp4$cov <- solve(t(edp4$x) %*%
                  diag(edp4$weights) %*% edp4$x)
edp4$cov

# Use the step function to search for
# a good model. Here we start with the
# results for the complete independence
# model
edp.step <- step(edp1,
                 scope = list(lower = formula(edp1), upper = ~ .^3),
                 scale = 1, trace = FALSE)

# Print a summary of the results for
# the search.
edp.step$anova
# [820]

# ---- Program listing repeated together with its printed output ----

# The data are entered directly
# into the program code
edpg <- cbind(
  expand.grid(
    edp = c("nonuser", "user"),
    size = c("Small", "Large"),
    stay = c("long", "short")),
  Fr = c(18, 2, 169, 55, 194, 39, 271, 428))

# Print the data
edpg
#       edp  size  stay  Fr
# 1 nonuser Small  long  18
# 2    user Small  long   2
# 3 nonuser Large  long 169
# 4    user Large  long  55
# 5 nonuser Small short 194
# 6    user Small short  39
# 7 nonuser Large short 271
# 8    user Large short 428
# [821]
# # # # # Use the glm function to fit the complete independence model. 
Use the family=poisson option when fitting a log-linear model even when you have multinomial data options(contrasts=c("contr.treatment", "contr.poly")) edp1 <- glm(Fr ~ edp + size + stay, family=poisson, data=edpg, maxit=20, epsilon=.000001, x=T, trace=T) # Print some results summary(edp1, correlation=F) GLM GLM GLM GLM 822 linear linear linear linear loop loop loop loop 1: 2: 3: 4: deviance deviance deviance deviance = = = = 280.0322 248.2613 247.7446 247.7444 823 Call: glm(formula = Fr ~ edp + size + stay, family = poisson, data = edpg, x = T, maxit = 20, epsilon = 1e-006, trace = T) Deviance Residuals: 1 2 3 4 5 -2.215714 -5.739604 5.608489 -3.514212 7.098194 6 7 8 -6.00237 -7.114062 5.391146 Coefficients: (Intercept) edp size stay Value 3.3708544 -0.2185529 1.2942394 1.3401645 Std. Error 0.08876309 0.05866313 0.07094137 0.07189727 t value 37.975857 -3.725557 18.243790 18.639992 Null Deviance: 1096.686 on 7 df Residual Deviance: 247.7444 on 4 df Number of Fisher Scoring Iterations: 4 # Print the estimated means for # the independence model edp1$fit 1 2 3 4 5 29.10338 23.38984 106.1755 85.33127 111.1654 6 7 8 89.3415 405.5557 325.9374 # Test the fit of the model against the general # alternative goftests(edpg$Fr, edp1$fit, edpg$Fr, edp1$df) Pearson test Degrees of freedom p-value Deviance test df p-value = = = = = = 238.44 4 0 247.74 4 0 824 # The estimates of the parameters are # stored in edp1$coef 825 edp1$cov edp1$coef (Intercept) edp size stay 3.370854 -0.2185529 1.294239 1.340164 # Compute the covariance matrix for the # large sample normal approximation to # the distribution of the parameter estimates edp1$cov <- solve(t(edp1$x)%*% diag(edp1$weight)%*%edp1$x) 826 (Intercept) edp size stay (Intercept) 0.007878886 -0.001533487 -0.003949304 -0.004096412 edp -1.533487e-003 3.441363e-003 -1.950104e-018 -5.724499e-019 (Intercept) edp size stay size -3.949304e-003 -2.292037e-018 5.032678e-003 -4.942715e-018 stay -4.096412e-003 -1.132591e-018 -6.887539e-018 
5.169217e-003 827 # Produce an analysis of deviance table edp4 <- glm(Fr ~ edp*size+size*stay+edp*stay, family=poisson, data=edpg, maxit=20, epsilon=.000001, x=T, trace=T) anova(edp1, test="Chisq") Analysis of Deviance Table Poisson model Response: Fr Terms added sequentially (first Df Deviance Df Resid. Resid NULL 7 1096.686 edp 1 13.9596 6 1082.726 size 1 405.6369 5 677.089 stay 1 429.3447 4 247.744 # Now fit a no three factor interaction model # and print some results to last) Pr(Chi) 0.0001867802 0.0000000000 0.0000000000 summary(edp4, correlation=F) goftests(edpg$Fr, edp4$fit, edpg$Fr, edp4$df) edp4$cov <- solve(t(edp4$x)%*% diag(edp4$weight)%*%edp4$x) edp4$cov GLM GLM GLM GLM linear linear linear linear loop loop loop loop 1: 2: 3: 4: deviance deviance deviance deviance = = = = 1.3018 1.2719 1.2719 1.2719 829 828 Coefficients: (Intercept) edp size stay edp:size size:stay edp:stay Value 2.952286 -3.114433 2.170786 2.309628 2.019381 -1.826349 1.545133 Std. Error 0.2238374 0.2389421 0.2356911 0.2341949 0.1873423 0.2520890 0.1705747 t value 13.189421 -13.034257 9.210304 9.861990 10.779100 -7.244856 9.058392 Null Deviance: 1096.686 on 7 df. 
(Intercept) edp size stay edp:size size:stay edp:stay (Intercept) 0.050103197 -0.002427370 -0.049861675 -0.049892985 0.001463857 0.049502502 0.001207447 edp -0.002427370 0.057093318 -0.003253385 -0.002516965 -0.034430871 0.011701368 -0.028399938 size -0.049861675 -0.003253385 0.055550288 0.049627480 -0.001630866 -0.055150137 0.004612487 (Intercept) edp size stay edp:size size:stay stay -0.049892985 -0.002516965 0.049627480 0.054847267 0.003576153 -0.054418011 edp:size 0.001463857 -0.034430871 -0.001630866 0.003576153 0.035097123 -0.006980590 size:stay 0.049502502 0.011701368 -0.055150137 -0.054418011 -0.006980590 0.063548866 Residual Deviance: 1.271852 on 1 df Number of Fisher Scoring Iterations: 4 Pearson test Degrees of freedom p-value Deviance test df p-value = = = = = = 1.7 1 0.19 1.27 1 0.26 830 831 # Use the step function to search for a # good model. Here we start with the results # for the complete independence model edp.step <- step(edp1, list(lower=formula(edp1), upper=~ .^3), scale=1, trace=F) # Print a summary of the results for the search. edp.step$anova edp:stay -0.00132734 0.00518226 -0.00808208 (Intercept) edp size stay edp:size size:stay edp:stay edp:stay 0.001207447 -0.028399938 0.004612487 -0.001327344 0.005182261 -0.008082077 0.029095736 Stepwise Model Path Analysis of Deviance Table Initial Model: Fr ~ edp + size + stay Final Model: Fr ~ edp + size + stay + edp:size + edp:stay + size:stay Step Df 1 2 + edp:size -1 -114.5872 3 + edp:stay -1 -59.0249 4 + size:stay -1 -72.8604 1 2 3 4 832 Dev. Resid. Df Resid. Dev 4 247.7444 3 133.1572 2 74.1323 1 1.2719 AIC 255.7444 143.1572 86.1323 15.2719 833