/* This program uses PROC GENMOD in SAS to fit a series of
   log-linear models to the counts in a 3-dimensional
   contingency table.  It is stored as edp.sas */

/* Cell counts: a = EDP use, b = hospital size,
   c = length of stay, x = observed count */
data set1;
  input a b c x;
  label a = "User status"
        b = "Hospital size"
        c = "Length of stay";
  cards;
1 1 1 428
2 1 1 271
1 2 1 39
2 2 1 194
1 1 2 55
2 1 2 169
1 2 2 2
2 2 2 18
run;

/* Value labels for the three classification factors */
proc format;
  value edp  1 = 'EDP'
             2 = 'no EDP';
  value size 1 = 'Large'
             2 = 'Small';
  value stay 1 = 'Short'
             2 = 'Long';
run;

/* Fit the complete independence model */
proc genmod data=set1 order=internal;
  class a b c;
  model x = a b c / dist = poisson
            link=log
            maxit=50 covb itprint obstats ;
  make 'parmest' out=est1;
  make 'obstats' out=obstat1;
  format a edp. b size. c stay.;
run;

proc print data=est1;
run;

proc print data=obstat1; run;

/* Fit a cond. independence model */
proc genmod data=set1 order=internal;
  class a b c;
  model x = a b c a*c b*c /
            dist = poisson
            maxit=50
            link=log
            covb type1 type3 obstats;
  format a edp. b size. c stay.;
run;

/* Fit a joint independence model */
proc genmod data=set1 order=internal;
  class a b c;
  model x = a b c a*b / dist = poisson
            link=log maxit=50
            covb
            type1 type3 obstats ;
  format a edp. b size. c stay.;
run;
/* Fit a no three factor interaction
   model: all three two-way interactions (a*b, a*c, b*c)
   are included, only the a*b*c term is omitted */
proc genmod data=set1 order=internal;
  class a b c;
  model x = a b c a*b a*c b*c / dist = poisson
            link=log
            maxit=50
            covb
            type1 type3 obstats ;
  format a edp. b size. c stay.;
run;

The GENMOD Procedure

Model Information

Data Set              WORK.SET1
Distribution          Poisson
Link Function         Log
Dependent Variable    x
Observations Used     8

Class Level Information

Class    Levels    Values
a             2    EDP no EDP
b             2    Large Small
c             2    Short Long
806
805
Criteria For Assessing Goodness Of Fit
Criterion
Iteration History For Parameter Estimates
Iter
Ridge
Log
Likelihood
0
0
5101.07543
1
0
5116.96084
2
0
5117.2192
3
0
5117.2193
4
0
5117.2193
Prm1
Prm4
3.7311096
1.0632877
3.4221535
1.2578693
3.3718945
1.2934595
3.3708544
1.2942394
3.3708544
1.2942394
Prm2
Prm6
-0.136807
1.2012649
-0.209612
1.3207348
-0.218447
1.3398346
-0.218553
1.3401645
-0.218553
1.3401645
807
Deviance
Scaled Deviance
Pearson Chi-Square
Scaled Pearson X2
Log Likelihood
DF
Value
Value/DF
4
4
4
4
247.7444
247.7444
238.4443
238.4443
5117.2193
61.9361
61.9361
59.6111
59.6111
Last Evaluation Of The Negative Of
The Gradient and Hessian
Prm1
Prm2
Grad. 0.0000995
Prm1 1176.0001
Prm2
524.00005
Prm4
923.00001
Prm6
932.00006
0.00005
524.00005
524.00005
411.26872
415.27894
Prm4
0.0000105
923.00001
411.26872
923.00001
731.49319
Prm6
0.000059
932.00006
415.27894
731.49319
932.00006
808
Estimated Covariance Matrix
Prm1
Prm1 0.007884
Prm2 -0.001534
Prm4 -0.003953
Prm6 -0.004098
Prm2
Prm4
-0.001534
0.003442
8.24E-19
1.21E-18
Prm6
-0.003953 -0.004098
8.24E-19 1.21E-18
0.005036 4.07E-19
4.07E-19 0.005171
Analysis Of Parameter Estimates
Parameter
Standard
DF Estimate
Error
Intercept
a
EDP
a no EDP
b Large
b Small
c Short
c Long
Scale
1
1
0
1
0
1
0
0
3.3709
-0.2186
0.0000
1.2942
0.0000
1.3402
0.0000
1.0000
0.0888
0.0587
0.0000
0.0710
0.0000
0.0719
0.0000
0.0000
Wald 95%
Conf. Limits
3.196 3.545
-0.333 -0.104
0.000 0.000
1.155 1.433
0.000 0.000
1.199 1.481
0.000 0.000
1.000 1.000
Obs
1
2
3
4
5
6
7
8
x
428
271
39
194
55
169
2
18
a
b
c
Pred
Xbeta
EDP
no EDP
EDP
no EDP
EDP
no EDP
EDP
no EDP
Large
Large
Small
Small
Large
Large
Small
Small
Short
Short
Short
Short
Long
Long
Long
Long
325.937
405.556
89.342
111.165
85.332
106.176
23.390
29.103
5.78670
6.00526
4.49247
4.71102
4.44654
4.66509
3.15230
3.37085
809
Obs
1
2
3
4
5
6
7
8
Obs
1
2
3
4
5
6
7
8
Std Hesswgt
0.0486
0.0446
0.0723
0.0697
0.0734
0.0708
0.0909
0.0888
325.937
405.555
89.341
111.165
85.331
106.175
23.3898
29.1033
Lower Upper Resraw
296.310
371.607
77.531
96.970
73.896
92.416
19.574
24.455
Resdev
Stresdev
5.3911462
-7.114062
-6.00237
7.0981945
-3.514212
5.6084885
-5.739604
-2.215714
11.254763
-16.18638
-8.225868
10.466935
-4.781762
8.2022624
-6.389847
-2.524141
358.527
442.606
102.951
127.439
98.537
121.984
27.950
34.636
102.06
-134.55
-50.34
82.8346
-30.331
62.824
-21.389
-11.103
Streschi
Reschi
5.653
-6.681
-5.325
7.856
-3.283
6.097
-4.465
-2.058
Reslik
11.801971 11.67868
-15.20229 -15.39729
-7.298918 -7.80619
11.585074 11.08486
-4.467829 -4.64003
8.9167023
8.59006
-4.923823 -6.13402
-2.344679 -2.48411
810
# This is an illustration of using
# the glm function in S-plus to fit
# loglinear models to analyze
# associations among hospital size,
# use of edp, and length of stay.
# This file is stored as edp.ssc
# First define a function to
# compute Pearson chi-squared
# and deviance tests and p-values
# In this function
#    x  = observed counts
#    m1 = expected counts under H0
#    m2 = expected counts under HA
#    df = degrees of freedom
# The two test statistics and their p-values are printed with cat().
# They are also returned invisibly as a list with components
# pearson, pearson.p, deviance, deviance.p and df.

goftests <- function(x, m1, m2, df) {
  # Tiny offsets; presumably a guard against dividing by, or taking
  # the log of, an expected count that is exactly zero.
  m1 <- m1 + .00000000000000001
  m2 <- m2 + .00000000000000001

  # Compute Pearson chi-squared
  x2p <- sum(((m1 - m2)^2) / m1)
  pvalp <- 1 - pchisq(x2p, df)

  # Compute the G^2 statistic
  g2 <- 2 * sum(x * (log(m2 / m1)))
  pvalg <- 1 - pchisq(g2, df)

  cat("\n", "       Pearson test = ", round(x2p, 2))
  cat("\n", " Degrees of freedom = ", df)
  cat("\n", "            p-value = ", round(pvalp, 2))
  cat("\n", "      Deviance test = ", round(g2, 2))
  cat("\n", "                 df = ", df)
  cat("\n", "            p-value = ", round(pvalg, 2), "\n")

  invisible(list(pearson = x2p, pearson.p = pvalp,
                 deviance = g2, deviance.p = pvalg, df = df))
}
812
813
# The data are entered directly
# into the program code
edpg <- data.frame(
  expand.grid(
    edp = c("nonuser", "user"),
    size = c("Small", "Large"),
    stay = c("long", "short")
  ),
  Fr = c(18, 2, 169, 55, 194, 39, 271, 428),
  check.names = FALSE
)

# Use the glm function to fit
# the complete independence model.
# Use the family=poisson option when
# fitting a log-linear model even when
# you have multinomial data
options(contrasts = c("contr.treatment", "contr.poly"))
# Print the data
edpg

# Fit the complete independence model: main effects only.
# x = TRUE keeps the model matrix in the fitted object, and
# trace = TRUE prints the deviance at each iteration.
edp1 <- glm(Fr ~ edp + size + stay,
            family = poisson, data = edpg,
            maxit = 20, epsilon = .000001,
            x = TRUE, trace = TRUE)

# Print some results
summary(edp1, correlation = FALSE)
814
815
# Print the estimated means for
# the independence model
edp1$fitted.values

# Test the fit of the model against the
# general alternative (the observed counts are the
# expected counts under the saturated model)
goftests(edpg$Fr, edp1$fitted.values, edpg$Fr,
         edp1$df.residual)

# The estimates of the parameters are
# stored in edp1$coefficients
edp1$coefficients

# Compute the covariance matrix for the
# large sample normal approximation to
# the distribution of the parameter
# estimates
edp1$cov <- solve(t(edp1$x) %*%
                  diag(edp1$weights) %*% edp1$x)
edp1$cov

# Produce an analysis of deviance table
anova(edp1, test = "Chisq")
816
817
# Now fit a joint independence model
# and print some results
edp2 <- glm(Fr ~ edp + size + stay +
              edp*size, family = poisson,
            data = edpg, maxit = 20,
            epsilon = .000001, x = TRUE,
            trace = TRUE)
summary(edp2, correlation = FALSE)
goftests(edpg$Fr, edp2$fitted.values, edpg$Fr,
         edp2$df.residual)
edp2$cov <- solve(t(edp2$x) %*%
                  diag(edp2$weights) %*% edp2$x)
edp2$cov

# Now fit a conditional independence
# model and print some results
edp3 <- glm(Fr ~ edp + size + stay +
              edp*size + size*stay,
            family = poisson, data = edpg,
            maxit = 20, epsilon = .000001,
            x = TRUE, trace = TRUE)
summary(edp3, correlation = FALSE)
goftests(edpg$Fr, edp3$fitted.values, edpg$Fr,
         edp3$df.residual)
edp3$cov <- solve(t(edp3$x) %*%
                  diag(edp3$weights) %*% edp3$x)
edp3$cov
818
819
# Now fit a no three factor interaction
# model and print some results
edp4 <- glm(Fr ~ edp*size + size*stay +
              edp*stay,
            family = poisson, data = edpg,
            maxit = 20, epsilon = .000001,
            x = TRUE, trace = TRUE)
summary(edp4, correlation = FALSE)
goftests(edpg$Fr, edp4$fitted.values, edpg$Fr,
         edp4$df.residual)
edp4$cov <- solve(t(edp4$x) %*%
                  diag(edp4$weights) %*% edp4$x)
edp4$cov

# Use the step function to search for
# a good model.  Here we start with the
# results for the complete independence
# model
edp.step <- step(edp1,
                 list(lower = formula(edp1),
                      upper = ~ .^3), scale = 1, trace = FALSE)

# Print a summary of the results for
# the search.
edp.step$anova
820
# The data are entered directly
# into the program code
edpg <- cbind(expand.grid(
edp=c("nonuser","user"),
size=c("Small","Large"),
stay=c("long","short")),
Fr = c(18, 2, 169, 55, 194, 39, 271, 428))
# Print the data
edpg
1
2
3
4
5
6
7
8
edp
nonuser
user
nonuser
user
nonuser
user
nonuser
user
size
Small
Small
Large
Large
Small
Small
Large
Large
stay
long
long
long
long
short
short
short
short
Fr
18
2
169
55
194
39
271
428
821
# Use the glm function to fit
# the complete independence model.
# Use the family=poisson option when
# fitting a log-linear model even when
# you have multinomial data
options(contrasts=c("contr.treatment",
"contr.poly"))
edp1 <- glm(Fr ~ edp + size + stay,
family=poisson, data=edpg,
maxit=20, epsilon=.000001,
x=T, trace=T)
# Print some results
summary(edp1, correlation=F)
GLM
GLM
GLM
GLM
822
linear
linear
linear
linear
loop
loop
loop
loop
1:
2:
3:
4:
deviance
deviance
deviance
deviance
=
=
=
=
280.0322
248.2613
247.7446
247.7444
823
Call: glm(formula = Fr ~ edp + size + stay,
family = poisson, data = edpg, x = T,
maxit = 20, epsilon = 1e-006, trace = T)
Deviance Residuals:
1
2
3
4
5
-2.215714 -5.739604 5.608489 -3.514212 7.098194
6
7
8
-6.00237 -7.114062 5.391146
Coefficients:
(Intercept)
edp
size
stay
Value
3.3708544
-0.2185529
1.2942394
1.3401645
Std. Error
0.08876309
0.05866313
0.07094137
0.07189727
t value
37.975857
-3.725557
18.243790
18.639992
Null Deviance: 1096.686 on 7 df
Residual Deviance: 247.7444 on 4 df
Number of Fisher Scoring Iterations: 4
# Print the estimated means for
# the independence model
edp1$fit
1
2
3
4
5
29.10338 23.38984 106.1755 85.33127 111.1654
6
7
8
89.3415 405.5557 325.9374
# Test the fit of the model against the general
# alternative
goftests(edpg$Fr, edp1$fit, edpg$Fr, edp1$df)
Pearson test
Degrees of freedom
p-value
Deviance test
df
p-value
=
=
=
=
=
=
238.44
4
0
247.74
4
0
824
# The estimates of the parameters are
# stored in edp1$coef
edp1$coef
(Intercept)        edp     size     stay
   3.370854 -0.2185529 1.294239 1.340164
# Compute the covariance matrix for the
# large sample normal approximation to
# the distribution of the parameter estimates
edp1$cov <- solve(t(edp1$x)%*%
diag(edp1$weight)%*%edp1$x)
edp1$cov
             (Intercept)            edp
(Intercept)  0.007878886 -1.533487e-003
        edp -0.001533487  3.441363e-003
       size -0.003949304 -1.950104e-018
       stay -0.004096412 -5.724499e-019
                      size           stay
(Intercept) -3.949304e-003 -4.096412e-003
        edp -2.292037e-018 -1.132591e-018
       size  5.032678e-003 -6.887539e-018
       stay -4.942715e-018  5.169217e-003
827
# Produce an analysis of deviance table
anova(edp1, test="Chisq")
Analysis of Deviance Table
Poisson model
Response: Fr
Terms added sequentially (first to last)
     Df Deviance Resid. Df Resid. Dev      Pr(Chi)
NULL                     7   1096.686
 edp  1  13.9596         6   1082.726 0.0001867802
size  1 405.6369         5    677.089 0.0000000000
stay  1 429.3447         4    247.744 0.0000000000
# Now fit a no three factor interaction model
# and print some results
edp4 <- glm(Fr ~ edp*size+size*stay+edp*stay,
family=poisson, data=edpg,
maxit=20, epsilon=.000001,
x=T, trace=T)
summary(edp4, correlation=F)
goftests(edpg$Fr, edp4$fit, edpg$Fr, edp4$df)
edp4$cov <- solve(t(edp4$x)%*%
diag(edp4$weight)%*%edp4$x)
edp4$cov
GLM
GLM
GLM
GLM
linear
linear
linear
linear
loop
loop
loop
loop
1:
2:
3:
4:
deviance
deviance
deviance
deviance
=
=
=
=
1.3018
1.2719
1.2719
1.2719
829
828
Coefficients:
(Intercept)
edp
size
stay
edp:size
size:stay
edp:stay
Value
2.952286
-3.114433
2.170786
2.309628
2.019381
-1.826349
1.545133
Std. Error
0.2238374
0.2389421
0.2356911
0.2341949
0.1873423
0.2520890
0.1705747
t value
13.189421
-13.034257
9.210304
9.861990
10.779100
-7.244856
9.058392
Null Deviance: 1096.686 on 7 df.
(Intercept)
edp
size
stay
edp:size
size:stay
edp:stay
(Intercept)
0.050103197
-0.002427370
-0.049861675
-0.049892985
0.001463857
0.049502502
0.001207447
edp
-0.002427370
0.057093318
-0.003253385
-0.002516965
-0.034430871
0.011701368
-0.028399938
size
-0.049861675
-0.003253385
0.055550288
0.049627480
-0.001630866
-0.055150137
0.004612487
(Intercept)
edp
size
stay
edp:size
size:stay
stay
-0.049892985
-0.002516965
0.049627480
0.054847267
0.003576153
-0.054418011
edp:size
0.001463857
-0.034430871
-0.001630866
0.003576153
0.035097123
-0.006980590
size:stay
0.049502502
0.011701368
-0.055150137
-0.054418011
-0.006980590
0.063548866
Residual Deviance: 1.271852 on 1 df
Number of Fisher Scoring Iterations: 4
Pearson test
Degrees of freedom
p-value
Deviance test
df
p-value
=
=
=
=
=
=
1.7
1
0.19
1.27
1
0.26
830
831
# Use the step function to search for a
# good model. Here we start with the results
# for the complete independence model
edp.step <- step(edp1, list(lower=formula(edp1),
upper=~ .^3), scale=1, trace=F)
# Print a summary of the results for the search.
edp.step$anova
edp:stay -0.00132734 0.00518226 -0.00808208
(Intercept)
edp
size
stay
edp:size
size:stay
edp:stay
edp:stay
0.001207447
-0.028399938
0.004612487
-0.001327344
0.005182261
-0.008082077
0.029095736
Stepwise Model Path
Analysis of Deviance Table
Initial Model:
Fr ~ edp + size + stay
Final Model:
Fr ~ edp + size + stay + edp:size +
edp:stay + size:stay
Step Df
1
2 + edp:size -1 -114.5872
3 + edp:stay -1 -59.0249
4 + size:stay -1 -72.8604
1
2
3
4
832
Dev.
Resid.
Df Resid. Dev
4 247.7444
3 133.1572
2
74.1323
1
1.2719
AIC
255.7444
143.1572
86.1323
15.2719
833
Download