CARDS; 49.057 6 53 52.991 13 47 56.911 18 44

advertisement
/* This program is stored as bliss.sas */

/*
This program uses PROC LOGISTIC in SAS to fit models with logistic,
probit, and complementary log-log link functions to the beetle
mortality data collected by Chester Bliss
*/

/* Read the dose-response data.  Each data line holds
   Z (dose), X1 (number dead) and X2 (number alive);
   ZL (log dose) and N (number exposed) are derived here. */
DATA SET1;
INPUT Z X1 X2;
ZL = LOG(Z);
N = X1+x2;
LABEL Z = DOSE
ZL = LOG(DOSE)
X1 = NUMBER DEAD
X2 = NUMBER ALIVE
N = NUMBER EXPOSED;
CARDS;
49.057 6 53
52.991 13 47
56.911 18 44
60.842 28 28
64.759 52 11
68.691 53 6
72.611 61 1
76.542 60 0
RUN;

/* List the data set that was just created */
PROC PRINT DATA=SET1;
TITLE ' LOGISTIC REGRESSION
ANALYSIS OF THE BLISS DATA' ;
RUN;

/* Fit the logistic (logit link) model of X1 deaths out of N on ZL.
   OUTEST=/COVOUT store the estimates and their covariance matrix
   in SETP1; the OUTPUT statement stores fitted probabilities and
   95 percent limits in SETR1. */
PROC LOGISTIC DATA=SET1 COVOUT
OUTEST=SETP1;
MODEL X1/N = ZL / ITPRINT COVB
MAXITER=50 CONVERGE=.000001
PPROB=.5 SELECTION=NONE;
OUTPUT OUT=SETR1 L=LOWER95 P=PHAT
U=UPPER95 / ALPHA=.05;
TITLE 'LOGISTIC REGRESSION MODEL ON
LOG(DOSE) FOR THE BLISS DATA';
RUN;
975
976
/* Print the OUTEST data set and the OUTPUT data set
   produced by the logistic fit */
PROC PRINT DATA=SETP1;
TITLE 'DATA SET CONTAINING INFORMATION
ON PARAMETER ESTIMATES';
PROC PRINT DATA=SETR1;
TITLE ' ESTIMATES OF MORTALITY RATES';

/* Estimate the LD50 */

/* SET2 keeps only the row of parameter estimates */
data set2; set setp1;
if(_TYPE_ = 'PARMS');
keep intercept zl; run;

/* SET3 keeps only the covariance rows */
data set3; set setp1;
if(_TYPE_ = 'COV');
keep intercept zl; run;

/* IML module: b = (intercept, slope), v = their covariance matrix.
   log(LD50) = -b0/b1; g holds its partial derivatives with respect
   to (b0, b1), so s is the delta-method standard error of log(LD50).
   The limits are computed on the log scale and then exponentiated. */
proc iml;
start ld50;
use set2;
read all into b;
use set3;
read all into v;
ld50 = -b[1,1]/b[1,2];
g = (-1 || b[1,1]/b[1,2])/b[1,2];
s = sqrt(g*v*t(g));
lower = ld50 - (1.96)*s;
upper = ld50 + (1.96)*s;
ld50 = exp(ld50);
stderr = ld50*s;
lower=exp(lower);
upper=exp(upper);
print ld50 stderr lower upper;
finish;
977
978
run ld50;
/* Plot the observed proportions and
the fitted curve */

/* Add the observed proportion dead to the OUTPUT data set */
DATA SETR1; SET SETR1; PROB=X1/(X1+X2);
RUN;

/* Use this to create graphs in Windows */
goptions
cback=white colors=(black)
device=WIN target=WINPRTC
rotate=portrait;

/* Use this to produce a postscript plot
in the VINCENT system */
/* filename graffile pipe 'lpr -Dpostscript';
goptions gsfmode=replace gsfname=graffile
cback=white colors=(black)
targetdevice=ps300 rotate=landscape;*/

/* SYMBOL1: spline-interpolated line without markers;
   SYMBOL2: circle markers */
SYMBOL1 V=NONE I=SPLINE L=1 W=2 H=2;
SYMBOL2 V=circle H=2;
RUN;

/* Overlay PHAT and PROB against ZL on common axes */
PROC GPLOT DATA=SETR1;
AXIS1 LABEL= (H=2.5 R=0 A=90
F=swiss 'Mortality Rate' )
VALUE=(H=2 F=swiss)
LENGTH=5 in
ORDER=0.0 TO 1.0 BY 0.2;
AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' )
VALUE=(H=2)
LENGTH=6 in
ORDER=3.8 TO 4.4 BY 0.1;
PLOT PHAT*ZL PROB*ZL/ OVERLAY
VAXIS=AXIS1 HAXIS=AXIS2;
TITLE H=3. F=swiss
'LOGISTIC REGRESSION ANALYSIS';
TITLE2 H=3 F=swiss 'Bliss beetle data';
980
979
/* FIT A PROBIT MODEL TO THE BLISS DATA */
/* Same model statement as the logistic fit, with LINK=NORMIT;
   estimates go to SETP2 and fitted values to SETR2. */
PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP2;
MODEL X1/N = ZL / ITPRINT COVB LINK=NORMIT
PPROB=.5 SELECTION=NONE;
OUTPUT OUT=SETR2 L=LOWER95 P=PHAT U=UPPER95
/ ALPHA=.05;
TITLE 'PROBIT MODEL USING LOG(DOSE)
FOR THE BLISS DATA';
RUN;

PROC PRINT DATA=SETP2;
TITLE 'DATA SET CONTAINING INFORMATION
ON PARAMETER ESTIMATES';
PROC PRINT DATA=SETR2;
TITLE ' ESTIMATES OF MORTALITY RATES';

/* Add the observed proportion dead to the OUTPUT data set */
DATA SETR2; SET SETR2; PROB= X1/(X1+X2);
RUN;

/* Overlay PHAT and PROB against ZL */
PROC GPLOT DATA=SETR2;
AXIS1 LABEL= (H=2.5 R=0 A=90 F=swiss
'Mortality Rate' )
VALUE=(H=2 F=swiss)
LENGTH=5 in
ORDER=0.0 TO 1.0 BY 0.2;
AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' )
VALUE=(H=2 F=swiss)
LENGTH=6 in
ORDER=3.8 TO 4.4 BY 0.1;
PLOT PHAT*ZL PROB*ZL / OVERLAY
VAXIS=AXIS1 HAXIS=AXIS2;
TITLE H=3. F=swiss 'PROBIT ANALYSIS';
TITLE2 H=3 F=swiss 'Bliss beetle data';
RUN;
981
982
/* Fit the complementary log-log model
to the Bliss data */
/* NOTE(review): the TITLE strings below spell the word as
   'COMPLIMENTARY'; they are left unchanged because they are
   program output. */
TITLE ' COMPLIMENTARY LOG-LOG
ANALYSIS OF THE BLISS DATA' ;

/* Same model statement as the logistic fit, with LINK=CLOGLOG;
   estimates go to SETP3 and fitted values to SETR3. */
PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP3;
MODEL X1/N = ZL / ITPRINT COVB LINK=CLOGLOG
PPROB=.5 SELECTION=NONE;
OUTPUT OUT=SETR3 L=LOWER95 P=PHAT
U=UPPER95 / ALPHA=.05;
TITLE 'COMPLIMENTARY LOG-LOG MODEL
FOR THE BLISS DATA';
run;

PROC PRINT DATA=SETP3;
TITLE 'DATA SET CONTAINING INFORMATION
ON PARAMETER ESTIMATES';
PROC PRINT DATA=SETR3;
TITLE ' ESTIMATES OF MORTALITY RATES';

/* Add the observed proportion dead to the OUTPUT data set */
DATA SETR3; SET SETR3; PROB = X1/(X1+X2);
RUN;

/* Overlay PHAT (symbol 1) and PROB (symbol 2) against ZL */
PROC GPLOT DATA=SETR3;
AXIS1 LABEL= (H=2.5 R=0 A=90 F=swiss
'Mortality Rate' )
VALUE=(H=2 F=swiss )
LENGTH=5 in
ORDER=0.0 TO 1.0 BY 0.2;
AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' )
VALUE=(H=2 F=swiss)
LENGTH=6 in
ORDER=3.8 TO 4.4 BY 0.1;
PLOT PHAT*ZL=1
PROB*ZL=2 /
OVERLAY VAXIS=AXIS1 HAXIS=AXIS2;
TITLE H=3. F=swiss
'COMPLIMENTARY LOG-LOG ANALYSIS';
TITLE2 H=3 F=swiss 'Bliss beetle data';
RUN;
984
983
LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA

Obs         Z    X1    X2         ZL     N
  1    49.057     6    53    3.89298    59
  2    52.991    13    47    3.97012    60
  3    56.911    18    44    4.04149    62
  4    60.842    28    28    4.10828    56
  5    64.759    52    11    4.17067    63
  6    68.691    53     6    4.22962    59
  7    72.611    61     1    4.28512    62
  8    76.542    60     0    4.33784    60

Model Information

Data Set                      WORK.SET1
Response Variable (Events)    X1
Response Variable (Trials)    N
Number of Observations        8
Link Function                 Logit
Optimization Technique        Fisher's scoring

Response Profile

Ordered    Binary          Total
  Value    Outcome     Frequency
      1    Event             291
      2    Nonevent          190

Maximum Likelihood Iteration History

Iter    Ridge      -2 Log L     Intercept           ZL
   0        0    645.441025      0.426299            0
   1        0    395.941537    -39.615600     9.694171
   2        0    374.092238    -54.667149    13.394642
   3        0    372.484914    -60.122455    14.736966
   4        0    372.470133    -60.711104    14.881848
   5        0    372.470132    -60.717199    14.883348
   6        0    372.470132    -60.717199    14.883348
985
986
Model Fit Statistics

                              Intercept
              Intercept          and
Criterion       Only          Covariates
AIC            647.441          376.470
SC             651.617          384.822
-2 Log L       645.441          372.470

Testing Global Null Hypothesis: BETA=0

Test                Chi-Square    DF    Pr > ChiSq
Likelihood Ratio      272.9709     1        <.0001
Score                 227.5810     1        <.0001
Wald                  138.4904     1        <.0001

Analysis of Maximum Likelihood Estimates

                              Standard
Parameter    DF    Estimate      Error    Chi-Square    Pr > ChiSq
Intercept     1    -60.7172     5.1806      137.3587        <.0001
ZL            1     14.8833     1.2647      138.4904        <.0001

Odds Ratio Estimates

             Point          95% Wald
Effect    Estimate      Confidence Limits
ZL        >999.999    >999.999    >999.999

Association of Predicted Probabilities and Observed Responses

Percent Concordant     87.0    Somers' D    0.802
Percent Discordant      6.8    Gamma        0.856
Percent Tied            6.3    Tau-a        0.384
Pairs                 55290    c            0.901
988
987
Estimated Covariance Matrix

Variable     Intercept          ZL
Intercept     26.83906    -6.54992
ZL            -6.54992     1.59949
DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES

Obs  _LINK_  _TYPE_  _STATUS_     _NAME_  Intercept       ZL  _LNLIKE_
  1  LOGIT   PARMS   0 Converged  X1       -60.7172  14.8833  -186.235
  2  LOGIT   COV     0 Converged  Int       26.8391  -6.5499  -186.235
  3  LOGIT   COV     0 Converged  ZL        -6.5499   1.5995  -186.235

ESTIMATES OF MORTALITY RATES

Obs       Z   X1   X2       ZL    N     PHAT  LOWER95  UPPER95
  1  49.057    6   53  3.89298   59  0.05860  0.03425  0.09850
  2  52.991   13   47  3.97012   60  0.16404  0.11605  0.22678
  3  56.911   18   44  4.04149   62  0.36209  0.29841  0.43101
  4  60.842   28   28  4.10828   56  0.60534  0.54224  0.66511
  5  64.759   52   11  4.17067   63  0.79517  0.73862  0.84210
  6  68.691   53    6  4.22962   59  0.90323  0.85972  0.93428
  7  72.611   61    1  4.28512   62  0.95520  0.92576  0.97330
  8  76.542   60    0  4.33784   60  0.97905  0.96046  0.98900
989
990
# This code uses the glm function in
# Splus to fit models to the Bliss beetle
# data.  It is stored in the file
#
#        bliss.ssc
#
# First enter the data
# Define a function to extract the estimated
# covariance matrix from the summary object
#
# Argument:
#   obj   a fitted model object accepted by summary()
#         (used in this file with glm fits)
# Value:
#   the unscaled covariance matrix of the coefficient
#   estimates multiplied by the dispersion estimate,
#   both taken from the summary object
f.vcov <- function(obj) {
  # FALSE is spelled out because F is an ordinary
  # variable that can be reassigned
  fit.summary <- summary(obj, corr=FALSE)
  fit.summary$dispersion*fit.summary$cov.unscaled
}
# Doses, and the numbers of dead and alive beetles at each dose
dose <- c(49.057, 52.991, 56.911, 60.842,
          64.759, 68.691, 72.611, 76.542)
ldose <- log(dose)
ndead <- c(6, 13, 18, 28, 52, 53, 61, 60)
nalive <- c(53, 47, 44, 28, 11, 6, 1, 0)
y <- cbind(ndead, nalive)

# Now fit a logistic regression of mortality
# rates on log-dose
# (x=T keeps the model matrix in the fitted object;
#  it is used below as bliss.lg$x)
bliss.lg <- glm(y ~ ldose, x=T, trace=T,
                family=binomial(link=logit))
summary(bliss.lg)

# Store the estimated covariance matrix of the
# coefficients in the fitted object and print it
bliss.lg$vcov <- f.vcov(bliss.lg)
bliss.lg$vcov

# Compute standard errors for the estimated
# probabilities at the dosage levels
# used in the study
# (standard error of the linear predictor
#  multiplied by p*(1-p))
sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*%
        t(bliss.lg$x)))) * bliss.lg$fit*
        (1-bliss.lg$fit)
cbind(bliss.lg$x, bliss.lg$fit, sp)
992
991
# Construct approximate one-at-a-time
# 95 percent confidence intervals for
# the estimated proportions
# (limits are formed for the linear predictor and
#  then mapped to probabilities with the inverse logit)
slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*%
         t(bliss.lg$x))))
llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp
ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp
lower <- exp(llp)/(1+exp(llp))
upper <- exp(ulp)/(1+exp(ulp))
cbind(bliss.lg$x, bliss.lg$fit, lower, upper)

# Estimate the LD50 and construct an
# approximate 95 percent confidence interval
# First make a confidence interval for log(LD50)
b0 <- bliss.lg$coef[1]
b1 <- bliss.lg$coef[2]
l.ld50 <- -b0/b1
# g holds the partial derivatives of -b0/b1 with
# respect to (b0, b1); sl.ld50 is the resulting
# delta-method standard error of log(LD50)
g <- cbind(-1, b0/b1)/b1
sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g))
l.upper <- l.ld50 + (1.96)*sl.ld50
l.lower <- l.ld50 - (1.96)*sl.ld50
# Transform back to the dose scale
# (this reuses the names lower and upper from above)
ld50 <- signif(exp(l.ld50))
lower <- signif(exp(l.lower))
upper <- signif(exp(l.upper))
sld50 <- signif(ld50*sl.ld50)
cat(ld50, lower, upper, sld50)
993
994
# Compute p-values for lack-of-fit tests.
# These results may be unreliable for
# sparse data.
#
# Compute Pearson chi-squared
# and deviance tests and p-values
#
# In this function
#   x  = observed counts
#   m1 = expected counts under H0
#   m2 = expected counts under HA
#   df = degrees of freedom
#
# The results are printed with cat(); nothing else is returned.
# NOTE(review): the leading blanks inside the label strings were
# lost in the source; the padding below only aligns the "=" signs.
goftests <- function(x, m1, m2, df)
{
  # A tiny constant is added to both sets of expected counts,
  # presumably so that the division by m1 and log(m2/m1)
  # stay finite when a count is exactly zero
  m1 <- m1 + .00000000000000001
  m2 <- m2 + .00000000000000001

  # Compute the Pearson chi-squared statistic
  x2p <- sum(((m1-m2)^2)/m1)
  pvalp <- 1 - pchisq(x2p, df)

  # Compute the G^2 statistic
  g2 <- 2*sum(x*(log(m2/m1)))
  pvalg <- 1-pchisq(g2, df)

  cat("\n", "  Pearson test = ", round(x2p,2))
  cat("\n", "            df = ", df)
  cat("\n", "       p-value = ", round(pvalp,2))
  cat("\n", " Deviance test = ", round(g2,2))
  cat("\n", "            df = ", df)
  cat("\n", "       p-value = ", round(pvalg,2),"\n")
}

# Compute estimates of expected counts
nd <- (ndead+nalive)*bliss.lg$fit
na <- (ndead+nalive)-nd
my <- c(nd, na)
goftests(y,my,y,bliss.lg$df)
996
995
# Open a motif window for displaying plots
# Windows users should not do this
motif( )

# Plot the estimated curve for the mortality
# probabilities against the log-dose
#
# since the plot function uses type="n"
# it only defines and labels the axes and
# writes a title at the top of the plot.
plot(c(3.8, 4.4), c(0,1), type="n",
     xlab="log-dose", ylab="mortality rate",
     main="Bliss Beetle Data")

# the points( ) function plots points on
# the plot corresponding to the observed
# proportions
points(ldose, ndead/(ndead+nalive),
       pch=18, mkh=0, cex=2.0 )

# the lines( ) function plots the estimated
# curve
ld <- seq(3.8, 4.4, 0.02)
lines(ld, predict(bliss.lg,
                  data.frame(ldose=ld),
                  type="response"))

# Plot the estimated curve for the
# mortality probabilities against dose
plot(c(48, 78), c(0,1), type="n",
     xlab="dose", ylab="mortality rate",
     main="Bliss Beetle Data")
# NOTE(review): only the tail "mkh=0, cex=2.0 )" of this call
# survived in the source; the leading arguments are reconstructed
# by analogy with the log-dose plot above, using dose on the
# x-axis -- confirm against the original bliss.ssc
points(dose, ndead/(ndead+nalive),
       pch=18, mkh=0, cex=2.0 )
ld <- seq(3.8, 4.4, 0.02)
lines(exp(ld), predict(bliss.lg,
                       data.frame(ldose=ld),
                       type="response"))
997
998
#
Now fit a logistic regression of mortality
#
rates on log-dose
bliss.lg <- glm(y ~ ldose, x=T, trace=T,
#
This code uses the
glm
family=binomial(link=logit))
function in
#
Splus to fit models to the Bliss beetle
#
data.
It is stored in the file
#
#
#
summary(bliss.lg)
bliss.ssc
First enter the data
dose <- c(49.057, 52.991, 56.911, 60.842,
64.759, 68.691, 72.611, 76.542)
ldose <- log(dose)
ndead <- c(6, 13, 18, 28, 52, 53, 61, 60)
nalive <- c(53, 47, 44, 28, 11, 6, 1, 0)
y <- cbind(ndead, nalive)
GLM
linear loop 1: deviance = 11.4476
GLM
linear loop 2: deviance = 11.2318
GLM
linear loop 3: deviance = 11.2316
Call: glm(formula = y ~ ldose,
family = binomial(link =
logit), x = T, trace = T)
Deviance Residuals:
       Min        1Q    Median       3Q      Max
 -1.594517 -0.394218 0.8328301 1.259146 1.594018

Coefficients:
                Value Std. Error   t value
(Intercept) -60.71708   5.173451 -11.73628
      ldose  14.88332   1.262945  11.78461
999
1000
(Dispersion Parameter for Binomial family
taken to be 1)
Null Deviance: 284.2024 on 7 degrees of freedom
Residual Deviance: 11.23156 on 6 degrees of freedom
Number of Fisher Scoring Iterations: 3
f.vcov <- function(obj) {
so <- summary(obj, corr=F)
Correlation of Coefficients:
(Intercept)
ldose -0.9996809
#
#
so$dispersion*so$cov.unscaled
}
Print the estimated proportions
and the model matrix
bliss.lg$vcov <- f.vcov(bliss.lg)
bliss.lg$vcov
(Intercept)
cbind(bliss.lg$x, bliss.lg$fit)
(Intercept)
ldose
1
1 3.892983 0.05860329
2
1 3.970122 0.16403780
3
1 4.041489 0.36208748
4
1 4.108280 0.60534026
5
1 4.170673 0.79516940
6
1 4.229618 0.90323318
7
1 4.285116 0.95519837
8
1 4.337840 0.97904849
(Intercept)
ldose
1001
ldose
26.7646 -6.53170
-6.5317
1.59503
1002
# Construct approximate one-at-a-time
# Compute standard errors for the estimated
# probabilities at the dosage levels
# used in the study
# 95 percent confidence intervals for
# the estimated proportions
slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*%
sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*%
t(bliss.lg$x)))) * bliss.lg$fit*
t(bliss.lg$x))))
llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp
ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp
(1-bliss.lg$fit)
lower <- exp(llp)/(1+exp(llp))
cbind(bliss.lg$x, bliss.lg$fit, sp)
upper <- exp(ulp)/(1+exp(ulp))
cbind(bliss.lg$x, bliss.lg$fit, lower, upper)
(Intercept)
ldose
sp
1
1 3.892983 0.05860329 0.015816590
2
1 3.970122 0.16403780 0.028091274
3
1 4.041489 0.36208748 0.033986913
4
1 4.108280 0.60534026 0.031475466
5
1 4.170673 0.79516940 0.026365653
6
1 4.229618 0.90323318 0.018739648
7
1 4.285116 0.95519837 0.011697048
8
1 4.337840 0.97904849 0.006837601
(Intercept)
ldose
1
1 3.892983 0.05860329 0.03427399
2
1 3.970122 0.16403780 0.11608923
3
1 4.041489 0.36208748 0.29844494
4
1 4.108280 0.60534026 0.54228308
5
1 4.170673 0.79516940 0.73867511
6
1 4.229618 0.90323318 0.85977743
7
1 4.285116 0.95519837 0.92580355
8
1 4.337840 0.97904849 0.96049528
1003
1004
# Estimate the LD50 and construct an
# approximate 95 percent confidence interval
# First make a confidence interval for log(LD50)
b0 <- bliss.lg$coef[1]
b1 <- bliss.lg$coef[2]
1 0.09844225
2 0.22671114
l.ld50 <- -b0/b1
3 0.43096459
g <- cbind(-1, b0/b1)/b1
4 0.66507568
sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g))
5 0.84206186
l.upper <- l.ld50 + (1.96)*sl.ld50
6 0.93425160
l.lower <- l.ld50 - (1.96)*sl.ld50
7 0.97328374
8 0.98898817
ld50 <- signif(exp(l.ld50))
lower <- signif(exp(l.lower))
upper <- signif(exp(l.upper))
sld50 <- signif(ld50*sl.ld50)
cat(ld50, lower, upper, sld50)
59.1182 58.0984 60.156 0.524874
1005
1006
# Compute estimates of expected counts
nd <- (ndead+nalive)*bliss.lg$fit
na <- (ndead+nalive)-nd
my <- c(nd, na)
goftests(y,my,y,bliss.lg$df)
        Pearson test =  10.03
  Degrees of freedom =  6
             p-value =  0.12
       Deviance test =  11.23
                  df =  6
             p-value =  0.08
1007
Download