/* This program is stored as bliss.sas */ /* This program uses PROC LOGISTIC in SAS to fit models with logistic, probit, and complementary log-log link functions to the beetle mortality data collected by Chester Bliss */ CARDS; 49.057 52.991 56.911 60.842 64.759 68.691 72.611 76.542 RUN; 6 53 13 47 18 44 28 28 52 11 53 6 61 1 60 0 PROC PRINT DATA=SET1; TITLE ' LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA' ; RUN; DATA SET1; INPUT Z X1 X2; ZL = LOG(Z); N = X1+x2; PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP1; MODEL X1/N = ZL / ITPRINT COVB MAXITER=50 CONVERGE=.000001 PPROB=.5 SELECTION=NONE; OUTPUT OUT=SETR1 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05; TITLE 'LOGISTIC REGRESSION MODEL ON LOG(DOSE) FOR THE BLISS DATA'; RUN; LABEL Z = DOSE ZL = LOG(DOSE) X1 = NUMBER DEAD X2 = NUMBER ALIVE N = NUMBER EXPOSED; 975 976 proc iml; start ld50; PROC PRINT DATA=SETP1; use set2; TITLE 'DATA SET CONTAINING INFORMATION read all into b; ON PARAMETER ESTIMATES'; use set3; PROC PRINT DATA=SETR1; read all into v; TITLE ' ESTIMATES OF MORTALITY RATES'; ld50 = -b[1,1]/b[1,2]; g = (-1 || b[1,1]/b[1,2])/b[1,2]; /* Estimate the LD50 */ s = sqrt(g*v*t(g)); lower = ld50 - (1.96)*s; data set2; set setp1; upper = ld50 + (1.96)*s; if(_TYPE_ = 'PARMS'); keep intercept zl; run; ld50 = exp(ld50); stderr = ld50*s; data set3; set setp1; lower=exp(lower); if(_TYPE_ = 'COV'); upper=exp(upper); keep intercept zl; run; print ld50 stderr lower upper; finish; 977 978 run ld50; SYMBOL1 V=NONE I=SPLINE L=1 W=2 H=2; SYMBOL2 V=circle H=2; /* Plot the observed proportions and RUN; the fitted curve */ PROC GPLOT DATA=SETR1; DATA SETR1; SET SETR1; PROB=X1/(X1+X2); AXIS1 LABEL= (H=2.5 R=0 A=90 RUN; F=swiss 'Mortality Rate' ) VALUE=(H=2 F=swiss) /* Use this to create graphs in Windows */ goptions LENGTH=5 in ORDER=0.0 TO 1.0 BY 0.2; cback=white colors=(black) AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' ) device=WIN target=WINPRTC VALUE=(H=2) rotate=portrait; LENGTH=6 in /* Use this to produce a postscript plot in the 
VINCENT system */ ORDER=3.8 TO 4.4 BY 0.1; PLOT PHAT*ZL PROB*ZL/ OVERLAY VAXIS=AXIS1 HAXIS=AXIS2; /* filename graffile pipe 'lpr -Dpostscript'; goptions gsfmode=replace gsfname=graffile cback=white colors=(black) targetdevice=ps300 rotate=landscape;*/ TITLE H=3. F=swiss 'LOGISTIC REGRESSION ANALYSIS'; TITLE2 H=3 F=swiss 'Bliss beetle data'; 980 979 /* FIT A PROBIT MODEL TO THE BLISS DATA */ PROC GPLOT DATA=SETR2; PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP2; AXIS1 LABEL= (H=2.5 R=0 A=90 F=swiss MODEL X1/N = ZL / ITPRINT COVB LINK=NORMIT 'Mortality Rate' ) PPROB=.5 SELECTION=NONE; VALUE=(H=2 F=swiss) OUTPUT OUT=SETR2 L=LOWER95 P=PHAT U=UPPER95 LENGTH=5 in / ALPHA=.05; ORDER=0.0 TO 1.0 BY 0.2; AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' ) TITLE 'PROBIT MODEL USING LOG(DOSE) FOR THE BLISS DATA'; VALUE=(H=2 F=swiss) RUN; LENGTH=6 in ORDER=3.8 TO 4.4 BY 0.1; PLOT PHAT*ZL PROB*ZL / OVERLAY PROC PRINT DATA=SETP2; TITLE 'DATA SET CONTAINING INFORMATION VAXIS=AXIS1 HAXIS=AXIS2; ON PARAMETER ESTIMATES'; TITLE H=3. F=swiss 'PROBIT ANALYSIS'; PROC PRINT DATA=SETR2; TITLE2 H=3 F=swiss 'Bliss beetle data'; TITLE ' ESTIMATES OF MORTALITY RATES'; RUN; DATA SETR2; SET SETR2; PROB= X1/(X1+X2); RUN; 981 982 /* Fit the complementary log-log model to the Bliss data */ PROC GPLOT DATA=SETR3; TITLE ' COMPLIMENTARY LOG-LOG AXIS1 LABEL= (H=2.5 R=0 A=90 F=swiss ANALYSIS OF THE BLISS DATA' ; 'Mortality Rate' ) PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP3; VALUE=(H=2 F=swiss ) MODEL X1/N = ZL / ITPRINT COVB LINK=CLOGLOG LENGTH=5 in PPROB=.5 SELECTION=NONE; OUTPUT OUT=SETR3 L=LOWER95 P=PHAT ORDER=0.0 TO 1.0 BY 0.2; AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' ) U=UPPER95 / ALPHA=.05; VALUE=(H=2 F=swiss) TITLE 'COMPLIMENTARY LOG-LOG MODEL LENGTH=6 in FOR THE BLISS DATA'; ORDER=3.8 TO 4.4 BY 0.1; run; PLOT PHAT*ZL=1 PROB*ZL=2 / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2; PROC PRINT DATA=SETP3; TITLE H=3. 
F=swiss TITLE 'DATA SET CONTAINING INFORMATION 'COMPLIMENTARY LOG-LOG ANALYSIS'; ON PARAMETER ESTIMATES'; TITLE2 H=3 F=swiss 'Bliss beetle data'; PROC PRINT DATA=SETR3; RUN; TITLE ' ESTIMATES OF MORTALITY RATES'; DATA SETR3; SET SETR3; PROB = X1/(X1+X2); RUN; 984 983 LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA Response Profile Obs Z X1 X2 ZL N 1 49.057 6 53 3.89298 59 2 52.991 13 47 3.97012 60 3 56.911 18 44 4.04149 62 1 Event 291 4 60.842 28 28 4.10828 56 2 Nonevent 190 5 64.759 52 11 4.17067 63 6 68.691 53 6 4.22962 59 7 72.611 61 1 4.28512 62 8 76.542 60 0 4.33784 60 Ordered Value Outcome Total Frequency Maximum Likelihood Iteration History Iter Model Information Binary Ridge -2 Log L Intercept ZL 0 0 645.441025 0.426299 0 1 0 395.941537 -39.615600 9.694171 -54.667149 13.394642 Data Set WORK.SET1 2 0 374.092238 Response Variable (Events) X1 3 0 372.484914 -60.122455 14.736966 Response Variable (Trials) N 4 0 372.470133 -60.711104 14.881848 Number of Observations 8 5 0 372.470132 -60.717199 14.883348 Link Function Logit 6 0 372.470132 -60.717199 14.883348 Optimization Technique Fisher's scoring 985 986 Analysis of Maximum Likelihood Estimates Standard Model Fit Statistics Intercept Intercept Criterion and Only Covariates AIC 647.441 376.470 SC 651.617 384.822 -2 Log L 645.441 372.470 Estimate Error Chi-Square Intercept 1 -60.7172 5.1806 137.3587 <.0001 ZL 1 14.8833 1.2647 138.4904 <.0001 Odds Ratio Estimates Point Testing Global Null Hypothesis: BETA=0 Test Chi-Square Likelihood Ratio DF Pr > ChiSq 1 <.0001 272.9709 Pr > C Parameter DF Score 227.5810 1 <.0001 Wald 138.4904 1 <.0001 Effect Estimate ZL >999.999 95% Wald Confidence Limits >999.999 >999.999 Association of Predicted Probabilities and Observed Responses Percent Concordant 87.0 Somers' D 0.802 Percent Discordant 6.8 Gamma 0.856 Percent Tied 6.3 Tau-a 0.384 Pairs 55290 c 0.901 988 987 Estimated Covariance Matrix Variable Intercept ZL Intercept ZL 26.83906 -6.54992 -6.54992 1.59949 ESTIMATES OF 
MORTALITY RATES DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES O b s _ L I N K _ _ T Y P E _ _ S T A T U S _ _ N A M E _ I n t e r c e p t 1 LOGIT PARMS 0 Converged X1 -60.7172 2 LOGIT COV 0 Converged Int 26.8391 3 LOGIT COV 0 Converged ZL -6.5499 _ L N L I K E _ Z L Obs 1 2 3 4 5 6 7 8 Z 49.057 52.991 56.911 60.842 64.759 68.691 72.611 76.542 X1 X2 ZL N 6 13 18 28 52 53 61 60 53 47 44 28 11 6 1 0 3.89298 3.97012 4.04149 4.10828 4.17067 4.22962 4.28512 4.33784 59 60 62 56 63 59 62 60 PHAT 0.05860 0.16404 0.36209 0.60534 0.79517 0.90323 0.95520 0.97905 LOWER95 UPPER95 0.03425 0.11605 0.29841 0.54224 0.73862 0.85972 0.92576 0.96046 0.09850 0.22678 0.43101 0.66511 0.84210 0.93428 0.97330 0.98900 14.8833 -186.235 -6.5499 -186.235 1.5995 -186.235 989 990 # This code uses the # Splus to fit models to the Bliss beetle glm function in # data. It is stored in the file # # bliss.ssc # First enter the data # Define a function to extract the estimated # covariance matrix from the summary object f.vcov <- function(obj) { so <- summary(obj, corr=F) so$dispersion*so$cov.unscaled } dose <- c(49.057, 52.991, 56.911, 60.842, 64.759, 68.691, 72.611, 76.542) ldose <- log(dose) bliss.lg$vcov <- f.vcov(bliss.lg) bliss.lg$vcov ndead <- c(6, 13, 18, 28, 52, 53, 61, 60) nalive <- c(53, 47, 44, 28, 11, 6, 1, 0) y <- cbind(ndead, nalive) # Compute standard errors for the estimated # probabilities at the dosage levels # Now fit a logistic regression of mortality # rates on log-dose # used in the study sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% bliss.lg <- glm(y ~ ldose, x=T, trace=T, family=binomial(link=logit)) summary(bliss.lg) t(bliss.lg$x)))) * bliss.lg$fit* (1-bliss.lg$fit) cbind(bliss.lg$x, bliss.lg$fit, sp) 992 991 # Estimate the LD50 and construct an # approximate 95 percent confidence interval # First make a confidence interval for log(LD50) # Construct approximate one-at-a-time b0 <- bliss.lg$coef[1] # 95 percent confidence intervals for b1 <- bliss.lg$coef[2] # the 
estimated proportions slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp lower <- exp(llp)/(1+exp(llp)) upper <- exp(ulp)/(1+exp(ulp)) cbind(bliss.lg$x, bliss.lg$fit, lower, upper) l.ld50 <- -b0/b1 g <- cbind(-1, b0/b1)/b1 sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g)) l.upper <- l.ld50 + (1.96)*sl.ld50 l.lower <- l.ld50 - (1.96)*sl.ld50 ld50 <- signif(exp(l.ld50)) lower <- signif(exp(l.lower)) upper <- signif(exp(l.upper)) sld50 <- signif(ld50*sl.ld50) cat(ld50, lower, upper, sld50) 993 994 # Compute p-values for lack-of-fit tests. # These results may be unreliable for # Compute the G^2 statistic # sparse data. g2 <- 2*sum(x*(log(m2/m1))) pvalg <- 1-pchisq(g2, df) goftests <- function(x, m1, m2, df) { cat("\n", " # Compute Pearson chi-squared # and deviance tests and p-values df = ", df) cat("\n", " # In this function # Pearson test = ", round(x2p,2)) cat("\n", " p-value = ", round(pvalp,2)) cat("\n", " Deviance test = ", round(g2,2)) x = observed counts # m1 = expected counts under H0 # m2 = expected counts under HA # df = degrees of freedom cat("\n", " df = ", df) cat("\n", " p-value = ", round(pvalg,2),"\n" } # Compute estimates of expected counts k <- length(x) nd <- (ndead+nalive)*bliss.lg$fit m1 <- m1 + .00000000000000001 na <- (ndead+nalive)-nd m2 <- m2 + .00000000000000001 my <- c(nd, na) x2p <- sum(((m1-m2)^2)/m1) goftests(y,my,y,bliss.lg$df) pvalp <- 1 - pchisq(x2p, df) 996 995 plot(c(3.8, 4.4), c(0,1), type="n", xlab="log-dose", ylab="mortality rate", # Open a motif window for displaying plots # Windows users should not do this main="Bliss Beetle Data") points(ldose, ndead/(ndead+nalive), pch=18, motif( ) mkh=0, cex=2.0 ) ld <- seq(3.8, 4.4, 0.02) lines(ld, predict(bliss.lg, data.frame(ldose=ld), # Plot the estimated curve for the mortality # probabilities against the log-dose type="response")) # # since the plot function uses # it only defines and 
labels the axes and type="n" # writes a title at the top of the plot. # # Plot the estimated curve for the # mortality probabilities against dose plot(c(48, 78), c(0,1), type="n", # the # the plot corresponding to the observed points( ) function plots points on main="Bliss Beetle Data # proportions mkh=0, cex=2.0 ) # xlab="dose", lab="mortality rate", ld <- seq(3.8, 4.4, 0.02) # the lines( ) function plots the estimated # curve lines(exp(ld), predict(bliss.lg, data.frame(ldose=ld), type="response")) 997 998 # Now fit a logistic regression of mortality # rates on log-dose bliss.lg <- glm(y ~ ldose, x=T, trace=T, # This code uses the glm family=binomial(link=logit)) function in # Splus to fit models to the Bliss beetle # data. It is stored in the file # # # summary(bliss.lg) bliss.ssc First enter the data dose <- c(49.057, 52.991, 56.911, 60.842, 64.759, 68.691, 72.611, 76.542) ldose <- log(dose) ndead <- c(6, 13, 18, 28, 52, 53, 61, 60) nalive <- c(53, 47, 44, 28, 11, 6, 1, 0) y <- cbind(ndead, nalive) GLM linear loop 1: deviance = 11.4476 GLM linear loop 2: deviance = 11.2318 GLM linear loop 3: deviance = 11.2316 Call: glm(formula = y ~ ldose, family = binomial(link = logit), x = T, trace = T) Deviance Residuals: Min 1Q Median 3Q Max -1.594517 -0.394218 0.8328301 1.259146 1.594018 Coefficients: Value Std. 
Error (Intercept) -60.71708 ldose 14.88332 t value 5.173451 -11.73628 1.262945 999 11.78461 1000 (Dispersion Parameter for Binomial family taken to be 1) Null Deviance: 284.2024 on 7 degrees of freedom Residual Deviance: 11.23156 on 6 degrees of freedom Number of Fisher Scoring Iterations: 3 f.vcov <- function(obj) { so <- summary(obj, corr=F) Correlation of Coefficients: (Intercept) ldose -0.9996809 # # so$dispersion*so$cov.unscaled } Print the estimated proportions and the model matrix bliss.lg$vcov <- f.vcov(bliss.lg) bliss.lg$vcov (Intercept) cbind(bliss.lg$x, bliss.lg$fit) (Intercept) ldose 1 1 3.892983 0.05860329 2 1 3.970122 0.16403780 3 1 4.041489 0.36208748 4 1 4.108280 0.60534026 5 1 4.170673 0.79516940 6 1 4.229618 0.90323318 7 1 4.285116 0.95519837 8 1 4.337840 0.97904849 (Intercept) ldose 1001 ldose 26.7646 -6.53170 -6.5317 1.59503 1002 # Construct approximate one-at-a-time # Compute standard errors for the estimated # probabilities at the dosage levels # used in the study # 95 percent confidence intervals for # the estimated proportions slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) * bliss.lg$fit* t(bliss.lg$x)))) llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp (1-bliss.lg$fit) lower <- exp(llp)/(1+exp(llp)) cbind(bliss.lg$x, bliss.lg$fit, sp) upper <- exp(ulp)/(1+exp(ulp)) cbind(bliss.lg$x, bliss.lg$fit, lower, upper) (Intercept) ldose sp 1 1 3.892983 0.05860329 0.015816590 2 1 3.970122 0.16403780 0.028091274 3 1 4.041489 0.36208748 0.033986913 4 1 4.108280 0.60534026 0.031475466 5 1 4.170673 0.79516940 0.026365653 6 1 4.229618 0.90323318 0.018739648 7 1 4.285116 0.95519837 0.011697048 8 1 4.337840 0.97904849 0.006837601 (Intercept) ldose 1 1 3.892983 0.05860329 0.03427399 2 1 3.970122 0.16403780 0.11608923 3 1 4.041489 0.36208748 0.29844494 4 1 4.108280 0.60534026 0.54228308 5 1 4.170673 0.79516940 0.73867511 6 1 4.229618 0.90323318 
0.85977743 7 1 4.285116 0.95519837 0.92580355 8 1 4.337840 0.97904849 0.96049528 1003 1004 # Estimate the LD50 and construct an # approximate 95 percent confidence interval # First make a confidence interval for log(LD50) b0 <- bliss.lg$coef[1] b1 <- bliss.lg$coef[2] 1 0.09844225 2 0.22671114 l.ld50 <- -b0/b1 3 0.43096459 g <- cbind(-1, b0/b1)/b1 4 0.66507568 sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g)) 5 0.84206186 l.upper <- l.ld50 + (1.96)*sl.ld50 6 0.93425160 l.lower <- l.ld50 - (1.96)*sl.ld50 7 0.97328374 8 0.98898817 ld50 <- signif(exp(l.ld50)) lower <- signif(exp(l.lower)) upper <- signif(exp(l.upper)) sld50 <- signif(ld50*sl.ld50) cat(ld50, lower, upper, sld50) 59.1182 58.0984 60.156 0.524874 1005 1006 # Compute estimates of expected counts nd <- (ndead+nalive)*bliss.lg$fit na <- (ndead+nalive)-nd my <- c(nd, na) goftests(y,my,y,bliss.lg$df) Pearson test = Degrees of freedom = p-value = Deviance test = df = p-value = 10.03 6 0.12 11.23 6 0.08 1007