3 1 4 2 7 5 8 6 9 12 10 11 15 13 16 14 19 17 20 18 23 21 24 22 27 25 28 26 31 29 32 30 d=read.delim( geno temp 1 4 1 4 1 4 1 -5 1 -5 1 -5 1 -7 1 -7 1 -7 1 -9 1 -9 1 -9 1 -11 1 -11 1 -11 1 -15 y 6.97 5.66 6.16 19.39 15.94 13.90 29.08 25.03 37.69 50.94 48.67 41.48 54.49 58.12 55.48 57.63 "http://www.public.iastate.edu/~dnett/S511/IonLeakage.txt") d 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 33 35 Example 2: Researchers were interested in studying the cold tolerance of different grasses. 18 plants of genotype 1 and 18 plants of genotype 2 were assigned to 6 temperature treatments using a completely randomized design with 3 plants per combination of genotype and temperature. Following temperature treatment, a measure of ion leakage was obtained from each plant. 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 -15 -15 4 4 4 -5 -5 -5 -7 -7 -7 -9 -9 -9 -11 -11 -11 -15 -15 -15 61.05 56.59 9.03 7.36 6.97 22.13 15.76 23.70 37.75 48.81 38.09 57.33 52.77 58.97 62.85 56.36 61.62 67.19 63.55 65.51 High ion leakage suggests that a plant sustained high damage due to the temperature treatment while low ion leakage suggests little damage. 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 34 36 d$geno=as.factor(d$geno) plot(d$temp,d$y,col=4,pch=19, xlab="Temperature (degrees Celsius)", ylab="Measure of Ion Leakage") 37 39 #Fit the 4-parameter Gompertz model #to the data. Initially, we will ignore #the genotype factor. o=nls(y~b1+b2*exp(-exp(-((temp-b3)/b4))), data=d, start=c(b1=7,b2=55,b3=-5,b4=-2)) 38 40 summary(o) ** *** *** *** Formula: y ~ b1 + b2 * exp(-exp(-((temp - b3)/b4))) Parameters: Estimate Std. 
Error t value Pr(>|t|) 7.1454 2.0089 3.557 0.00119 55.8565 3.0501 18.313 < 2e-16 -6.0301 0.2331 -25.871 < 2e-16 -2.0950 0.2936 -7.135 4.26e-08 b1 b2 b3 b4 Residual standard error: 4.948 on 32 degrees of freedom Number of iterations to convergence: 5 Achieved convergence tolerance: 1.995e-06 f .expr1 <- x - b3 .expr4 <- exp(-(.expr1/b4)) .expr6 <- exp(-.expr4) .value <- b1 + b2 * .expr6 .grad <- array(0, c(length(.value), 4L), list(NULL, c("b1","b2","b3","b4"))) .grad[,"b1"] <- 1 .grad[,"b2"] <- .expr6 .grad[,"b3"] <- -(b2*(.expr6*(.expr4*(1/b4)))) .grad[,"b4"] <- -(b2*(.expr6*(.expr4*(.expr1/b4^2)))) attr(.value, "gradient") <- .grad .value function (b1, b2, b3, b4, x) { } 41 43 #By default, a numerical algorithm is used #to compute the derivatives needed for #the Gauss-Newton algorithm. In some cases, #there is an advantage to providing the #derivatives along with the function. #This can be done using the deriv function. ** *** *** *** f=deriv(y~b1+b2*exp(-exp(-((x-b3)/b4))), c("b1","b2","b3","b4"), function(b1,b2,b3,b4,x){}) o=nls(y~f(b1,b2,b3,b4,temp), data=d, start=c(b1=6,b2=55,b3=-5,b4=-2)) summary(o) Formula: y ~ f(b1, b2, b3, b4, temp) Parameters: Estimate Std. Error t value Pr(>|t|) 7.1454 2.0089 3.557 0.00119 55.8565 3.0501 18.313 < 2e-16 -6.0301 0.2331 -25.871 < 2e-16 -2.0950 0.2936 -7.135 4.26e-08 b1 b2 b3 b4 Residual standard error: 4.948 on 32 degrees of freedom Number of iterations to convergence: 5 Achieved convergence tolerance: 1.993e-06 42 44 #Store the least squares estimator of beta. b=coef(o) b b1 b2 b3 b4 7.145431 55.856466 -6.030129 -2.094961 #Add the estimated mean curve to the plot. x=seq(-16,6,by=.1) lines(x,f(b[1],b[2],b[3],b[4],x)) #Examine a plot of standardized #residuals versus fitted values. 
plot(o) 45 47 #Get an estimate of the variance of #the least squares estimator bhat: #MSE(Dhat'Dhat)^{-1} v=vcov(o) round(v,3) b1 b2 b3 b4 b1 4.036 -4.372 -0.287 0.141 b2 -4.372 9.303 0.112 -0.590 b3 -0.287 0.112 0.054 -0.002 b4 0.141 -0.590 -0.002 0.086 #Obtain the error degrees of freedom. summary(o)$df [1] 4 32 df=summary(o)$df[2] df [1] 32 46 48 #Form a 95% confidence interval for beta_3. b[3]-qt(.975,df)*sqrt(v[3,3]) b3 -6.504898 b[3]+qt(.975,df)*sqrt(v[3,3]) b3 -5.555361 c1 c2 c3 c4 estimate se 95% Conf. limits 1 -1 0 0 -48.71103 4.6992196 -58.283031 -39.139037 0 0 1 0 -6.03013 0.2330801 -6.504898 -5.555361 ci(o,matrix(c(1,-1,0,0, 0,0,1,0),byrow=T,nrow=2)) [1,] [2,] #We can also use the profile function #to invert the reduced versus full model #F test to obtain an approximate 100(1-alpha)% #confidence interval for any one of the #components of beta. 49 51 #The following function can be used to obtain #approximate 100(1-a)% confidence intervals #for each element of C%*%beta. m b=coef(nlsout) V=vcov(nlsout) df=summary(nlsout)$df[2] Cb=C%*%b se=sqrt(diag(C%*%V%*%t(C))) tval=qt(1-a/2,df) low=Cb-tval*se up=Cb+tval*se m=cbind(C,Cb,se,low,up) dimnames(m)[[2]]=c(paste("c",1:ncol(C),sep=""), "estimate","se", paste(100*(1-a),"% Conf.",sep=""), "limits") ci=function(nlsout,C,a=0.05) { } tau par.vals.b3 -2.4216397 -6.613587 -2.0181718 -6.514136 -1.6146002 -6.415617 -1.2108769 -6.317984 -0.8069124 -6.221170 -0.4025805 -6.125083 0.0000000 -6.030129 0.4050864 -5.935176 0.8143166 -5.839683 1.2225874 -5.744620 1.6296353 -5.649763 2.0351862 -5.554827 2.4389609 -5.459462 op=profile(o,which=3,alpha=0.05) b3=as.matrix(op$b3)[,c(1,4)] b3 1 2 3 4 5 6 7 8 9 10 11 12 13 50 52 0.01064759 0.02601096 0.05810807 0.11740501 0.21283697 0.34496687 0.50000000 0.65594529 0.78925876 0.88479292 0.94350769 0.97490734 0.98977153 cbind(pt(b3[,1],df),b3) tau par.vals.b3 -2.4216397 -6.613587 -2.0181718 -6.514136 -1.6146002 -6.415617 -1.2108769 -6.317984 -0.8069124 -6.221170 -0.4025805 -6.125083 
0.0000000 -6.030129 0.4050864 -5.935176 0.8143166 -5.839683 1.2225874 -5.744620 1.6296353 -5.649763 2.0351862 -5.554827 2.4389609 -5.459462 1 2 3 4 5 6 7 8 9 10 11 12 13 53 55 #Now suppose we want to estimate the temperature at #which the measure of ion leakage is 50. 54 56 x50=deriv(y~-b4*log(-log((50-b1)/b2))+b3, c("b1","b2","b3","b4"), function(b1,b2,b3,b4){}) x50 .expr2 <- 50 - b1 .expr3 <- .expr2/b2 .expr4 <- log(.expr3) .expr6 <- log(-.expr4) .value <- -b4 * .expr6 + b3 .grad <- array(0, c(length(.value), 4L), list(NULL, c("b1", "b2", "b3", "b4"))) .grad[, "b1"] <- b4 * (1/b2/.expr3/.expr4) .grad[, "b2"] <- b4 * (.expr2/b2^2/.expr3/.expr4) .grad[, "b3"] <- 1 .grad[, "b4"] <- -.expr6 attr(.value, "gradient") <- .grad .value function (b1, b2, b3, b4) { } 57 59 x50(b[1],b[2],b[3],b[4]) b4 -8.812504 attr(,"gradient") b1 b2 b3 b4 [1,] 0.1844918 0.1415470 1 1.328127 est=as.numeric(x50(b[1],b[2],b[3],b[4])) est [1] -8.812504 58 60 #Use the delta method to obtain an #approximate standard error associated #with the estimate. der=attr(x50(b[1],b[2],b[3],b[4]),"gradient") der b1 b2 b3 b4 [1,] 0.1844918 0.1415470 1 1.328127 se=drop(sqrt(der%*%v%*%t(der))) se [1] 0.2643512 #Obtain an approximate 95% #confidence interval for the temperature #at which the measure of ion leakage is 50. est-qt(.975,df)*se [1] -9.35097 est+qt(.975,df)*se [1] -8.274038 61 63 #Now let's consider the genotype variable. plot(d$temp,d$y,col=as.numeric(d$geno), pch=as.numeric(d$geno), xlab="Temperature (degrees Celsius)", ylab="Measure of Ion Leakage") legend("topright", c("Genotype 1","Genotype 2"),col=1:2,pch=1:2) #Let's fit a model that allows a separate #4-parameter Gompertz model for each genotype. ogeno=nls(y~b1[geno]+b2[geno]* exp(-exp(-((temp-b3[geno])/b4[geno]))), data=d, start=list(b1=c(7,7),b2=c(55,55), b3=c(-5,-5),b4=c(-2,-2))) summary(ogeno) Formula: y ~ b1[geno] + b2[geno] * exp(-exp(-((temp - b3[geno])/b4[geno]))) 62 64 Parameters: Estimate Std. 
Error t value Pr(>|t|) 6.6605 2.0741 3.211 0.003309 ** 7.7669 2.0879 3.720 0.000886 *** 53.9658 3.3329 16.192 9.50e-16 *** 57.4662 2.9887 19.228 < 2e-16 *** -6.3494 0.2655 -23.919 < 2e-16 *** -5.7771 0.2153 -26.838 < 2e-16 *** -2.2082 0.3345 -6.601 3.69e-07 *** -1.9115 0.2683 -7.123 9.45e-08 *** b11 b12 b21 b22 b31 b32 b41 b42 Residual standard error: 3.628 on 28 degrees of freedom Number of iterations to convergence: 7 Achieved convergence tolerance: 2.47e-06 b11 6.660501 b12 b21 b22 b31 b32 b41 b42 7.766861 53.965787 57.466211 -6.349400 -5.777096 -2.208205 -1.911513 b=coef(ogeno) b 65 67 #Add the estimated mean curves to the plot. lines(x,f(b[1],b[3],b[5],b[7],x),col=1) lines(x,f(b[2],b[4],b[6],b[8],x),col=2) #Compare reduced versus full model. anova(o,ogeno) Analysis of Variance Table Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) 32 783.34 28 368.63 4 414.71 7.8751 0.0002197 *** Model 1: y ~ f(b1, b2, b3, b4, temp) Model 2: y ~ b1[geno] + b2[geno] * exp(-exp(-((temp - b3[geno])/b4[geno]))) 1 2 66 68 #Test for a lack of fit by comparing to #a cell means model with different means #for each combination of genotype and temperature. ocellmeans=lm(y~geno*as.factor(temp),data=d) anova(ogeno,ocellmeans) Analysis of Variance Table Model 1: y ~ b1[geno] + b2[geno] * exp(-exp(-((temp - b3[geno])/b4[geno]))) Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F) 28 368.63 24 334.51 4 34.116 0.6119 0.6581 Model 2: y ~ geno * as.factor(temp) 1 2 69