1 3 2 4

advertisement
3
1
4
2
7
5
8
6
9
12
10
11
15
13
16
14
19
17
20
18
23
21
24
22
27
25
28
26
31
29
32
30
d=read.delim(
geno temp
1
4
1
4
1
4
1
-5
1
-5
1
-5
1
-7
1
-7
1
-7
1
-9
1
-9
1
-9
1 -11
1 -11
1 -11
1 -15
y
6.97
5.66
6.16
19.39
15.94
13.90
29.08
25.03
37.69
50.94
48.67
41.48
54.49
58.12
55.48
57.63
"http://www.public.iastate.edu/~dnett/S511/IonLeakage.txt")
d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
33
35
Example 2:
Researchers were interested in studying the cold tolerance of
different grasses.
18 plants of genotype 1 and 18 plants of genotype 2 were
assigned to 6 temperature treatments using a completely
randomized design with 3 plants per combination of genotype
and temperature.
Following temperature treatment, a measure of ion leakage
was obtained from each plant.
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
-15
-15
4
4
4
-5
-5
-5
-7
-7
-7
-9
-9
-9
-11
-11
-11
-15
-15
-15
61.05
56.59
9.03
7.36
6.97
22.13
15.76
23.70
37.75
48.81
38.09
57.33
52.77
58.97
62.85
56.36
61.62
67.19
63.55
65.51
High ion leakage suggests that a plant sustained high damage
due to the temperature treatment while low ion leakage
suggests little damage.
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
34
36
# Treat genotype as a categorical factor rather than a number.
d$geno <- as.factor(d$geno)

# Scatterplot of ion leakage against temperature, pooling both genotypes
# (filled circles, palette colour 4).
plot(
  x = d$temp, y = d$y,
  pch = 19, col = 4,
  xlab = "Temperature (degrees Celsius)",
  ylab = "Measure of Ion Leakage"
)
37
39
# Fit a single 4-parameter Gompertz curve to all of the data;
# the genotype factor is ignored for now.
o <- nls(
  y ~ b1 + b2 * exp(-exp(-((temp - b3) / b4))),
  data = d,
  start = c(b1 = 7, b2 = 55, b3 = -5, b4 = -2)
)
38
40
summary(o)
Formula: y ~ b1 + b2 * exp(-exp(-((temp - b3)/b4)))
Parameters:
   Estimate Std. Error t value Pr(>|t|)
b1   7.1454     2.0089   3.557  0.00119 **
b2  55.8565     3.0501  18.313  < 2e-16 ***
b3  -6.0301     0.2331 -25.871  < 2e-16 ***
b4  -2.0950     0.2936  -7.135 4.26e-08 ***
Residual standard error: 4.948 on 32 degrees of
freedom
Number of iterations to convergence: 5
Achieved convergence tolerance: 1.995e-06
# Gompertz mean function with its analytic gradient, written out explicitly.
# It corresponds to the function that the deriv() call elsewhere in this
# file assigns to f.
#
# Arguments:
#   b1, b2, b3, b4  Gompertz parameters (b4 is used as a divisor).
#   x               numeric vector at which the curve is evaluated.
# Returns:
#   b1 + b2 * exp(-exp(-((x - b3) / b4))), carrying a "gradient" attribute:
#   a length(value) x 4 matrix of partial derivatives with respect to
#   b1, b2, b3 and b4 (columns named accordingly).
f <- function(b1, b2, b3, b4, x) {
  .expr1 <- x - b3
  .expr4 <- exp(-(.expr1 / b4))
  .expr6 <- exp(-.expr4)
  .value <- b1 + b2 * .expr6
  .grad <- array(0, c(length(.value), 4L),
                 list(NULL, c("b1", "b2", "b3", "b4")))
  .grad[, "b1"] <- 1
  .grad[, "b2"] <- .expr6
  .grad[, "b3"] <- -(b2 * (.expr6 * (.expr4 * (1 / b4))))
  .grad[, "b4"] <- -(b2 * (.expr6 * (.expr4 * (.expr1 / b4^2))))
  attr(.value, "gradient") <- .grad
  .value
}
41
43
#By default, a numerical algorithm is used
#to compute the derivatives needed for
#the Gauss-Newton algorithm. In some cases,
#there is an advantage to providing the
#derivatives along with the function.
#This can be done using the deriv function.
**
***
***
***
# Build the Gompertz mean function together with its gradient with
# respect to b1..b4; deriv() fills in the body of the template function.
f <- deriv(
  expr = y ~ b1 + b2 * exp(-exp(-((x - b3) / b4))),
  namevec = c("b1", "b2", "b3", "b4"),
  function.arg = function(b1, b2, b3, b4, x) {}
)
# Refit the same model through f, whose return value carries the
# "gradient" attribute, then print the fit summary.
o <- nls(
  y ~ f(b1, b2, b3, b4, temp),
  data = d,
  start = c(b1 = 6, b2 = 55, b3 = -5, b4 = -2)
)
summary(o)
Formula: y ~ f(b1, b2, b3, b4, temp)
Parameters:
   Estimate Std. Error t value Pr(>|t|)
b1   7.1454     2.0089   3.557  0.00119
b2  55.8565     3.0501  18.313  < 2e-16
b3  -6.0301     0.2331 -25.871  < 2e-16
b4  -2.0950     0.2936  -7.135 4.26e-08
Residual standard error: 4.948 on 32 degrees of
freedom
Number of iterations to convergence: 5
Achieved convergence tolerance: 1.993e-06
42
44
#Store the least squares estimator of beta.
b=coef(o)
b
b1
b2
b3
b4
7.145431 55.856466 -6.030129 -2.094961
# Overlay the fitted mean curve on the scatterplot, evaluating it
# on a temperature grid from -16 to 6 in steps of 0.1.
x <- seq(from = -16, to = 6, by = 0.1)
lines(x, f(b[1], b[2], b[3], b[4], x))

# Diagnostic plot of standardized residuals against fitted values.
plot(o)
45
47
# Estimated variance of the least squares estimator bhat:
# MSE * (Dhat' Dhat)^{-1}
v <- vcov(o)
round(v, digits = 3)
       b1     b2     b3     b4
b1  4.036 -4.372 -0.287  0.141
b2 -4.372  9.303  0.112 -0.590
b3 -0.287  0.112  0.054 -0.002
b4  0.141 -0.590 -0.002  0.086
#Obtain the error degrees of freedom.
summary(o)$df
[1] 4 32
df=summary(o)$df[2]
df
[1] 32
46
48
#Form a 95% confidence interval for beta_3.
b[3]-qt(.975,df)*sqrt(v[3,3])
b3
-6.504898
b[3]+qt(.975,df)*sqrt(v[3,3])
b3
-5.555361
c1 c2 c3 c4 estimate
se 95% Conf.
limits
1 -1 0 0 -48.71103 4.6992196 -58.283031 -39.139037
0 0 1 0 -6.03013 0.2330801 -6.504898 -5.555361
# Interval estimates for two linear combinations of the coefficients:
# row 1 is b1 - b2, row 2 is b3. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
ci(o, matrix(c(1, -1, 0, 0,
               0,  0, 1, 0), byrow = TRUE, nrow = 2))
[1,]
[2,]
#We can also use the profile function
#to invert the reduced versus full model
#F test to obtain an approximate 100(1-alpha)%
#confidence interval for any one of the
#components of beta.
49
51
#The following function can be used to obtain
#approximate 100(1-a)% confidence intervals
#for each element of C%*%beta.
# Approximate 100(1-a)% confidence intervals for each element of C %*% beta,
# computed from a fitted model object.
#
# Arguments:
#   nlsout  fitted model; coef(), vcov() and summary()$df[2] are read from it.
#   C       matrix with one row per linear combination and one column per
#           coefficient; a plain vector is treated as a single row.
#   a       one minus the confidence level (default 0.05).
# Returns:
#   a matrix with one row per linear combination whose columns are the
#   combination's coefficients (c1, c2, ...), the estimate C %*% b, its
#   standard error, and the lower and upper confidence limits.
ci <- function(nlsout, C, a = 0.05) {
  # Accept a single contrast given as a vector; t(C) below needs a row matrix.
  if (!is.matrix(C)) {
    C <- matrix(C, nrow = 1)
  }
  b <- coef(nlsout)
  V <- vcov(nlsout)
  df <- summary(nlsout)$df[2]
  Cb <- C %*% b
  se <- sqrt(diag(C %*% V %*% t(C)))
  tval <- qt(1 - a / 2, df)
  low <- Cb - tval * se
  up <- Cb + tval * se
  m <- cbind(C, Cb, se, low, up)
  colnames(m) <- c(paste0("c", seq_len(ncol(C))),
                   "estimate", "se",
                   paste0(100 * (1 - a), "% Conf."),
                   "limits")
  m
}
tau par.vals.b3
-2.4216397
-6.613587
-2.0181718
-6.514136
-1.6146002
-6.415617
-1.2108769
-6.317984
-0.8069124
-6.221170
-0.4025805
-6.125083
0.0000000
-6.030129
0.4050864
-5.935176
0.8143166
-5.839683
1.2225874
-5.744620
1.6296353
-5.649763
2.0351862
-5.554827
2.4389609
-5.459462
# Profile the fitted model over its third parameter (b3).
# alphamax is spelled out in full: the previous `alpha=0.05` reached this
# argument only through partial argument matching.
op <- profile(o, which = 3, alphamax = 0.05)
# Keep columns 1 and 4 of the b3 profile (printed as tau and par.vals.b3).
b3 <- as.matrix(op$b3)[, c(1, 4)]
b3
1
2
3
4
5
6
7
8
9
10
11
12
13
50
52
0.01064759
0.02601096
0.05810807
0.11740501
0.21283697
0.34496687
0.50000000
0.65594529
0.78925876
0.88479292
0.94350769
0.97490734
0.98977153
cbind(pt(b3[,1],df),b3)
tau par.vals.b3
-2.4216397
-6.613587
-2.0181718
-6.514136
-1.6146002
-6.415617
-1.2108769
-6.317984
-0.8069124
-6.221170
-0.4025805
-6.125083
0.0000000
-6.030129
0.4050864
-5.935176
0.8143166
-5.839683
1.2225874
-5.744620
1.6296353
-5.649763
2.0351862
-5.554827
2.4389609
-5.459462
1
2
3
4
5
6
7
8
9
10
11
12
13
53
55
#Now suppose we want to estimate the temperature at
#which the measure of ion leakage is 50.
54
56
# Temperature at which the Gompertz mean equals 50, as a function of the
# parameters; deriv() also attaches the gradient with respect to b1..b4.
x50 <- deriv(
  expr = y ~ -b4 * log(-log((50 - b1) / b2)) + b3,
  namevec = c("b1", "b2", "b3", "b4"),
  function.arg = function(b1, b2, b3, b4) {}
)
# Temperature at which the Gompertz mean equals 50, written out explicitly
# together with its gradient. It corresponds to the function produced by
# the deriv() call that defines x50 elsewhere in this file.
#
# Arguments:
#   b1, b2, b3, b4  Gompertz parameters.
# Returns:
#   -b4 * log(-log((50 - b1) / b2)) + b3, carrying a "gradient" attribute:
#   a length(value) x 4 matrix of partial derivatives with respect to
#   b1, b2, b3 and b4 (columns named accordingly).
x50 <- function(b1, b2, b3, b4) {
  .expr2 <- 50 - b1
  .expr3 <- .expr2 / b2
  .expr4 <- log(.expr3)
  .expr6 <- log(-.expr4)
  .value <- -b4 * .expr6 + b3
  .grad <- array(0, c(length(.value), 4L),
                 list(NULL, c("b1", "b2", "b3", "b4")))
  .grad[, "b1"] <- b4 * (1 / b2 / .expr3 / .expr4)
  .grad[, "b2"] <- b4 * (.expr2 / b2^2 / .expr3 / .expr4)
  .grad[, "b3"] <- 1
  .grad[, "b4"] <- -.expr6
  attr(.value, "gradient") <- .grad
  .value
}
57
59
x50(b[1],b[2],b[3],b[4])
b4
-8.812504
attr(,"gradient")
b1
b2 b3
b4
[1,] 0.1844918 0.1415470 1 1.328127
est=as.numeric(x50(b[1],b[2],b[3],b[4]))
est
[1] -8.812504
58
60
#Use the delta method to obtain an
#approximate standard error associated
#with the estimate.
der=attr(x50(b[1],b[2],b[3],b[4]),"gradient")
der
b1
b2 b3
b4
[1,] 0.1844918 0.1415470 1 1.328127
se=drop(sqrt(der%*%v%*%t(der)))
se
[1] 0.2643512
#Obtain an approximate 95%
#confidence interval for the temperature
#at which the measure of ion leakage is 50.
est-qt(.975,df)*se
[1] -9.35097
est+qt(.975,df)*se
[1] -8.274038
61
63
# Bring genotype into the picture: redraw the scatterplot with colour
# and plotting symbol both determined by genotype.
plot(
  x = d$temp, y = d$y,
  col = as.numeric(d$geno),
  pch = as.numeric(d$geno),
  xlab = "Temperature (degrees Celsius)",
  ylab = "Measure of Ion Leakage"
)
legend(
  "topright",
  legend = c("Genotype 1", "Genotype 2"),
  col = 1:2, pch = 1:2
)

# Fit a separate 4-parameter Gompertz curve for each genotype by
# indexing every parameter with the genotype factor.
ogeno <- nls(
  y ~ b1[geno] + b2[geno] *
    exp(-exp(-((temp - b3[geno]) / b4[geno]))),
  data = d,
  start = list(
    b1 = c(7, 7), b2 = c(55, 55),
    b3 = c(-5, -5), b4 = c(-2, -2)
  )
)
summary(ogeno)
Formula: y ~ b1[geno] + b2[geno] *
exp(-exp(-((temp - b3[geno])/b4[geno])))
62
64
Parameters:
    Estimate Std. Error t value Pr(>|t|)
b11   6.6605     2.0741   3.211 0.003309 **
b12   7.7669     2.0879   3.720 0.000886 ***
b21  53.9658     3.3329  16.192 9.50e-16 ***
b22  57.4662     2.9887  19.228  < 2e-16 ***
b31  -6.3494     0.2655 -23.919  < 2e-16 ***
b32  -5.7771     0.2153 -26.838  < 2e-16 ***
b41  -2.2082     0.3345  -6.601 3.69e-07 ***
b42  -1.9115     0.2683  -7.123 9.45e-08 ***
Residual standard error: 3.628 on 28 degrees of
freedom
Number of iterations to convergence: 7
Achieved convergence tolerance: 2.47e-06
b11
6.660501
b12
b21
b22
b31
b32
b41
b42
7.766861 53.965787 57.466211 -6.349400 -5.777096 -2.208205 -1.911513
b=coef(ogeno)
b
65
67
# Overlay the genotype-specific fitted curves. The coefficients are
# ordered b11, b12, b21, b22, ..., so odd positions belong to genotype 1
# and even positions to genotype 2.
lines(x, f(b[1], b[3], b[5], b[7], x), col = 1)
lines(x, f(b[2], b[4], b[6], b[8], x), col = 2)

# Reduced (common curve) versus full (separate curves) model comparison.
anova(o, ogeno)
Analysis of Variance Table

Model 1: y ~ f(b1, b2, b3, b4, temp)
Model 2: y ~ b1[geno] + b2[geno] *
    exp(-exp(-((temp - b3[geno])/b4[geno])))
  Res.Df Res.Sum Sq Df Sum Sq F value    Pr(>F)
1     32     783.34
2     28     368.63  4 414.71  7.8751 0.0002197 ***
66
68
# Test for lack of fit by comparing the separate-curves Gompertz model
# with a cell means model that has a different mean for each
# combination of genotype and temperature.
ocellmeans <- lm(y ~ geno * as.factor(temp), data = d)
# The per-genotype nls fit is stored in ogeno; no object named ofull is
# created in this script.
anova(ogeno, ocellmeans)
Analysis of Variance Table

Model 1: y ~ b1[geno] + b2[geno] *
    exp(-exp(-((temp - b3[geno])/b4[geno])))
Model 2: y ~ geno * as.factor(temp)
  Res.Df Res.Sum Sq Df Sum Sq F value Pr(>F)
1     28     368.63
2     24     334.51  4 34.116  0.6119 0.6581
69
Download