# Univariate Examples with N Data
#
# advertisement
# Some example programming for R
# Get data from file ----
# Three interchangeable ways of loading the data are shown. Each one assigns
# to `ndata`, so when the whole script is run top to bottom the last
# (interactive) load is the one the rest of the script works with.

# Option 1: read from a full path.
ndata <- read.table(
  "D:/Paul/AAE TEACHING/AAE 575/Data/IowaNData.txt",
  header = TRUE, as.is = TRUE
)

# Option 2: build the path from a data directory. file.path() is used
# instead of setwd() so the session's working directory is left untouched.
data_dir <- "D:/Paul/AAE TEACHING/AAE 575/Data"
ndata <- read.table(
  file.path(data_dir, "IowaNData.txt"),
  header = TRUE, as.is = TRUE
)

# Option 3 (Elissa code): choose the file interactively.
# Cells containing "." are read as NA; `na.strings` is spelled out in full
# rather than relying on partial matching of `na`.
ndata <- read.delim(file.choose(), header = TRUE, na.strings = ".")

# Rename the variables
colnames(ndata) <- c("year", "CfC", "Nrate", "yield", "prop_t", "prop_sy")
# Looking at the data ----
# Look at the data by a single variable or all at once
summary(ndata$Nrate)
summary(ndata)
# Look at variable types, number of observations, etc.
str(ndata)
# Convert a variable into a factor (similar in spirit to dummy variables).
# After the rename there is no column called `variable`, so the rotation
# indicator CfC is used as the example. The factor is stored in a NEW column
# so the numeric 0/1 CfC column stays available for arithmetic in model
# formulas (e.g. a_c * CfC).
ndata$CfC_f <- as.factor(ndata$CfC)
# Plot the data ----
# Two ways to do the same thing
plot(ndata$yield ~ ndata$Nrate)
plot(yield ~ Nrate, data = ndata)

# Linear regression with one variable: y = a + b*Nrate
output1 <- lm(yield ~ Nrate, data = ndata)
# Linear regression with two variables: y = a0 + ac*CfC + b*Nrate
output2 <- lm(yield ~ Nrate + CfC, data = ndata)
summary(output1)
summary(output2)
# Scatter plot with the one-variable regression line drawn on top
plot(yield ~ Nrate, data = ndata)
abline(output1)

# How to drop the intercept if you ever want to
output3 <- lm(yield ~ -1 + Nrate, data = ndata)
# Summarize the no-intercept model that was just fitted (not output2)
summary(output3)
# Compare the no-intercept line with the line that has an intercept
plot(yield ~ Nrate, data = ndata)
abline(output3)
abline(output1)
# Total sum of squares of yield about its mean; used later as the
# denominator when computing R2 for each fitted model.
sqdev <- with(ndata, (yield - mean(yield))^2)
tss <- sum(sqdev)
# Grid of 300 evenly spaced N rates from 0 to 300, used to draw fitted curves.
# `length.out` is spelled out in full instead of relying on partial matching
# of `length`.
xx <- seq(0, 300, length.out = 300)
# Linear: yield = aa + bb * Nrate, fitted with nls
linefit <- nls(
  yield ~ aa + bb * Nrate,
  data = ndata,
  start = list(aa = 50, bb = 1)
)
summary(linefit)
# Residual sum of squares and the R2 computed from it
ess_linefit <- sum(resid(linefit)^2)
r2_line <- 1 - ess_linefit / tss
r2_line
# Root mean squared error of the residuals
rmse_line <- sqrt(mean(resid(linefit)^2))
rmse_line
# Fitted curve evaluated on the plotting grid xx
y_line <- with(as.list(coef(linefit)), aa + bb * xx)
plot(yield ~ Nrate, data = ndata)
lines(xx, y_line, lty = 1, col = 2)
# Quadratic: yield = aa + bb * Nrate + cc * Nrate^2, fitted with nls
quadfit <- nls(
  yield ~ aa + bb * Nrate + cc * Nrate * Nrate,
  data = ndata,
  start = list(aa = 50, bb = 1, cc = -0.1)
)
summary(quadfit)
# Residual sum of squares, R2 and RMSE
ess_quadfit <- sum(residuals(quadfit)^2)
r2_quad <- 1 - (ess_quadfit / tss)
r2_quad
rmse_quad <- sqrt(mean(residuals(quadfit)^2))
rmse_quad
# Fitted curve evaluated on the plotting grid xx
y_quad <- (coef(quadfit)["aa"] + coef(quadfit)["bb"] * xx +
  coef(quadfit)["cc"] * xx * xx)
plot(yield ~ Nrate, data = ndata)
lines(y_quad ~ xx, lty = 1, col = 2)
# Alternative: give the colour by name instead of by number
lines(y_quad ~ xx, lty = 1, col = "red")
# NOTE: lty=1; lty=2, etc.: line type 1 = solid, 2 = dashed, 3 = dotted, etc.

# Predict at observed Nrate
qfit <- predict(quadfit)
sqerrqfit <- (ndata$yield - qfit)^2
ess_qfit <- sum(sqerrqfit)
# Data in black with the fitted values at the observed N rates in red.
# points() adds to the existing plot; a second plot() would replace it.
plot(yield ~ Nrate, data = ndata, col = 1)
points(qfit ~ Nrate, data = ndata, col = 2)

# Predict at sequence xx. The element of `newdata` must carry the name of the
# model's predictor (Nrate); with any other name predict() quietly falls back
# to the data used for fitting.
qfit <- predict(quadfit, newdata = list(Nrate = xx))
# The grid predictions are drawn against xx, the values they belong to
lines(qfit ~ xx, lty = 1, col = "blue")
# Negative exponential: yield = ymax * (1 - exp(aa + bb * Nrate))
negxfit <- nls(
  yield ~ ymax * (1 - exp(aa + bb * Nrate)),
  data = ndata,
  start = list(ymax = 150, aa = -0.5, bb = -0.01)
)
summary(negxfit)
# Residual sum of squares and the R2 computed from it
ess_negxfit <- sum(resid(negxfit)^2)
r2_negx <- 1 - ess_negxfit / tss
r2_negx
# Root mean squared error of the residuals
rmse_negx <- sqrt(mean(resid(negxfit)^2))
rmse_negx
# Fitted curve evaluated on the plotting grid xx
y_negx <- with(as.list(coef(negxfit)), ymax * (1 - exp(aa + bb * xx)))
plot(yield ~ Nrate, data = ndata)
lines(xx, y_negx, col = 3)
# Hyperbolic: yield = aa * Nrate / (1 + aa * Nrate / bb)
hyprfit <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb),
  data = ndata,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit)
# Residual sum of squares and the R2 computed from it
ess_hyprfit <- sum(resid(hyprfit)^2)
r2_hypr <- 1 - ess_hyprfit / tss
r2_hypr
# Root mean squared error of the residuals
rmse_hypr <- sqrt(mean(resid(hyprfit)^2))
rmse_hypr
# Fitted curve evaluated on the plotting grid xx
y_hypr <- with(as.list(coef(hyprfit)), aa * xx / (1 + aa * xx / bb))
plot(yield ~ Nrate, data = ndata)
lines(xx, y_hypr, col = 4)
# Cobb-Douglas with intercept: yield = a0 + aa * Nrate^bb
# Need intercept a0 and need to start bb at 1, else divide by zero error
cobbfit <- nls(
  yield ~ a0 + aa * Nrate^bb,
  data = ndata,
  start = list(a0 = 50, aa = 1, bb = 1.0)
)
summary(cobbfit)
# Residual sum of squares and the R2 computed from it
ess_cobbfit <- sum(resid(cobbfit)^2)
r2_cobb <- 1 - ess_cobbfit / tss
r2_cobb
# Root mean squared error of the residuals
rmse_cobb <- sqrt(mean(resid(cobbfit)^2))
rmse_cobb
# Fitted curve evaluated on the plotting grid xx
y_cobb <- with(as.list(coef(cobbfit)), a0 + aa * xx^bb)
plot(yield ~ Nrate, data = ndata)
lines(xx, y_cobb, col = 6)
# Power: yield = aa + Nrate^bb
powrfit <- nls(
  yield ~ aa + Nrate^bb,
  data = ndata,
  start = list(aa = 50, bb = 1.0)
)
summary(powrfit)
# Residual sum of squares and the R2 computed from it
ess_powrfit <- sum(resid(powrfit)^2)
r2_powr <- 1 - ess_powrfit / tss
r2_powr
# Root mean squared error of the residuals
rmse_powr <- sqrt(mean(resid(powrfit)^2))
rmse_powr
# Fitted curve evaluated on the plotting grid xx
y_powr <- with(as.list(coef(powrfit)), aa + xx^bb)
plot(yield ~ Nrate, data = ndata)
lines(xx, y_powr, col = 8)
# Linear Response and Plateau
# Fails if in terms of Ymax: maybe better starting values?
# The attempt is wrapped in tryCatch() so that a failure is reported as a
# message and the rest of the script still runs when it is sourced.
lrpfit <- tryCatch(
  nls(
    yield ~ ifelse(aa + bb * Nrate > ymax, ymax, aa + bb * Nrate),
    data = ndata,
    start = list(ymax = 160, aa = 90, bb = 0.2)
  ),
  error = function(e) {
    message("LRP fit in terms of ymax failed: ", conditionMessage(e))
    NULL
  }
)
if (!is.null(lrpfit)) {
  print(summary(lrpfit))
}

# Succeeds if in terms of N max, not Y max:
# yield rises linearly up to Nmax and is flat at aa + bb * Nmax beyond it.
lrpfit <- nls(
  yield ~ ifelse(Nrate > Nmax, aa + bb * Nmax, aa + bb * Nrate),
  data = ndata,
  start = list(Nmax = 150, aa = 90, bb = 0.2)
)
summary(lrpfit)
# Residual sum of squares, R2 and RMSE
ess_lrpfit <- sum(residuals(lrpfit)^2)
r2_lrp <- 1 - (ess_lrpfit / tss)
r2_lrp
rmse_lrp <- sqrt(mean(residuals(lrpfit)^2))
rmse_lrp
# Plateau yield implied by the estimates, then the fitted curve on the grid xx
ymaxlrp <- coef(lrpfit)["aa"] + coef(lrpfit)["bb"] * coef(lrpfit)["Nmax"]
y_lrp <- (ifelse(xx > coef(lrpfit)["Nmax"], ymaxlrp,
  coef(lrpfit)["aa"] + coef(lrpfit)["bb"] * xx))
plot(yield ~ Nrate, data = ndata)
lines(xx, y_lrp, col = 5)
# Quadratic Response and Plateau
# FAILS: even if use lrp estimates as starting values.
# The fit is wrapped in tryCatch() so that the failure is reported as a
# message instead of stopping the script before the code that follows it.
# NOTE(review): a plateau form with Nmax tied to the vertex, -bb / (2 * cc),
# has one parameter fewer and may be easier to fit -- not tried here.
qrpfit <- tryCatch(
  nls(
    yield ~ ifelse(Nrate > Nmax,
      aa + bb * Nmax + cc * Nmax * Nmax,
      aa + bb * Nrate + cc * Nrate * Nrate
    ),
    data = ndata,
    start = list(Nmax = 113.36, aa = 70.7, bb = 0.578, cc = 0.0)
  ),
  error = function(e) {
    message("QRP fit failed: ", conditionMessage(e))
    NULL
  }
)
if (!is.null(qrpfit)) {
  print(summary(qrpfit))
}
# Plot all model fits: data as points, then one curve per model over the
# grid xx, drawn in the same order and colours as in the individual plots.
plot(yield ~ Nrate, data = ndata)
invisible(Map(
  function(curve, colour) lines(xx, curve, col = colour),
  list(y_line, y_quad, y_negx, y_hypr, y_cobb, y_powr, y_lrp),
  c(1, 2, 3, 4, 6, 8, 5)
))
# Print all R2 (each computed above as 1 - ESS/TSS), in the order:
# linear, quadratic, negative exponential, hyperbolic, Cobb-Douglas, power,
# linear response and plateau
r2_line
r2_quad
r2_negx
r2_hypr
r2_cobb
r2_powr
r2_lrp
# Print all RMSE (square root of the mean squared residual), same model order
rmse_line
rmse_quad
rmse_negx
rmse_hypr
rmse_cobb
rmse_powr
rmse_lrp
# Prepare for analysis by year and/or rotation
# One data frame per year; which() drops rows where the comparison is NA
yr87 <- ndata[which(ndata$year == 1987), , drop = FALSE]
yr88 <- ndata[which(ndata$year == 1988), , drop = FALSE]
yr89 <- ndata[which(ndata$year == 1989), , drop = FALSE]
yr90 <- ndata[which(ndata$year == 1990), , drop = FALSE]
yr91 <- ndata[which(ndata$year == 1991), , drop = FALSE]
# Split 1988 by the value of the CfC indicator (1 vs 0)
yr88cfc <- yr88[which(yr88$CfC == 1), , drop = FALSE]
yr88cfs <- yr88[which(yr88$CfC == 0), , drop = FALSE]
# Create Dummy Variables for Year
# The vectors are sized from the data rather than a hard-coded 300, so the
# dummies stay aligned with ndata whatever its number of rows.
# Make a vector of ones
ones <- rep(1, nrow(ndata))
# Make a vector of zeros
zeros <- rep(0, nrow(ndata))
# 1 where the observation is from the given year, 0 otherwise.
# %in% never returns NA, so a missing year gives 0 rather than NA.
D87 <- as.numeric(ndata$year %in% 1987)
D88 <- as.numeric(ndata$year %in% 1988)
D89 <- as.numeric(ndata$year %in% 1989)
D90 <- as.numeric(ndata$year %in% 1990)
D91 <- as.numeric(ndata$year %in% 1991)
# Test Rotation Effect with Hyperbolic Model
# Main finding: with hyperbolic model, get no significant rotation effect on a or b

# Hyperbolic 1988: all 1988 data, then each CfC group separately
hyprfit88 <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb),
  data = yr88,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit88)
hyprfit88c <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb),
  data = yr88cfc,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit88c)
hyprfit88s <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb),
  data = yr88cfs,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit88s)

# Try a joint model: both parameters shift with CfC
hyprfit88ab <- nls(
  yield ~ (a_s + a_c * CfC) * Nrate /
    (1 + (a_s + a_c * CfC) * Nrate / (b_s + b_c * CfC)),
  data = yr88,
  start = list(a_s = 1, a_c = 0.1, b_s = 150, b_c = 10)
)
summary(hyprfit88ab)
# Only the b parameter shifts with CfC
hyprfit88b <- nls(
  yield ~ (aa) * Nrate / (1 + (aa) * Nrate / (b_s + b_c * CfC)),
  data = yr88,
  start = list(aa = 1, b_s = 150, b_c = 10)
)
summary(hyprfit88b)
# Only the a parameter shifts with CfC
hyprfit88a <- nls(
  yield ~ (a_s + a_c * CfC) * Nrate / (1 + (a_s + a_c * CfC) * Nrate / bb),
  data = yr88,
  start = list(a_s = 1, a_c = 0.1, bb = 120)
)
summary(hyprfit88a)
# Download