# Some example programming for R
#
# Fits and compares several yield-response models (yield as a function of
# nitrogen rate) on the Iowa N data set, then prepares year/rotation subsets
# and dummy variables and tests a rotation effect with the hyperbolic model.

# Get Data from File ----
# Three alternative ways of loading the same data are shown. Each one
# overwrites `ndata`, so in practice only one of them is needed.
ndata <- read.table(
  "D:/Paul/AAE TEACHING/AAE 575/Data/IowaNData.txt",
  header = TRUE, as.is = TRUE
)

# alternative: set the working directory, then use a relative file name
# NOTE(review): setwd() changes the working directory for the rest of the
# session; kept here because it is part of the original example.
setwd("D:/Paul/AAE TEACHING/AAE 575/Data/")
ndata <- read.table("IowaNData.txt", header = TRUE, as.is = TRUE)

# Elissa code: choose the file interactively; "." is read as a missing value
ndata <- read.delim(file.choose(), header = TRUE, na.strings = ".")

# Rename the variables
colnames(ndata) <- c("year", "CfC", "Nrate", "yield", "prop_t", "prop_sy")

# Looking at the data ----
# Look at the data by a single variable or all at once
summary(ndata$Nrate)
summary(ndata)

# look at variable types and number obs, etc.
str(ndata)

# Convert "variable" into a factor: I think this is like my dummy variables.
# `variable` is a placeholder: after the rename above ndata has no column of
# that name, so the line errors if run as written. Substitute a real column
# name before uncommenting. Do not overwrite CfC with a factor, because the
# joint models at the end of the script use CfC arithmetically.
# ndata$variable <- as.factor(ndata$variable)

# Plot the data ----
# Two ways to do the same thing
plot(ndata$yield ~ ndata$Nrate)
plot(yield ~ Nrate, ndata)

# Linear regression with one variable: y = a + b*Nrate
output1 <- lm(yield ~ Nrate, ndata)

# Linear regression with two variables: y = a0 + ac*CfC + b*Nrate
output2 <- lm(yield ~ Nrate + CfC, ndata)
summary(output1)
summary(output2)
plot(yield ~ Nrate, ndata)
abline(output1)

# How to drop intercept if ever want to
output3 <- lm(yield ~ -1 + Nrate, ndata)
summary(output3) # summarise the no-intercept fit that was just estimated
plot(yield ~ Nrate, ndata)
abline(output3)
abline(output1)

# Define TSS to calculate R2 later
sqdev <- (ndata$yield - mean(ndata$yield))^2
tss <- sum(sqdev)

# Define for plots: grid of N rates at which fitted curves are evaluated
xx <- seq(0, 300, length.out = 300)

# Linear ----
linefit <- nls(yield ~ aa + bb * Nrate, ndata, start = list(aa = 50, bb = 1))
summary(linefit)
ess_linefit <- sum(residuals(linefit)^2)
r2_line <- 1 - (ess_linefit / tss)
r2_line
rmse_line <- sqrt(mean(residuals(linefit)^2))
rmse_line
y_line <- (coef(linefit)["aa"] + coef(linefit)["bb"] * xx)
plot(yield ~ Nrate, ndata)
lines(y_line ~ xx, lty = 1, col = 2)

# Quadratic ----
quadfit <- nls(
  yield ~ aa + bb * Nrate + cc * Nrate * Nrate, ndata,
  start = list(aa = 50, bb = 1, cc = -0.1)
)
summary(quadfit)
ess_quadfit <- sum(residuals(quadfit)^2)
r2_quad <- 1 - (ess_quadfit / tss)
r2_quad
rmse_quad <- sqrt(mean(residuals(quadfit)^2))
rmse_quad
y_quad <- (coef(quadfit)["aa"] + coef(quadfit)["bb"] * xx +
  coef(quadfit)["cc"] * xx * xx)
plot(yield ~ Nrate, ndata)
lines(y_quad ~ xx, lty = 1, col = 2)
# alternative
lines(y_quad ~ xx, lty = 1, col = "red")
# NOTE: lty=1; lty=2, etc.: line type 1 = solid, 2 = dashed, 3 = dotted, etc.

# predict at observed Nrate
qfit <- predict(quadfit)
sqerrqfit <- (ndata$yield - qfit)^2
ess_qfit <- sum(sqerrqfit)

# predict at sequence xx
# newdata must name the predictor exactly as in the model formula (Nrate).
# With any other name, predict() finds Nrate in the fitting data instead and
# silently returns predictions at the observed rates.
qfit_xx <- predict(quadfit, newdata = data.frame(Nrate = xx))
plot(yield ~ Nrate, ndata, col = 1)
points(qfit ~ Nrate, ndata, col = 2) # fitted values at the observed rates
lines(qfit_xx ~ xx, lty = 1, col = "blue") # fitted curve over the grid

# Negative Exponential ----
negxfit <- nls(
  yield ~ ymax * (1 - exp(aa + bb * Nrate)), ndata,
  start = list(ymax = 150, aa = -0.5, bb = -0.01)
)
summary(negxfit)
ess_negxfit <- sum(residuals(negxfit)^2)
r2_negx <- 1 - (ess_negxfit / tss)
r2_negx
rmse_negx <- sqrt(mean(residuals(negxfit)^2))
rmse_negx
y_negx <- (coef(negxfit)["ymax"] *
  (1 - exp(coef(negxfit)["aa"] + coef(negxfit)["bb"] * xx)))
plot(yield ~ Nrate, ndata)
lines(y_negx ~ xx, col = 3)

# Hyperbolic ----
hyprfit <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb), ndata,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit)
ess_hyprfit <- sum(residuals(hyprfit)^2)
r2_hypr <- 1 - (ess_hyprfit / tss)
r2_hypr
rmse_hypr <- sqrt(mean(residuals(hyprfit)^2))
rmse_hypr
y_hypr <- (coef(hyprfit)["aa"] * xx /
  (1 + coef(hyprfit)["aa"] * xx / coef(hyprfit)["bb"]))
plot(yield ~ Nrate, ndata)
lines(y_hypr ~ xx, col = 4)

# Cobb-Douglas ----
# Need intercept a0 and need start bb at 1, else divide by zero error
cobbfit <- nls(
  yield ~ a0 + aa * Nrate^bb, ndata,
  start = list(a0 = 50, aa = 1, bb = 1.0)
)
summary(cobbfit)
ess_cobbfit <- sum(residuals(cobbfit)^2)
r2_cobb <- 1 - (ess_cobbfit / tss)
r2_cobb
rmse_cobb <- sqrt(mean(residuals(cobbfit)^2))
rmse_cobb
y_cobb <- (coef(cobbfit)["a0"] + coef(cobbfit)["aa"] * xx^coef(cobbfit)["bb"])
plot(yield ~ Nrate, ndata)
lines(y_cobb ~ xx, col = 6)

# Power ----
powrfit <- nls(
  yield ~ aa + Nrate^bb, ndata,
  start = list(aa = 50, bb = 1.0)
)
summary(powrfit)
ess_powrfit <- sum(residuals(powrfit)^2)
r2_powr <- 1 - (ess_powrfit / tss)
r2_powr
rmse_powr <- sqrt(mean(residuals(powrfit)^2))
rmse_powr
y_powr <- (coef(powrfit)["aa"] + xx^coef(powrfit)["bb"])
plot(yield ~ Nrate, ndata)
lines(y_powr ~ xx, col = 8)

# Linear Response and Plateau ----
# Fails if in terms of Ymax: maybe better starting values?
# Wrapped in tryCatch so that the expected failure is reported without
# stopping the rest of the script when it is sourced.
lrpfit_ymax <- tryCatch(
  nls(
    yield ~ ifelse(aa + bb * Nrate > ymax, ymax, aa + bb * Nrate), ndata,
    start = list(ymax = 160, aa = 90, bb = 0.2)
  ),
  error = function(e) {
    message("LRP fit in terms of Ymax failed: ", conditionMessage(e))
    NULL
  }
)
if (!is.null(lrpfit_ymax)) {
  print(summary(lrpfit_ymax))
}

# Succeeds if in terms of N max, not Y max
lrpfit <- nls(
  yield ~ ifelse(Nrate > Nmax, aa + bb * Nmax, aa + bb * Nrate), ndata,
  start = list(Nmax = 150, aa = 90, bb = 0.2)
)
summary(lrpfit)
ess_lrpfit <- sum(residuals(lrpfit)^2)
r2_lrp <- 1 - (ess_lrpfit / tss)
r2_lrp
rmse_lrp <- sqrt(mean(residuals(lrpfit)^2))
rmse_lrp
# Plateau yield implied by the estimated Nmax
ymaxlrp <- coef(lrpfit)["aa"] + coef(lrpfit)["bb"] * coef(lrpfit)["Nmax"]
y_lrp <- (ifelse(xx > coef(lrpfit)["Nmax"], ymaxlrp,
  coef(lrpfit)["aa"] + coef(lrpfit)["bb"] * xx
))
plot(yield ~ Nrate, ndata)
lines(y_lrp ~ xx, col = 5)

# Quadratic Response and Plateau ----
# FAILS: even if use lrp estimates as starting values
# Wrapped in tryCatch for the same reason as the Ymax form of the LRP above.
qrpfit <- tryCatch(
  nls(
    yield ~ ifelse(Nrate > Nmax,
      aa + bb * Nmax + cc * Nmax * Nmax,
      aa + bb * Nrate + cc * Nrate * Nrate
    ),
    ndata,
    start = list(Nmax = 113.36, aa = 70.7, bb = 0.578, cc = 0.0)
  ),
  error = function(e) {
    message("QRP fit failed: ", conditionMessage(e))
    NULL
  }
)
if (!is.null(qrpfit)) {
  print(summary(qrpfit))
}

# Plot all model fits ----
plot(yield ~ Nrate, ndata)
lines(y_line ~ xx, col = 1)
lines(y_quad ~ xx, col = 2)
lines(y_negx ~ xx, col = 3)
lines(y_hypr ~ xx, col = 4)
lines(y_cobb ~ xx, col = 6)
lines(y_powr ~ xx, col = 8)
lines(y_lrp ~ xx, col = 5)

# Print all R2
r2_line
r2_quad
r2_negx
r2_hypr
r2_cobb
r2_powr
r2_lrp

# Print all RMSE
rmse_line
rmse_quad
rmse_negx
rmse_hypr
rmse_cobb
rmse_powr
rmse_lrp

# Prepare for analysis by year and/or rotation ----
# Create subsets of ndata by year
yr87 <- subset(ndata, year == 1987)
yr88 <- subset(ndata, year == 1988)
yr89 <- subset(ndata, year == 1989)
yr90 <- subset(ndata, year == 1990)
yr91 <- subset(ndata, year == 1991)
yr88cfc <- subset(yr88, CfC == 1)
yr88cfs <- subset(yr88, CfC == 0)

# Create Dummy Variables for Year
# Vectors are sized from the data rather than a hard-coded row count.
# Make a vector of ones
ones <- rep(1, nrow(ndata))
# Make a vector of zeros
zeros <- numeric(nrow(ndata))
# %in% never yields NA, so a missing year gives 0 in every dummy
D87 <- as.numeric(ndata$year %in% 1987)
D88 <- as.numeric(ndata$year %in% 1988)
D89 <- as.numeric(ndata$year %in% 1989)
D90 <- as.numeric(ndata$year %in% 1990)
D91 <- as.numeric(ndata$year %in% 1991)

# Test Rotation Effect with Hyperbolic Model ----
# Main finding: with hyperbolic model, get no significant rotation effect on
# a or b

# Hyperbolic 1988
hyprfit88 <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb), yr88,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit88)
hyprfit88c <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb), yr88cfc,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit88c)
hyprfit88s <- nls(
  yield ~ aa * Nrate / (1 + aa * Nrate / bb), yr88cfs,
  start = list(aa = 1, bb = 150)
)
summary(hyprfit88s)

# Try a joint model: CfC shifts both a and b
hyprfit88ab <- nls(
  yield ~ (a_s + a_c * CfC) * Nrate /
    (1 + (a_s + a_c * CfC) * Nrate / (b_s + b_c * CfC)),
  yr88,
  start = list(a_s = 1, a_c = 0.1, b_s = 150, b_c = 10)
)
summary(hyprfit88ab)

# CfC shifts b only
hyprfit88b <- nls(
  yield ~ (aa) * Nrate / (1 + (aa) * Nrate / (b_s + b_c * CfC)),
  yr88,
  start = list(aa = 1, b_s = 150, b_c = 10)
)
summary(hyprfit88b)

# CfC shifts a only
hyprfit88a <- nls(
  yield ~ (a_s + a_c * CfC) * Nrate / (1 + (a_s + a_c * CfC) * Nrate / bb),
  yr88,
  start = list(a_s = 1, a_c = 0.1, bb = 120)
)
summary(hyprfit88a)