##############################################################################
##### Program to Implement Traitspace Model on Simulated Data            #####
#####                                                                    #####
##### This program first simulates data for 3 species, 2 traits and      #####
##### 1 environmental factor. It then implements the Traitspace model    #####
##### developed by Laughlin et al. on this simulated data.               #####
##############################################################################
##### Authors: Chaitanya Joshi and Daniel C Laughlin, Univ of Waikato,NZ #####
##############################################################################

### Required packages
library(mclust)
library(MASS)

### Data simulation to implement Traitspace

# Number of trait values to be simulated for each species
Ntr <- 100

### Generate trait1 values for three species
# (normal draws with sd 4 around species means 10, 20 and 30)
spA.t1 <- rnorm(n = Ntr, mean = 10, sd = 4)
spB.t1 <- rnorm(n = Ntr, mean = 20, sd = 4)
spC.t1 <- rnorm(n = Ntr, mean = 30, sd = 4)

### Generate trait2 values for three species
# (normal draws with sd 0.1 around species means 0.8, 0.6 and 0.4)
spA.t2 <- rnorm(n = Ntr, mean = 0.8, sd = 0.1)
spB.t2 <- rnorm(n = Ntr, mean = 0.6, sd = 0.1)
spC.t2 <- rnorm(n = Ntr, mean = 0.4, sd = 0.1)

### Generate relationship between individual-level trait values and an
### environmental gradient

# Stack the simulated trait data: one column per species is turned into a
# long table with columns `values` (the trait value) and `ind` (the name of
# the source column, i.e. the species).
t1 <- data.frame(spA.t1 = spA.t1, spB.t1 = spB.t1, spC.t1 = spC.t1)
t1.stacked <- stack(t1)

t2 <- data.frame(spA.t2 = spA.t2, spB.t2 = spB.t2, spC.t2 = spC.t2)
t2.stacked <- stack(t2)

# Simulate environmental data as a function of the traits to emulate a
# trait-environment relationship.
# Note: in real situations the environment is measured independently from
# the trait data!
### Use this function to simulate a situation where traits are related to
### the environment.
# Each environmental value is a normal draw (sd 50) whose mean is a linear
# function of the two stacked trait vectors plus unit-variance noise. The
# noise vector is sized as 3 * Ntr (not a literal 300) so that it always has
# one entry per observation, whatever Ntr is set to.
env <- rnorm(
  3 * Ntr,
  500 + (-5) * t1.stacked$values + (10) * t2.stacked$values +
    rnorm(3 * Ntr, mean = 0, sd = 1),
  50
)

### Use this function to simulate a situation where traits are not related
### to the environment
# env <- rnorm(3 * Ntr, 10, 1)

### Plot the densities and trait-environment relationships visually
# This code is adapted from:
# http://onertipaday.blogspot.com/2007/09/plotting-two-or-more-overlapping.html

# Overlay kernel density estimates of several numeric samples on one plot.
#
# Args:
#   s:    a list of numeric vectors (the input MUST be a numeric list). One
#         density curve is drawn per element, coloured by its position in
#         the list (col = 1, 2, 3, ...).
#   xlab: x-axis label. Defaults to "Trait 1".
#
# Returns:
#   NULL, invisibly. Called for its plotting side effect on the active
#   graphics device.
plot.multi.dens <- function(s, xlab = "Trait 1") {
  stopifnot(is.list(s), length(s) > 0)

  # Estimate each density once and reuse it for both the axis ranges and
  # the drawing below.
  kdes <- lapply(s, density)
  xr <- range(unlist(lapply(kdes, function(d) d$x)))
  yr <- range(unlist(lapply(kdes, function(d) d$y)))

  # The first call sets up the axes so that every curve fits; the curves
  # themselves are then (re)drawn with thick lines. Axis limits are fixed by
  # plot(), so they are not passed on to lines().
  plot(kdes[[1]], xlim = xr, ylim = yr, main = "", xlab = xlab,
       cex.lab = 1.5)
  for (i in seq_along(kdes)) {
    lines(kdes[[i]], col = i, lwd = 3)
  }
  invisible(NULL)
}

X11()
par(mfrow = c(2, 2))

plot.multi.dens(list(spA.t1, spB.t1, spC.t1), xlab = "Trait 1")
legend("topright", c("Species A", "Species B", "Species C"),
       col = c(1, 2, 3), lty = c(1, 1, 1), lwd = c(4, 4, 4),
       cex = 0.85, bty = "n")

plot.multi.dens(list(spA.t2, spB.t2, spC.t2), xlab = "Trait 2")

### Plot env-trait relationships
plot(env, t1.stacked$values, xlab = "Environmental gradient",
     ylab = "Trait 1", cex.lab = 1.5)
plot(env, t2.stacked$values, xlab = "Environmental gradient",
     ylab = "Trait 2", cex.lab = 1.5)

### Combine dataset into a simple table
species <- t1.stacked$ind
trait1 <- t1.stacked$values
trait2 <- t2.stacked$values
# cbind() builds a numeric matrix, so the `species` factor is stored as its
# integer codes in the first column.
Sp_data <- cbind(species, trait1, trait2, env)

### END of Data Simulation ###

### Traitspace on the simulated data ###

## STEP
## 1a: Generalised Linear Model(GLM) to compute P(T/E)

# Environmental values in ascending order - these are the locations at which
# the fitted models are evaluated for prediction.
senv <- sort(env)
env_p <- data.frame(env = senv)

# Line types for the three columns of a prediction matrix: the fitted line
# is solid, the two 95% prediction limits are dashed.
band_lty <- c(1, 2, 2)

# Fitting a linear model on trait1
lm1 <- lm(trait1 ~ env)
summary(lm1)
std1 <- sd(residuals(lm1))
pred1 <- predict(lm1, newdata = env_p, se.fit = TRUE,
                 interval = "prediction", level = 0.95)

x11()
par(mfrow = c(1, 2))
plot(env, trait1, pch = 16, xlab = "Environmental gradient",
     ylab = "Trait 1", font = 2, cex = 1, font.lab = 2, cex.lab = 1.3)
for (k in 1:3) {
  lines(senv, pred1$fit[, k], lwd = 3, col = 1, lty = band_lty[k])
}

# Fitting a linear model on trait2
lm2 <- lm(trait2 ~ env)
summary(lm2)
std2 <- sd(residuals(lm2))
pred2 <- predict(lm2, newdata = env_p, se.fit = TRUE,
                 interval = "prediction", level = 0.95)

plot(env, trait2, pch = 16, xlab = "Environmental gradient",
     ylab = "Trait 2", font = 2, cex = 1, font.lab = 2, cex.lab = 1.3)
for (k in 1:3) {
  lines(senv, pred2$fit[, k], lwd = 3, col = 1, lty = band_lty[k])
}

## STEP 1b: Using Mclust to compute P(T/Sk)
# Note that warnings are common - refer to mclust library guide

# First and last row of species 2 and species 3 in Sp_data (each species
# occupies Ntr consecutive rows).
sp2s <- Ntr + 1
sp2e <- 2 * Ntr
sp3s <- 2 * Ntr + 1
sp3e <- 3 * Ntr

# Columns 2 and 3 of Sp_data hold the two traits.
trait_cols <- c("trait1", "trait2")
species1 <- Sp_data[1:Ntr, trait_cols]       # trait data for species 1
species2 <- Sp_data[sp2s:sp2e, trait_cols]   # trait data for species 2
species3 <- Sp_data[sp3s:sp3e, trait_cols]   # trait data for species 3

# One Mclust fit per species; the fitted parameters are kept separately for
# the density evaluation in Step 2b.
pdf1 <- Mclust(species1, warn = TRUE)
par1 <- pdf1[["parameters"]]
pdf2 <- Mclust(species2, warn = TRUE)
par2 <- pdf2[["parameters"]]
pdf3 <- Mclust(species3, warn = TRUE)
par3 <- pdf3[["parameters"]]

## STEP 2a: Drawing samples from P(T/E)
# Note that in this example we use the locations on the simulated
# environmental gradient (env) for simplicity. But in real situations, you
# would use the set of environmental conditions at which you are interested
# in predicting species abundances. For example, to test and validate the
# model, you would use environmental conditions measured at plot locations
# where species abundances are known, as we did in Laughlin et al.
# Sample size per location on the environmental gradient
N <- 1000

# Number of prediction locations, and the fitted trait means at those
# locations (first column of the prediction matrices from Step 1a).
n_loc <- length(senv)
fit1 <- pred1$fit[, 1]
fit2 <- pred2$fit[, 1]

# Draw N trait values per location. Samples are stored location by location:
# rows ((j - 1) * N + 1):(j * N) belong to location j. The per-location loop
# is kept (rather than one vectorised rnorm call) so that the order of the
# random draws is unchanged.
trait1_sample <- numeric(n_loc * N)
trait2_sample <- numeric(n_loc * N)
for (j in seq_len(n_loc)) {
  rows <- ((j - 1) * N + 1):(j * N)
  trait1_sample[rows] <- rnorm(N, fit1[j], std1)
  trait2_sample[rows] <- rnorm(N, fit2[j], std2)
}
# Environmental value of each sample (temp array for plotting)
temp_prd <- rep(senv, each = N)
trt_sample <- cbind(trait1_sample, trait2_sample)

## computing(P(T/E))
# Density of every sample under the normal distribution it was drawn from;
# rep(..., each = N) lines the location means up with the sample layout.
P_T1_E <- dnorm(trait1_sample, mean = rep(fit1, each = N), sd = std1)
P_T2_E <- dnorm(trait2_sample, mean = rep(fit2, each = N), sd = std2)
# Product of the two trait densities, formed on the log scale
P_T_E <- exp(log(P_T1_E) + log(P_T2_E))

### Plotting samples
x11()
par(mfrow = c(1, 2))
plot(temp_prd, trait1_sample, font = 2, cex = 0.1, font.lab = 2, pch = 1,
     xlab = "Environmental gradient", ylab = "Trait 1 samples",
     cex.lab = 1.3, col = "blue")
plot(temp_prd, trait2_sample, font = 2, cex = 0.1, font.lab = 2, pch = 1,
     xlab = "Environmental gradient", ylab = "Trait 2 samples",
     cex.lab = 1.3, col = "blue")

## Step 2b: Computing the likelihood P(T/Sk) using Mclust done earlier
# Arguments are passed by name so the calls do not depend on the positional
# order of dens()'s parameters.
P_T_S1 <- dens(modelName = pdf1$modelName, data = trt_sample,
               parameters = par1)
P_T_S2 <- dens(modelName = pdf2$modelName, data = trt_sample,
               parameters = par2)
P_T_S3 <- dens(modelName = pdf3$modelName, data = trt_sample,
               parameters = par3)
P_T_S <- cbind(P_T_S1, P_T_S2, P_T_S3)

## Step 2c: Computing posterior P(Sk/T,E) using Bayes theorem
# Multiplying likelihood by flat prior (1 / number of species) - numerator
# in Bayes thm
P_T_S_pr <- P_T_S / ncol(P_T_S)
# Sum over species for each sample - denominator in Bayes thm.
P_T_S_pr_sum <- rowSums(P_T_S_pr)
# Number of species (columns of the likelihood matrix) and number of
# locations on the environmental gradient.
n_species <- ncol(P_T_S)
n_env <- length(senv)

# Posterior P(Sk/T,E) for every sample: numerator / denominator, formed on
# the log scale. The denominator vector has one entry per row, so it is
# recycled down each column of the numerator matrix.
P_S_T_E <- unname(exp(log(P_T_S_pr) - log(P_T_S_pr_sum)))

## Step 2d: Posterior P(Sk/E) by integrating out T's

# Computing the integrand (with log): P(T/E) * P(Sk/T,E) for every sample
P_S_E_all <- exp(log(P_T_E) + log(P_S_T_E))

# MC integration and normalisation.
# Samples are stored location by location, N rows each, so location k owns
# rows ((k - 1) * N + 1):(k * N). The row range is computed at the START of
# each iteration so that every location is averaged over its own samples
# (updating the offset at the end of the iteration would make location k
# reuse the samples of location k - 1 and never use the last block).
P_S_E_unnorm <- matrix(0, n_env, n_species)   # unnormalised P_S_E
for (k in seq_len(n_env)) {
  rows <- ((k - 1) * N + 1):(k * N)
  P_S_E_unnorm[k, ] <- colMeans(P_S_E_all[rows, , drop = FALSE])   # MC
}
# Normalisation: each row is divided by its own sum
P_S_E <- P_S_E_unnorm / rowSums(P_S_E_unnorm)

# Should produce a vector of 1's indicating that valid posterior probability
# distributions have been obtained.
rowSums(P_S_E)

# Inspect predictions for each species (columns) at each location (rows)
P_S_E

# Plotting the relative abundance estimates
x11()
# Empty canvas spanning the gradient on x and [0, 1] on y; the points are
# added per species below.
plot(NA, type = "n", xlim = range(senv), ylim = c(0, 1),
     xlab = "Environmental gradient", ylab = "Predicted relative abundance",
     cex.lab = 1.4, font.lab = 2, font = 2)

species1_pred <- P_S_E[, 1]
species2_pred <- P_S_E[, 2]
species3_pred <- P_S_E[, 3]

points(senv, species1_pred, col = 1, pch = 16)
points(senv, species2_pred, col = 2, pch = 16)
points(senv, species3_pred, col = 3, pch = 16)

legend("left", c("Species A", "Species B", "Species C"),
       col = c(1, 2, 3), lty = c(1, 1, 1), lwd = c(4, 4, 4),
       cex = 1.3, bty = "n")

### End of Program