# ele1852-sup-0001-AppendixS1

# advertisement (web-page residue; not part of the program)
##### Program to Implement Traitspace Model on Simulated Data
#####
#####
#####
##### This program first simulates data for 3 species, 2 traits and
#####
##### 1 environmental factor. It then implements the Traitspace model
#####
##### developed by Laughlin et al. on this simulated data.
#####
##############################################################################
##### Authors: Chaitanya Joshi and Daniel C Laughlin, Univ of Waikato,NZ #####
##############################################################################
###Required packages
library(mclust)
library(MASS)
###Data simulation to implement Traitspace
Ntr <- 100  # individuals (trait values) simulated per species
### Trait 1: one normal sample per species, means 10/20/30 with a common sd of 4
spA.t1 <- rnorm(n = Ntr, mean = 10, sd = 4)
spB.t1 <- rnorm(n = Ntr, mean = 20, sd = 4)
spC.t1 <- rnorm(n = Ntr, mean = 30, sd = 4)
### Trait 2: one normal sample per species, means 0.8/0.6/0.4 with a common sd of 0.1
spA.t2 <- rnorm(n = Ntr, mean = 0.8, sd = 0.1)
spB.t2 <- rnorm(n = Ntr, mean = 0.6, sd = 0.1)
spC.t2 <- rnorm(n = Ntr, mean = 0.4, sd = 0.1)
### Reshape each trait to long format: stack() returns a `values` column and an
### `ind` factor naming the species column each value came from
t1 <- data.frame(spA.t1, spB.t1, spC.t1)
t1.stacked <- stack(t1)
t2 <- data.frame(spA.t2, spB.t2, spC.t2)
t2.stacked <- stack(t2)
#Simulate environmental data as a function of the traits to emulate a trait-environment relationship
#Note in real situations the environment is measured independently from trait data!
###Use this function to simulate a situation where traits are related to the environment
# One environmental value per simulated individual, drawn from a normal with
# sd 50 whose mean is 500 - 5 * trait1 + 10 * trait2 plus unit-variance noise.
# The noise vector is sized from Ntr (it was hard-coded to 300) so that it
# always matches the number of stacked trait values when Ntr is changed.
env <- rnorm(
  3 * Ntr,
  mean = 500 + (-5) * t1.stacked$values + (10) * t2.stacked$values +
    rnorm(3 * Ntr, mean = 0, sd = 1),
  sd = 50
)
###Use this function to simulate a situation where traits are not related to the environment
#env <- rnorm(3*Ntr,10,1)
###Plot the densities and trait-environment relationships visually
#This code is adapted from:
#http://onertipaday.blogspot.com/2007/09/plotting-two-or-more-overlapping.html
# Open a new graphics device for the simulated-data figures. dev.new() opens
# the session's default device, so the script does not depend on an X11
# display being available.
dev.new()
# Overlay kernel density estimates of several numeric samples on one plot.
#
# Args:
#   s:    non-empty list of numeric vectors; one curve is drawn per element,
#         coloured by its position in the list (col = 1, 2, ...).
#   xlab: x-axis label; defaults to "Trait 1", the label that was previously
#         hard-coded.
#
# Returns NULL invisibly; the function is called for its plotting side effect.
plot.multi.dens <- function(s, xlab = "Trait 1") {
  if (length(s) == 0) {
    stop("`s` must be a non-empty list of numeric vectors", call. = FALSE)
  }
  # Estimate every density once and reuse it for the axis limits and the curves
  # (previously density() was recomputed three times per series).
  dens_list <- lapply(s, density)
  xr <- range(unlist(lapply(dens_list, function(d) d$x)))
  yr <- range(unlist(lapply(dens_list, function(d) d$y)))
  # This call sets up axes wide enough for all curves; each curve (including
  # the first) is then drawn on top with lines().
  plot(dens_list[[1]], xlim = xr, ylim = yr, main = "", xlab = xlab,
       cex.lab = 1.5)
  for (i in seq_along(dens_list)) {
    # xlim/ylim are not passed here: axis limits are fixed by plot() above.
    lines(dens_list[[i]], col = i, lwd = 3)
  }
  invisible(NULL)
}
# the input of the following function MUST be a numeric list
# 2 x 2 panel layout; the first panel overlays the trait-1 densities of the
# three species and labels them in the top-right corner.
par(mfrow = c(2, 2))
plot.multi.dens(list(spA.t1, spB.t1, spC.t1))
legend(
  "topright",
  legend = c("Species A", "Species B", "Species C"),
  col = 1:3,
  lty = rep(1, 3),
  lwd = rep(4, 3),
  cex = 0.85,
  bty = "n"
)
# Redefinition of plot.multi.dens used for the trait-2 panel: identical to the
# earlier definition except that the x-axis label defaults to "Trait 2".
#
# Args:
#   s:    non-empty list of numeric vectors; one density curve per element,
#         coloured by its position in the list.
#   xlab: x-axis label (default "Trait 2").
#
# Returns NULL invisibly; the function is called for its plotting side effect.
plot.multi.dens <- function(s, xlab = "Trait 2") {
  if (length(s) == 0) {
    stop("`s` must be a non-empty list of numeric vectors", call. = FALSE)
  }
  # One density estimate per series, computed once and reused below.
  estimates <- lapply(s, density)
  xr <- range(vapply(estimates, function(d) range(d$x), numeric(2)))
  yr <- range(vapply(estimates, function(d) range(d$y), numeric(2)))
  # Set up axes that fit every curve, then draw all curves with lines().
  plot(estimates[[1]], xlim = xr, ylim = yr, main = "", xlab = xlab,
       cex.lab = 1.5)
  for (i in seq_along(estimates)) {
    lines(estimates[[i]], col = i, lwd = 3)
  }
  invisible(NULL)
}
# Second panel: trait-2 densities of the three species.
plot.multi.dens(list(spA.t2, spB.t2, spC.t2))
### Third and fourth panels: each trait against the simulated environment
plot(
  x = env, y = t1.stacked$values,
  xlab = "Environmental gradient", ylab = "Trait 1", cex.lab = 1.5
)
plot(
  x = env, y = t2.stacked$values,
  xlab = "Environmental gradient", ylab = "Trait 2", cex.lab = 1.5
)
###Combine dataset into a simple table
# One row per simulated individual: species label, both traits, environment.
species <- t1.stacked[["ind"]]
trait1 <- t1.stacked[["values"]]
trait2 <- t2.stacked[["values"]]
# cbind() builds a numeric matrix, so the species factor is stored as its
# integer level code rather than as a label.
Sp_data <- cbind(species, trait1, trait2, env)
###END of Data Simulation ###
### Traitspace on the simulated data ###
## STEP 1a: Generalised Linear Model(GLM) to compute P(T/E)
#Sorting the Environmental variable - for prediction
# Prediction grid: the observed environmental values in increasing order.
senv <- sort(env)
env_p <- data.frame(env = senv)

# Trait 1 regressed on the environment. std1 is the standard deviation of the
# residuals; pred1$fit holds the fitted mean and the 95% prediction limits
# (columns "fit", "lwr", "upr") at each value of senv.
lm1 <- lm(trait1 ~ env)
summary(lm1)
std1 <- sd(lm1$residuals)
pred1 <- predict(lm1, env_p, se.fit = TRUE, interval = "prediction",
                 level = 0.95)

# New device (dev.new() instead of the platform-specific x11()) with one panel
# per trait: raw data, fitted line (solid) and prediction limits (dashed).
dev.new()
par(mfrow = c(1, 2))
plot(env, trait1, pch = 16, xlab = "Environmental gradient", ylab = "Trait 1",
     font = 2, cex = 1, font.lab = 2, cex.lab = 1.3)
lines(senv, pred1$fit[, "fit"], lwd = 3, col = 1, lty = 1)
lines(senv, pred1$fit[, "lwr"], lwd = 3, col = 1, lty = 2)
lines(senv, pred1$fit[, "upr"], lwd = 3, col = 1, lty = 2)

# Trait 2 regressed on the environment, handled exactly like trait 1.
lm2 <- lm(trait2 ~ env)
summary(lm2)
std2 <- sd(lm2$residuals)
pred2 <- predict(lm2, env_p, se.fit = TRUE, interval = "prediction",
                 level = 0.95)
plot(env, trait2, pch = 16, xlab = "Environmental gradient", ylab = "Trait 2",
     font = 2, cex = 1, font.lab = 2, cex.lab = 1.3)
lines(senv, pred2$fit[, "fit"], lwd = 3, col = 1, lty = 1)
lines(senv, pred2$fit[, "lwr"], lwd = 3, col = 1, lty = 2)
lines(senv, pred2$fit[, "upr"], lwd = 3, col = 1, lty = 2)
## STEP 1b: Using Mclust to compute P(T/Sk)
#Note that warnings are common - refer to mclust library guide
# First and last row of species 2 and 3 in Sp_data; species 1 occupies rows
# 1..Ntr because the stacked data are ordered species by species.
sp2s <- Ntr + 1
sp2e <- 2 * Ntr
sp3s <- 2 * Ntr + 1
sp3e <- 3 * Ntr

# Per-species trait matrices (columns trait1 and trait2 of Sp_data).
species1 <- Sp_data[seq_len(Ntr), c("trait1", "trait2")]      # species 1
species2 <- Sp_data[seq(sp2s, sp2e), c("trait1", "trait2")]   # species 2
species3 <- Sp_data[seq(sp3s, sp3e), c("trait1", "trait2")]   # species 3

# One Mclust fit per species; the fitted parameters are kept separately
# because the later density evaluation takes them as an argument.
pdf1 <- Mclust(data = species1, warn = TRUE)
par1 <- pdf1[["parameters"]]
pdf2 <- Mclust(data = species2, warn = TRUE)
par2 <- pdf2[["parameters"]]
pdf3 <- Mclust(data = species3, warn = TRUE)
par3 <- pdf3[["parameters"]]
## STEP 2a: Drawing samples from P(T/E)
#Note that in this example we use the locations on the simulated environmental gradient (env)
#for simplicity. But in real situations, you would use the set of environmental conditions
#at which you are interested in predicting species abundances. For example, to test and validate
#the model, you would use environmental conditions measured at plot locations where species
#abundances are known, as we did in Laughlin et al.
N <- 1000  # sample size per location on the environmental gradient

# Storage for N draws per location, laid out location after location.
trait1_sample <- numeric(length(senv) * N)
trait2_sample <- numeric(length(senv) * N)
temp_prd <- numeric(length(senv) * N)  # gradient value of each draw (plotting)
P_T1_E <- numeric(length(senv) * N)
P_T2_E <- numeric(length(senv) * N)

for (j in seq_along(senv)) {
  ct <- (j - 1) * N + 1        # first slot belonging to location j
  rows <- ct:(ct + (N - 1))    # the N slots belonging to location j
  # Draw both traits from normals centred on the fitted value at location j
  # (element j of the first component of each prediction object), using the
  # residual standard deviation of the corresponding model.
  trait1_sample[rows] <- rnorm(N, pred1[[1]][j], std1)
  trait2_sample[rows] <- rnorm(N, pred2[[1]][j], std2)
  temp_prd[rows] <- senv[j]
  # Density of each draw under the distribution it was sampled from.
  P_T1_E[rows] <- dnorm(trait1_sample[rows], pred1[[1]][j], std1)
  P_T2_E[rows] <- dnorm(trait2_sample[rows], pred2[[1]][j], std2)
}

trt_sample <- cbind(trait1_sample, trait2_sample)
## computing(P(T/E)): product of the two trait densities, formed on the log scale
P_T_E <- exp(log(P_T1_E) + log(P_T2_E))
###Plotting samples
# Scatter the sampled trait values against the gradient value they were drawn
# at, one panel per trait. The two plot() calls had been interleaved line by
# line, leaving one call nested inside the other with the y-axis labels
# swapped; they are restored here as two separate calls.
dev.new()
par(mfrow = c(1, 2))
plot(temp_prd, trait1_sample,
     pch = 1, xlab = "Environmental gradient", ylab = "Trait 1 samples",
     font = 2, cex = 0.1, font.lab = 2, cex.lab = 1.3, col = "blue")
plot(temp_prd, trait2_sample,
     pch = 1, xlab = "Environmental gradient", ylab = "Trait 2 samples",
     font = 2, cex = 0.1, font.lab = 2, cex.lab = 1.3, col = "blue")
## Step 2b: Computing the likelihood P(T/Sk) using Mclust done earlier
# Density of every sampled trait pair under each species' fitted mixture.
# Arguments are passed by name so the calls do not depend on the positional
# order of dens().
# NOTE(review): the positional order of dens() appears to differ between
# mclust releases - confirm against the installed version.
P_T_S1 <- dens(modelName = pdf1$modelName, data = trt_sample, parameters = par1)
P_T_S2 <- dens(modelName = pdf2$modelName, data = trt_sample, parameters = par2)
P_T_S3 <- dens(modelName = pdf3$modelName, data = trt_sample, parameters = par3)
P_T_S <- cbind(P_T_S1, P_T_S2, P_T_S3)

## Step 2c: Computing posterior P(Sk/T,E) using Bayes theorem
n_species <- ncol(P_T_S)
P_T_S_pr <- P_T_S / n_species       # likelihood times flat prior (numerator)
P_T_S_pr_sum <- rowSums(P_T_S_pr)   # sum over species (denominator)
# Row-wise normalisation on the log scale. The vector of row sums is recycled
# down the columns, so element [i, k] is divided by the sum of row i.
P_S_T_E <- matrix(0, nrow(P_T_S), n_species)
P_S_T_E[] <- exp(log(P_T_S_pr) - log(P_T_S_pr_sum))
## Step 2d: Posterior P(Sk/E) by integrating out T's
n_loc <- length(senv)
n_sp <- ncol(P_S_T_E)
P_S_E_all <- matrix(0, n_loc * N, n_sp)
P_S_E_unnorm <- matrix(0, n_loc, n_sp)  # unnormalised P_S_E
P_S_E <- matrix(0, n_loc, n_sp)

# Integrand P(T/E) * P(Sk/T,E) for every draw, formed on the log scale. P_T_E
# is recycled down the columns, so row i of P_S_T_E is scaled by P_T_E[i].
P_S_E_all[] <- exp(log(P_T_E) + log(P_S_T_E))

# MC integration and normalisation. Location k owns draws
# (k - 1) * N + 1 .. k * N. The block offset used to be updated at the END of
# each iteration, so every location after the first averaged the draws of the
# previous location and the last block of draws was never used; the rows are
# now derived from k before they are read.
for (k in seq_len(n_loc)) {
  rows <- ((k - 1) * N + 1):(k * N)
  # drop = FALSE keeps a matrix even when N == 1.
  P_S_E_unnorm[k, ] <- colMeans(P_S_E_all[rows, , drop = FALSE])  # MC
  P_S_E[k, ] <- P_S_E_unnorm[k, ] / sum(P_S_E_unnorm[k, ])        # normalisation
}
# Each row of P_S_E should sum to 1, i.e. form a valid posterior distribution
# over the three species at that location.
rowSums(P_S_E)
# Inspect predictions for each species (columns) at each location (rows)
P_S_E

# Plot the relative abundance estimates along the gradient.
dev.new()
# type = "n" sets up empty axes spanning the gradient and [0, 1]; this replaces
# plotting white points, which only looked empty on a white background.
plot(range(senv), c(0, 1), type = "n",
     xlim = range(senv), ylim = c(0, 1),
     xlab = "Environmental gradient", ylab = "Predicted relative abundance",
     cex.lab = 1.4, font.lab = 2, font = 2)
species1_pred <- P_S_E[, 1]
species2_pred <- P_S_E[, 2]
species3_pred <- P_S_E[, 3]
points(senv, species1_pred, col = 1, pch = 16)
points(senv, species2_pred, col = 2, pch = 16)
points(senv, species3_pred, col = 3, pch = 16)
# The legend uses the same filled-circle symbol as the plotted points.
legend("left", c("Species A", "Species B", "Species C"),
       col = c(1, 2, 3), pch = 16, cex = 1.3, bty = "n")
###End of Program
# Download (web-page residue; not part of the program)