# Thomson et al. -- Predicting dispersal mechanisms
# Appendix S3: Code for analyses
#
### Code for the univariate analysis, prediction, fixed effects and
### multivariate models ###
#
# Notes:
# - File names in the code (e.g. "dispersal4mltvar.csv" and "eu4predict.csv")
#   need to be changed to your own file names.
# - If the model-building data are changed, the multivariate models
#   (sub-models) need to be rebuilt.
# - Any data for prediction must be in the same layout as the model-building
#   dataset (refer to Appendix S2).

library(lme4)

# The original data for building the models
dat <- read.csv("dispersal.csv")

# The following variable names are from the original dispersal data
dat <- dat[order(dat$family, dat$species), ]
dat <- dat[dat$other == 0, ]  # Adjust the original dispersal data

# The function of univariate analysis.
#
# For every column of X, fits a separate binomial mixed model
#   y ~ <predictor> + (1 | Family)
# and collects the predictor's coefficient, standard error and two-sided
# Wald p-value.
#
# Arguments:
#   y  response vector (one value per row of X)
#   X  data frame of predictors; each column is fitted on its own
#   z  grouping vector used as the random intercept ("Family")
#
# Returns a character matrix with one row per column of X and the columns
# "variable", "coef", "se", "pvalue". Numbers are truncated (floor) to four
# decimals.
#
# NOTE(review): assumes every predictor contributes exactly one coefficient
# (numeric or 0/1 column); a multi-level factor would yield several slope
# rows -- confirm against the data layout.
unimod <- function(y, X, z) {
  stopifnot(is.data.frame(X), ncol(X) >= 1L)

  mod_fit <- data.frame(y, X, z)
  colnames(mod_fit)[1] <- "y.fit"
  colnames(mod_fit)[ncol(mod_fit)] <- "Family"

  fit_one <- function(vnm) {
    form <- as.formula(paste("y.fit ~", vnm, "+ (1 | Family)"))
    # glmer() is the supported entry point for non-Gaussian families;
    # lmer(..., family = ) is deprecated.
    mm <- glmer(form, family = binomial, data = mod_fit)
    est <- fixef(mm)
    se <- sqrt(diag(vcov(mm)))
    # Two-sided p-value from the Wald z statistic.
    pp <- pnorm(est / se)
    pp <- 2 * ifelse(pp > 0.5, 1 - pp, pp)
    # Drop the intercept row ([-1, ]) so the predictor's own statistics are
    # reported for every predictor, not only for the first one.
    c(vnm, floor(cbind(est, se, pp)[-1, ] * 10000) / 10000)
  }

  # lapply over the names avoids the backwards 2:n.vars loop when X has a
  # single column.
  univar <- do.call(rbind, lapply(names(X), fit_one))
  colnames(univar) <- c("variable", "coef", "se", "pvalue")
  univar
}

# The univariate analysis of the response variables
nn <- ncol(dat) - 2  # Define the column of the last response variable
# Univariate analysis of every response variable (columns 23 to nn of dat)
stopifnot(nn >= 23)  # 23:nn would silently run backwards otherwise
z <- dat[, 2]                                      # The column of the family for each species
x <- dat[, c(4, 7:12, 14:22, (nn + 1):(nn + 2))]   # The columns for the independent variables

gr <- do.call(rbind, lapply(23:nn, function(kk) {
  # Prefix each block of univariate results with the response's column name.
  cbind(names(dat)[kk], unimod(dat[, kk], x, z))
}))
# The header goes into the column names (not an extra data row), so the file
# gets exactly one header line.
colnames(gr) <- c("response", "variable", "coef", "se", "pvalue")

# The outputs of the fixed effects in the univariate analysis
write.table(gr, file = "dispersal4univar.csv", sep = ",",
            row.names = FALSE, col.names = TRUE)

# The extract function of fixed effects in the multivariate analysis.
#
# Argument:
#   yy  a fitted lme4 model
#
# Returns a character matrix with one row per non-intercept fixed effect and
# the columns "response", "variable", "deviance", "coef", "se", "pvalue".
# The deviance is floored to an integer; coef/se/pvalue are truncated (floor)
# to four decimals. The p-value is the two-sided Wald z-test.
fxf <- function(yy) {
  est <- fixef(yy)
  se <- sqrt(diag(vcov(yy)))
  pp <- pnorm(est / se)
  pp <- 2 * ifelse(pp > 0.5, 1 - pp, pp)
  # Deparsed left-hand side of the model formula, e.g. "wind".
  rsp <- as.character(formula(yy))[2]
  # drop = FALSE keeps a matrix even for a single slope, so one code path
  # serves models with one or many predictors.
  slopes <- floor(cbind(est, se, pp)[-1, , drop = FALSE] * 10000) / 10000
  # names(est)[-1] labels each row with its predictor (not "(Intercept)").
  fxr <- cbind(rsp, names(est)[-1], floor(deviance(yy)), slopes)
  colnames(fxr) <- c("response", "variable", "deviance", "coef", "se", "pvalue")
  fxr
}

# New data for the prediction and their variables names need to be the same
# as the original data
dat0 <- read.csv("euspecies.csv")
dat0 <- dat0[order(dat0$family, dat0$species), ]
dat0 <- dat0[dat0$other == 0, ]

# The extract function for predicting the new data.
#
# Arguments:
#   yy       a fitted lme4 model with a random intercept named "family"
#   newdata  data frame to predict for; defaults to the global dat0. Must
#            contain "family", "species" and every fixed-effect predictor
#            of yy as a column.
#
# Returns a data frame with the columns family, species and one column named
# after the model's response holding the predicted probability (truncated to
# four decimals), ordered by family and species. Rows whose family has no
# estimated random intercept in yy are dropped.
prd <- function(yy, newdata = dat0) {
  fam_eff <- ranef(yy)$family
  # Keep p numeric from the start; the previous factor round-trip via
  # levels() breaks when strings are not converted to factors (R >= 4.0).
  fmn <- data.frame(family = rownames(fam_eff), p = fam_eff[, 1],
                    stringsAsFactors = FALSE)
  dat1 <- merge(newdata, fmn, all.x = TRUE)
  dat1 <- dat1[!is.na(dat1$p), ]

  beta <- fixef(yy)
  xname <- names(beta)[-1]
  absent <- setdiff(xname, names(dat1))
  if (length(absent) > 0) {
    stop("prediction data lack model predictors: ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # Linear predictor: intercept + family random effect + sum(beta * x).
  cnf <- beta[[1]] + dat1$p
  for (nm in xname) {
    cnf <- cnf + dat1[[nm]] * beta[[nm]]
  }
  # plogis() is the inverse logit without the overflow of exp(x) / (1 + exp(x)).
  dat1$p <- floor(plogis(cnf) * 10000) / 10000

  rsp <- as.character(formula(yy))[2]
  dat1 <- dat1[, c("family", "species", "p")]
  colnames(dat1)[3] <- rsp
  dat1 <- dat1[order(dat1$family, dat1$species), ]
  dat1
}

# Multivariate models need to be rebuilt if the original data are changed.
# Formulas are written out literally in each call so that formula() on the
# fitted model returns the actual model formula.
fits <- list(
  glmer(wind ~ shrub + tree + canseed + (1 | family),
        family = binomial, data = dat),
  glmer(water ~ herb + F + sdwt100 + (1 | family),
        family = binomial, data = dat),
  glmer(Ant ~ climber + herb + tree + GSWM + RF + (1 | family),
        family = binomial, data = dat),
  glmer(endo ~ climber + herb + tree + RF + sdwt01 + sdwt100 + (1 | family),
        family = binomial, data = dat),
  glmer(exo ~ GSWM + (1 | family),
        family = binomial, data = dat),
  glmer(vert ~ climber + tree + RF + sdwt01 + sdwt100 + (1 | family),
        family = binomial, data = dat),
  glmer(none ~ herb + tree + GSWM + sdwt01 + (1 | family),
        family = binomial, data = dat)
)

# Fixed effects of all models stacked; predictions joined on family/species.
mltv <- do.call(rbind, lapply(fits, fxf))
colnames(mltv) <- c("response", "variable", "deviance", "coef", "se", "pvalue")
prdct <- Reduce(function(acc, nxt) merge(acc, nxt, all = TRUE),
                lapply(fits, prd))

# The outputs of the fixed effects in the multivariate analysis
write.table(mltv, file = "dispersal4mltvar.csv", sep = ",",
            row.names = FALSE, col.names = TRUE)

# The outputs of the prediction
write.table(prdct, file = "eu4predict.csv", sep = ",",
            row.names = FALSE, col.names = TRUE)