# A little piece of code
### SAVE THE CONTENTS OF THIS FILE AS AN R SCRIPT AND PLAY!!
########################################################################################################
# FUNCTION evaldesign
#
# SUMMARY: This function evaluates the performance of a design for the single-season single-species
# occupancy-detection model with constant probabilities of occupancy and detectability, for a user-specified
# scenario. The function generates simulated histories, calculates the corresponding MLEs for psi and p,
# and evaluates estimator performance. It assumes a standard design where the same survey effort is applied
# to all sampled sites.
#
# CALL: e.g. myres<-evaldesign(psi=0.2,p=0.3,E=240,K=4,C1=1,nsims=10000,doprint=1,doplot=1)
#       (first source this file: source("SODA_v150714.R"))
#
# INPUT
#   - psi: assumed value for the probability of occupancy
#   - p: assumed value for the probability of detection
#   - E: total budget available for survey effort (in units of survey visits, but note C1)
#   - K: number of repeat surveys per site
#   - C1: cost of the first visit (in units of other survey visits). This allows considering cases
#         where the first visit to each site is more expensive, e.g. due to travelling costs.
#         C1 is how many times more costly the first visit is.
#   - nsims: number of simulations to run
#   - doprint: print results on screen
#   - doplot: plot the distribution of MLEs
#
# OUTPUT (myres$)
#   - dist: matrix containing the results of the simulation. Each row contains details for
#           each history type summarized by the sufficient statistics (Sd,dT)
#           dist[,1] contains Sd (number of sites where the species was detected)
#           dist[,2] contains dT (total number of detections in the history)
#           dist[,3] contains the probability of obtaining that history type (i.e. pair (Sd,dT))
#           dist[,4] contains the estimate for the probability of occupancy
#           dist[,5] contains the estimate for the probability of detection
#   - biaspsi,varpsi,MSEpsi: occupancy estimator bias/variance/MSE (excl empty histories)
#   - biasp,varp,MSEp: detectability estimator bias/variance/MSE (excl empty histories)
#   - covar: occupancy and detectability estimator covariance (excl empty histories)
#   - critA: sum of the MSEs (excl empty histories)
#   - critD: determinant of the MSE matrix (excl empty histories)
#   - biaspsi_B,varpsi_B,MSEpsi_B: occupancy estimator bias/variance/MSE (excl also boundary estimates)
#   - ... (the remaining *_B elements mirror the ones above, excl also boundary estimates)
#   - pempty: percentage of empty histories obtained
#   - pbound: percentage of histories obtained that produce boundary estimates (i.e. psi=1)
#
# HISTORY:
#   - 2010: original code published as companion to the paper:
#           Guillera-Arroita, G. et al (2010) Design of occupancy studies with imperfect detection.
#           Methods in Ecology and Evolution, 1, 131-139.
#   - April 2015: modified to allow for greater costs for the first survey to each site (C1>1)
#
########################################################################################################
# evaldesign: simulate detection histories for a standard single-season
# occupancy design and summarise the performance of the MLEs of psi and p.
#
# Arguments
#   psi, p   assumed probabilities of occupancy and detection
#   E        total survey budget (in units of survey visits)
#   K        number of repeat surveys per site
#   C1       cost of the first visit to a site, relative to later visits
#   nsims    number of simulated histories (must be >= 1)
#   doprint  print a summary table with cat()
#   doplot   call plotMLEdist() on the resulting distribution
#   mypch, mysymbcex, mymain, myxlab, myylab  forwarded to plotMLEdist()
#
# Value: a list with the history-type table `dist` (columns Sd, dT,
# probability, psi-hat, p-hat), bias/variance/MSE/covariance/criteria excluding
# empty histories, the same quantities with suffix `_B` excluding also
# boundary estimates, and the percentages `pempty` and `pbound`.
#
# Relies on loglikf() (objective passed to optim) and plotMLEdist().
evaldesign <- function(psi,p,E,K,C1=1,nsims=1e4,doprint=TRUE,doplot=TRUE,mypch=19,mysymbcex=1,mymain="",
myxlab=expression(hat("p")),myylab=expression(hat("psi"))) {
  t1 <- proc.time()

  # Number of sites affordable when the first visit costs C1 and the
  # remaining K-1 visits cost one unit each.
  S <- floor(E / (C1 + K - 1))
  if (!is.finite(S) || S < 1) {
    stop("budget E is too small to survey a single site with K = ", K,
         " and C1 = ", C1, call. = FALSE)
  }
  if (!is.finite(nsims) || nsims < 1) {
    stop("nsims must be at least 1", call. = FALSE)
  }
  nsims <- floor(nsims)

  # --- simulate histories, keeping only the sufficient statistics (Sd, dT) ---
  # The RNG calls are made in the same order as in the per-history original
  # (one rbinom for occupancy, one for detections) so seeds stay reproducible.
  sd_sim <- numeric(nsims)
  dt_sim <- numeric(nsims)
  for (ii in seq_len(nsims)) {
    Sp <- rbinom(1, S, psi)                      # occupied sites
    det <- matrix(rbinom(Sp * K, 1, p), Sp, K)   # detections at occupied sites
    sd_sim[ii] <- sum(rowSums(det) > 0)          # unoccupied sites add nothing
    dt_sim[ii] <- sum(det)
  }

  # --- tabulate history types in order of first appearance ---
  keys <- paste(sd_sim, dt_sim, sep = "_")
  is_first <- !duplicated(keys)
  mydist <- matrix(NA_real_, sum(is_first), 5)
  mydist[, 1] <- sd_sim[is_first]
  mydist[, 2] <- dt_sim[is_first]
  mydist[, 3] <- tabulate(match(keys, keys[is_first]), nbins = sum(is_first)) / nsims

  # --- estimate (psi, p) for each history type ---
  for (ii in seq_len(nrow(mydist))) {
    Sd <- mydist[ii, 1]
    dT <- mydist[ii, 2]
    if (Sd == 0) next   # empty history: estimates stay NA
    if (((S - Sd) / S) < ((1 - dT / (S * K))^K)) {
      # boundary estimate
      mydist[ii, 4] <- 1
      mydist[ii, 5] <- dT / (S * K)
    } else {
      # NOTE(review): loglikf() reads its parameters on the logit scale, yet the
      # starting values are the raw (psi, p). Kept as in the original so the
      # numerical results do not change; qlogis() starts may be what was meant.
      fitted1 <- optim(c(psi, p), loglikf, Sd = Sd, dT = dT, S = S, K = K)
      mydist[ii, 4] <- 1 / (1 + exp(-fitted1$par[1]))
      mydist[ii, 5] <- 1 / (1 + exp(-fitted1$par[2]))
    }
  }

  # Bias / variance / MSE / covariance / design criteria for a sub-table `d`,
  # after renormalising its probabilities to sum to one.
  summarise_mle <- function(d) {
    w <- d[, 3] / sum(d[, 3])
    meanpsi <- sum(w * d[, 4], na.rm = TRUE)
    biaspsi <- meanpsi - psi
    varpsi <- sum((d[, 4] - meanpsi)^2 * w)
    MSEpsi <- varpsi + biaspsi^2
    meanp <- sum(w * d[, 5], na.rm = TRUE)
    biasp <- meanp - p
    varp <- sum((d[, 5] - meanp)^2 * w)
    MSEp <- varp + biasp^2
    covar <- sum((d[, 5] - meanp) * (d[, 4] - meanpsi) * w)
    list(biaspsi = biaspsi, varpsi = varpsi, MSEpsi = MSEpsi,
         biasp = biasp, varp = varp, MSEp = MSEp, covar = covar,
         critA = MSEpsi + MSEp, critD = MSEpsi * MSEp - covar^2)
  }

  # drop = FALSE keeps a one-row selection as a matrix; without it the column
  # indexing below fails whenever a single history type remains.
  mydist2 <- mydist[mydist[, 1] != 0, , drop = FALSE]     # excl empty histories
  mydist3 <- mydist2[mydist2[, 4] != 1, , drop = FALSE]   # excl also boundary estimates
  resA <- summarise_mle(mydist2)
  resB <- summarise_mle(mydist3)

  pempty <- 100 * mydist[mydist[, 1] == 0, 3]
  if (length(pempty) == 0) pempty <- 0
  pbound <- 100 * sum(mydist[which(mydist[, 4] == 1), 3])

  # compute processing time
  t2 <- proc.time()

  # print results on screen
  if (doprint) {
    rule <- "--------------------------------------------------------------------------\n"
    f4 <- function(v) sprintf("%+0.4f", v)
    cat("\n", rule, sep = "")
    cat("Evaluation of design: K = ",K,", S = ",S, ", C1 = ",C1," (E = ",S*(C1+K-1),")\n",sep = "")
    cat(rule, sep = "")
    cat("estimator performance (excl empty histories)\n",sep = "")
    cat("psi: bias = ", f4(resA$biaspsi), "   var = ", f4(resA$varpsi),
        "   MSE = ", f4(resA$MSEpsi), "\n", sep = "")
    cat(" p: bias = ", f4(resA$biasp), "   var = ", f4(resA$varp),
        "   MSE = ", f4(resA$MSEp), "\n", sep = "")
    cat(" covar = ", f4(resA$covar), " critA = ", f4(resA$critA),
        " critD = ", sprintf("%+.3e", resA$critD), "\n", sep = "")
    cat("estimator performance (excl also histories leading to boundary estimates)\n",sep = "")
    cat("psi: bias = ", f4(resB$biaspsi), "   var = ", f4(resB$varpsi),
        "   MSE = ", f4(resB$MSEpsi), "\n", sep = "")
    cat(" p: bias = ", f4(resB$biasp), "   var = ", f4(resB$varp),
        "   MSE = ", f4(resB$MSEp), "\n", sep = "")
    cat(" covar = ", f4(resB$covar), " critA = ", f4(resB$critA),
        " critD = ", sprintf("%+.3e", resB$critD), "\n", sep = "")
    cat(" empty histories = ",sprintf("%0.1f",pempty),"%\n",sep = "")
    cat(" boundary estimates = ",sprintf("%0.1f",pbound),"%\n",sep = "")
    cat("this took ", (t2-t1)[1],"seconds \n")
    cat("--------------------------------------------------------------------------\n\n",sep = "")
  }

  # write results
  myres <- list(dist=mydist,
                biaspsi=resA$biaspsi, varpsi=resA$varpsi, MSEpsi=resA$MSEpsi,
                biasp=resA$biasp, varp=resA$varp, MSEp=resA$MSEp,
                covar=resA$covar, critA=resA$critA, critD=resA$critD,
                biaspsi_B=resB$biaspsi, varpsi_B=resB$varpsi, MSEpsi_B=resB$MSEpsi,
                biasp_B=resB$biasp, varp_B=resB$varp, MSEp_B=resB$MSEp,
                covar_B=resB$covar, critA_B=resB$critA, critD_B=resB$critD,
                pempty=pempty, pbound=pbound)
  if (doplot) plotMLEdist(myres$dist,p,psi,mypch,mysymbcex,mymain,myxlab=myxlab,myylab=myylab)
  return(myres)
}
########################################################################################################
########################################################################################################
# FUNCTION loglikf
#
# SUMMARY: Computes the negative log-likelihood for the single-season single-species occupancy model with
# constant probabilities of occupancy and detectability, given a history summarized by (Sd,dT)
#
# INPUT
#   - params: logit-scale values of psi and p where the function is evaluated
#   - Sd: number of sites where the species was detected
#   - dT: total number of detections in the history
#   - S: number of sites surveyed
#   - K: number of replicated surveys per site
# OUTPUT
#   - loglik: value of the negative log-likelihood (the quantity minimised by optim)
#
########################################################################################################
loglikf <- function(params, Sd, dT, S, K) {
  # Negative log-likelihood of the constant single-season occupancy model for
  # a history summarised by (Sd, dT); this is the objective minimised by optim.
  #   params  length-2 vector: logit(psi), logit(p)
  #   Sd      number of sites with at least one detection
  #   dT      total number of detections
  #   S       number of sites surveyed
  #   K       number of surveys per site
  # Back-transform from the logit scale so both probabilities stay in (0, 1).
  psi <- 1/(1+exp(-params[1]))
  p <- 1/(1+exp(-params[2]))
  # Sites with detections contribute psi * p^d * (1-p)^(K-d); the remaining
  # S-Sd sites are either unoccupied or occupied but never detected.
  loglik <- -(Sd*log(psi)+dT*log(p)+(K*Sd-dT)*log(1-p)+(S-Sd)*log((1-psi)+psi*(1-p)^K))
  loglik
}
########################################################################################################
########################################################################################################
# FUNCTION plotMLEdist
#
# SUMMARY: displays the distribution of the MLEs obtained for the given design and values of
# occupancy and detectability (single-season single-species occupancy model with constant probabilities)
#
########################################################################################################
plotMLEdist <- function(mydist, p, psi,mypch=19,mysymbcex=1,mymain="",myxlab=expression(hat("p")),myylab=expression(hat("psi")))
{
  # Scatter plot of the MLE distribution: one point per history type at
  # (p-hat, psi-hat) = (mydist[,5], mydist[,4]), coloured by its probability
  # mydist[,3]. Grey reference lines mark the values p and psi passed in.
  # Rows with NA estimates (empty histories) are not drawn by plot().
  # Returns NULL invisibly.
  mcol <- rev(rainbow(200))[c(52:200,1:16)]
  x <- mydist[,3]
  x_span <- max(x) - min(x)
  if (x_span > 0) {
    # map probabilities linearly onto the palette indices 1..length(mcol)
    y <- as.integer((length(mcol)-1)*(x-min(x))/x_span)+1
  } else {
    # All history types equally likely (e.g. a single type): the linear map
    # would divide by zero and give NA colours, so nothing would be drawn.
    y <- rep(length(mcol), length(x))
  }
  # Square plotting region. NOTE(review): pty is left set to "s" after the call,
  # as in the original, which saved the previous par() but never restored it.
  par(pty = "s")
  plot(mydist[,5],mydist[,4],col=mcol[y],xlim=c(0,1),ylim=c(0,1),xlab="",
       ylab="",pch=mypch,cex=mysymbcex,main=mymain)
  mtext(side=1,line=2.5,text=myxlab)
  mtext(side=2,line=2.0,text=myylab)
  abline(v=p, col="lightgray")
  abline(h=psi, col="lightgray")
  invisible(NULL)
}
########################################################################################################
# Download