#Code to reproduce simulations described in Sigourney et al.

###WARNINGS###
#Filenames may change. May need to rename this file to "Annotated_Simulation_Code_Sigourney_et_al_R3.r"
#Make sure to set working directory to the same folder that the code and data are saved in

# set.seed(122412)

#Load libraries
library(mvtnorm)
library(mrds)
library(reshape)
library(mgcv)
library(lattice)
library(MASS)
library(plyr)
library(Distance)
library(rjags)
library(tweedie)
library(statmod) # Needed to use tweedie family object
library(VGAM)
library(dsm)

################################################################
##SIMULATE DATA
################################################################

#Inputs
N.total=1000 #Total number of grid cells
Area=100 #Area of each grid cell (km^2)
p.surveyed=0.5 #Proportion of the study area that is surveyed
N.observed=round(N.total*p.surveyed) #Number of observed grid cells
N.unobserved=N.total-N.observed #Number of unobserved grid cells

#Parameters of quadratic habitat function
Mu.grid=3 #Average number of animals per grid cell
b0=log(Mu.grid) #Intercept term for habitat relationship
b1=0.5 #Linear term for habitat function
b2=-0.25 #Quadratic term for habitat function

#Simulate habitat covariate value in each grid cell
HabCov.All=rnorm(N.total,0,2)

#Simulate mean abundance in each grid cell
Mean.grid<-exp(b0+b1*HabCov.All+b2*HabCov.All^2)

#Calculate true mean abundance for the study area
Mean.Pop<-sum(Mean.grid)

#Simulate total true abundance in each grid cell
N.grid<-array(NA,N.total)
for (j in 1:N.total){
  N.grid[j]<-round(rtweedie(1, xi=1.3, mu=Mean.grid[j], phi=1))
}
Pop.Size<-sum(N.grid)

#Parameters of detection function
#Trackline detection parameters (i.e., g(0))
p01=0.95 #g(0) for observer 1
p02=0.88 #g(0) for observer 2

#Hazard rate parameters
b.df.0=log(2)
b.df=1.2

#Surface availability
Av.True=0.75
Av_CV=0.05 #Uncertainty in surface availability

#Calculate parameters of a beta distribution (see Pardo et al. 2015 for a similar example)
c_Av=((Av.True*(1-Av.True))/((Av.True*Av_CV)^2))-1
a_Av=c_Av*Av.True
b_Av=c_Av*(1-Av.True)
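#Quick check (not part of the original script): the moment-matched beta parameters above
#should reproduce the assumed availability mean and CV. check_mean and check_cv are
#illustrative names only.
check_mean <- a_Av/(a_Av+b_Av)                                          #should equal Av.True
check_cv   <- sqrt(a_Av*b_Av/((a_Av+b_Av)^2*(a_Av+b_Av+1)))/check_mean  #should be close to Av_CV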
W=5 #Truncation distance (km)

#Function to simulate data
Simulate_data<-function(N.grid, N.observed, b0, b1, b2, b3, b.df.0, p01, p02, W,
                        a_Av, b_Av, Av.True, HabCov.All){

  #Randomly sample grid cells in study area
  samp.grids<-sample(1:N.total,N.observed)

  #Randomly assign search effort for each grid cell (length of transect and area covered)
  Trans.Length<-runif(N.observed,1,10) #Trackline effort in each grid cell (1 to 10 km)
  EFFORT<-2*W*Trans.Length/Area

  #Simulate a random estimate of surface availability from a beta distribution based on true
  #surface availability
  Av.est<-rbeta(1,a_Av,b_Av)

  N<-N.grid[samp.grids]
  HabCov=HabCov.All[samp.grids]

  #Identify grid cells with zero animals
  zero_sights<-which(N==0)
  n_zero<-length(zero_sights)

  #Columns of Dat are: site, observer 1 ID, observer 2 ID, Y_1, Y_2, Distance, Group size,
  #Habitat, Effort, Trans.Length, Seen, Covered, Available
  Dat=matrix(0,sum(N)+n_zero,13)
  pl=1
  for(i in 1:N.observed){
    effort=EFFORT[i]
    if(N[i]==0) {
      Dat[pl,1]=i
      Dat[pl,2]=1 #Observer 1
      Dat[pl,3]=2 #Observer 2
      Dat[pl,6]=NA
      Dat[pl,7]=NA
      Dat[pl,8]=HabCov[i]
      Dat[pl,9]=effort
      Dat[pl,10]=Trans.Length[i]
      pl=pl+1
    }
    else if (N[i]>0){
      for(j in 1:N[i]){
        #Per-animal simulation: coverage, availability, perpendicular distance and the two
        #observers' detections fill columns 1-11 of Dat
        Covered=as.numeric(runif(1)<effort)
        Dat[pl,11]=(Dat[pl,4]+Dat[pl,5])>0*1.0 #seen by at least 1 observer
        Dat[pl,12]=Covered
        Dat[pl,13]=Available
        pl=pl+1
      }
    }
  }

  #Put things in new format (two rows per record, one per observer)
  Dat2=rbind(Dat,Dat)
  ipl=1
  for(irecord in 1:nrow(Dat)){
    Dat2[ipl,1]=Dat[irecord,1]
    Dat2[ipl+1,1]=Dat[irecord,1]
    Dat2[ipl,2]=irecord #match number
    Dat2[ipl+1,2]=irecord
    Dat2[ipl,3]=Dat[irecord,2]
    Dat2[ipl+1,3]=Dat[irecord,3]
    Dat2[ipl,4]=Dat[irecord,4]
    Dat2[ipl+1,4]=Dat[irecord,5]
    Dat2[ipl,5]=1
    Dat2[ipl+1,5]=0
    Dat2[ipl,6]=Dat[irecord,6]
    Dat2[ipl+1,6]=Dat[irecord,6]
    Dat2[ipl,7]=Dat[irecord,7]
    Dat2[ipl+1,7]=Dat[irecord,7]
    Dat2[ipl,8]=Dat[irecord,8]
    Dat2[ipl+1,8]=Dat[irecord,8]
    Dat2[ipl,9]=Dat[irecord,9]
    Dat2[ipl+1,9]=Dat[irecord,9]
    Dat2[ipl,10]=Dat[irecord,10]
    Dat2[ipl+1,10]=Dat[irecord,10]
    Dat2[ipl,11]=Dat[irecord,11]
    Dat2[ipl+1,11]=Dat[irecord,11]
    Dat2[ipl,12]=Dat[irecord,12]
    Dat2[ipl+1,12]=Dat[irecord,12]
    Dat2[ipl,13]=Dat[irecord,13]
    Dat2[ipl+1,13]=Dat[irecord,13]
    ipl=ipl+2
  }

  Dat2=as.data.frame(Dat2)
  colnames(Dat2)=c("Transect","Match","Observer","Obs","Seat","Distance","Group","Habitat",
                   "Effort","Trans.Length","Seen","Covered","Available")
  Dat2[,"Observer"]=as.factor(Dat2[,"Observer"])
  Dat2[,"Distance"]=as.numeric(Dat2[,"Distance"])

  Dat_All=Dat2
  Dat_Sightings=subset(Dat2, Seen==1)
  Dat_Sightings1=subset(Dat_Sightings, Observer==1)
  Dat_No_Sightings=subset(Dat2, Seen==0)

  #For calculating density from mrds
  Region.Total=matrix(NA, nrow=N.observed, ncol=3)
  Samples.Total=matrix(NA, nrow=N.observed, ncol=4)
  HabCov.Total=matrix(NA, nrow=N.observed, ncol=5)
  for (j in 1:N.observed){
    cDat<-subset(Dat_All, Transect==j)
    cEffort<-cDat$Trans.Length[1]
    cHabCov<-cDat$Habitat[1]
    cSeen<-sum(cDat$Seen)
    cI<-as.numeric(cSeen>0)
    Region.Total[j,1]<-j
    Region.Total[j,2]<-Area
    Region.Total[j,3]<-cI
    Samples.Total[j,1]<-j
    Samples.Total[j,2]<-1
    Samples.Total[j,3]<-cEffort
    Samples.Total[j,4]<-cI
    HabCov.Total[j,1]<-j
    HabCov.Total[j,2]<-cEffort
    HabCov.Total[j,3]<-cHabCov
    HabCov.Total[j,4]<-cI
  }
  Region.Total<-as.data.frame(Region.Total)
  colnames(Region.Total)<-c("Region.Label","Area","Indicator")
  Region.Total<-subset(Region.Total, Indicator==1)
  region<-Region.Total[,1:2]

  Samples.Total<-as.data.frame(Samples.Total)
  colnames(Samples.Total)<-c("Region.Label", "Sample.Label", "Effort", "Indicator")
  Samples.Total<-subset(Samples.Total, Indicator==1)
  samples<-Samples.Total[,1:3]
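  #Per-grid-cell habitat covariate table (transect ID, effort, covariate value, and an
  #indicator for whether anything was seen); used later as the segment data for the DSM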
  HabCov.Total<-as.data.frame(HabCov.Total)
  colnames(HabCov.Total)<-c("Transect", "Effort", "HabCov", "Indicator")

  # Out=list(Dat_All=Dat_All, Dat_Sightings=Dat_Sightings, HabCov=HabCov, EFFORT=EFFORT, Trans.Length=Trans.Length,
  #          region=region, HabCov.Total=HabCov.Total, samples=samples, Npred=Npred, HabCov.pred=HabCov.pred,
  #          Mean.Pop=Mean.Pop, Dat_No_Sightings=Dat_No_Sightings)
  Out=list(Dat_All=Dat_All, Dat_Sightings=Dat_Sightings, HabCov=HabCov, EFFORT=EFFORT, Trans.Length=Trans.Length,
           region=region, HabCov.Total=HabCov.Total, samples=samples, Av.est=Av.est)
}

#Run multiple simulations
Batch<-1
sims<-20
Dat_Mat<-matrix(NA,nrow=sims,ncol=23)

#Simulate a dataset
Out=Simulate_data(N.grid=N.grid, N.observed=N.observed, b0=b0, b1=b1, b2=b2, b3=b3,
                  b.df.0=b.df.0, p01=p01, p02=p02, W=W, a_Av=a_Av, b_Av=b_Av,
                  Av.True=Av.True, HabCov.All=HabCov.All)

################################################################################
#Run Two-Stage Method
################################################################################

#Organize data from sim for input into DSM analysis
Hab_Mat<-Out$HabCov.Total

#Change field names to match required format for Distance
All_Dat<-Out$Dat_Sightings
colnames(All_Dat)<-c("Region.Label", "object", "observer", "detected", "seat", "distance", "size",
                     "habitat", "effort", "Effort", "seen", "covered", "available")
Area_vec<-array(Area,dim(All_Dat)[1])
Area_vec<-as.data.frame(Area_vec)
colnames(Area_vec)<-c("Area")
Sample_vec<-array(1,dim(All_Dat)[1])
Sample_vec<-as.data.frame(Sample_vec)
colnames(Sample_vec)<-c("Sample.Label")
All_Dat<-cbind(All_Dat, Area_vec, Sample_vec)

#Calculate estimate of g(0) from double-observer data using mrds
mrds.model=ddf(dsmodel = ~mcds(key = "hr", formula = ~1),
               mrmodel = ~glm(~observer+distance),
               data = All_Dat, method = "io", meta.data = list(width = W))
result.sum<-summary(mrds.model)

#Extract g(0) estimate and calculate its CV
g0<-result.sum$mr.summary$average.p0
g0_se<-result.sum$mr.summary$average.p0.se
g0_cv<-g0_se/g0

#Get estimate of surface availability
Av.est<-Out$Av.est

#Make data inputs for dsm analysis
Transect.Label<-matrix(9999,nrow=dim(Hab_Mat)[1],ncol=1)
colnames(Transect.Label)<-c("Transect.Label")
Hab_Mat<-cbind(Hab_Mat,Transect.Label)
Hab_Mat<-Hab_Mat[,c(2,6,1,3,4)]
segdata<-rename(Hab_Mat, c("Transect"="Sample.Label"))

All_Dat_1<-subset(All_Dat, observer==1)
All_Dat_1$detected=1
obsdata<-All_Dat_1[,c(2,1,7,6,9,11)]
obsdata<-rename(obsdata, c("Region.Label"="Sample.Label"))

hr.model <- ds(All_Dat_1, W, formula=~1, key = "hr", adjustment = NULL)

#Fit DSM and include estimates of g(0) and surface availability in the offset
mod1 <- dsm(count~s(HabCov), hr.model, segdata, obsdata, availability=Av.est*g0, family=tw(link="log"))

#Get grid cell area to make predictions
area<-matrix(Area,nrow=length(HabCov.All),ncol=1)
colnames(area)<-c("area")
preddata<-data.frame(HabCov=HabCov.All, area=Area)

#Predictions
dsm.preds <- predict(mod1, preddata, preddata$area)

#Calculate variance
mod1.var <- dsm.var.gam(mod1, preddata, off.set=preddata$area)
out1<-summary(mod1.var)
Nhat1<-out1$pred.est
se1<-out1$se

#Use the delta method to add the surface availability and g(0) variances
Nhat1_cv<-(out1$cv^2+Av_CV^2+g0_cv^2)^0.5
cv1<-Nhat1_cv

#Calculate log-normal 95% confidence intervals
log95ci.f = function(x,y){
  var1nnum <- log(1+y^2)
  c <- exp(1.96 * sqrt(var1nnum))
  up95ci <- x*c
  lo95ci <- x/c
  data.frame(low = round(lo95ci, 10), high = round(up95ci, 10))
}
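#Illustrative example (values not from the simulation): an estimate of 1000 animals with
#CV = 0.2 gives a log-normal 95% CI of roughly 678 to 1474, i.e. log95ci.f(1000, 0.2)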
sci1<-log95ci.f(Nhat1, Nhat1_cv)
lower1<-sci1$low
upper1<-sci1$high

#Calculate coverage (does the interval contain the true mean abundance?)
coverage1<-as.numeric(Mean.Pop<upper1 & Mean.Pop>lower1)

################################################################################
#Run Bayesian Method
################################################################################

#############################################
#Organize sim data for input into jagam model
#############################################
Dat_All<-as.data.frame(Out$Dat_All)
HabCov<-Out$HabCov
Sim_Sightings<-Out$Dat_Sightings
Effort<-Out$EFFORT
Trans.Length<-Out$Trans.Length

#Sum up number of sightings per grid cell
Sightings_Mat<-aggregate(Seen~Transect, data=Sim_Sightings, sum)
sight_vec<-Sightings_Mat[,1]
N_sights<-length(sight_vec)
Sights<-Sightings_Mat[,2]/2 #each detection appears twice (one row per observer)

#Make a vector of sightings
Sightings=matrix(0,N.observed,1)
for (i in 1:N.observed){
  for (j in 1:N_sights){
    if(sight_vec[j]==i){
      Sightings[i]=Sights[j]
    }
  }
}

All_Dat_Count<-Sim_Sightings
N_total<-dim(All_Dat_Count)[1]

#Format sightings data for Bayesian MRDS analysis
Dat1<-matrix(NA,N_total,4) #For Observer 1
Dat2<-matrix(NA,N_total,4) #For Observer 2
Transect_t<-All_Dat_Count$Transect
Observ_t<-All_Dat_Count$Observer
Detect_t<-All_Dat_Count$Obs
Dist_t<-All_Dat_Count$Distance
for (i in 1:N_total) {
  #Dat1
  Dat1[i,1]=Transect_t[i]
  Dat1[i,2]=Observ_t[i]
  Dat1[i,3]=Detect_t[i]
  Dat1[i,4]=Dist_t[i]
  #Dat2
  Dat2[i,1]=Transect_t[i]
  Dat2[i,2]=Observ_t[i]
  Dat2[i,3]=Detect_t[i]
  Dat2[i,4]=Dist_t[i]
}
Dat1<-as.data.frame(Dat1)
colnames(Dat1)=c("Transect", "Observer", "Detected", "Distance")
Dat1<-subset(Dat1,Observer==1)
Obs1<-Dat1$Detected
Dat2<-as.data.frame(Dat2)
colnames(Dat2)=c("Transect", "Observer", "Detected", "Distance")
Dat2<-subset(Dat2,Observer==2)
Obs2<-as.numeric(Dat2$Detected)
N_sights<-length(Obs2)

y10<-array(NA,N_sights) #Seen by Observer 1 and not 2
y01<-array(NA,N_sights) #Seen by Observer 2 and not 1
y11<-array(NA,N_sights) #Seen by both Observers
for (i in 1:N_sights){
  y10[i]<-(1*(Obs1[i]>0))*(1*(Obs2[i]<1))
  y01[i]<-(1*(Obs1[i]<1))*(1*(Obs2[i]>0))
  y11[i]<-(1*(Obs1[i]>0))*(1*(Obs2[i]>0))
}

#Final input for Bayesian MRDS analysis
Dat_Sightings<-cbind(Dat1,y10,y01,y11)

#Make input for habitat model (observed grid cells with covariate values)
Final_Cov_Mat<-cbind(Sightings,HabCov)
Final_Cov_Mat<-as.data.frame(Final_Cov_Mat)
colnames(Final_Cov_Mat)<-c("NSights", "HabCov")

#Input of all grid cells (including unobserved) to predict on
Final_Cov_Mat_Pred<-matrix(NA,nrow=length(HabCov.All), ncol=2)
Final_Cov_Mat_Pred[,1]<-1 #Dummy variable for NSights
Final_Cov_Mat_Pred[,2]<-HabCov.All
Final_Cov_Mat_Pred<-as.data.frame(Final_Cov_Mat_Pred)
colnames(Final_Cov_Mat_Pred)<-c("NSights", "HabCov")

#Define habitat model (GAM) using the jagam function
jd <- jagam(NSights~s(HabCov, bs="ts", k=5), data=Final_Cov_Mat,
            family = poisson(link=log), file = "Sim_jagam_Single_Cov")

#GAM data
X<-jd$jags.data$X
S1<-jd$jags.data$S1
zero<-jd$jags.data$zero

#Initial starting values for MCMC sampling
b<-jd$jags.ini$b
lambda<-jd$jags.ini$lambda
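#The JAGS model below also needs X.pred, the design matrix for predictions over all grid
#cells on the same smooth basis as X. A minimal sketch of one way to build it (an
#assumption for illustration, not necessarily how the original script constructed it):
#evaluate the jagam smooth at the prediction covariates and prepend the intercept column.
X.pred <- cbind(1, PredictMat(jd$pregam$smooth[[1]], Final_Cov_Mat_Pred))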
#JAGS module
load.module("glm")

#Input for the Compound Poisson-Gamma likelihood
NonZeroObs<-which(Sightings>0)
Nnonzero<-length(NonZeroObs)
ZeroObs<-which(Sightings==0)
Nzero<-length(ZeroObs)

#For integrating the hazard rate model
#Number of steps in the integration (a higher number is more precise but slows down the model)
steps<-50
#Make a vector of distances to integrate over, from 0 to the truncation distance
y.step_all<-seq(0.01,W, length=steps)
y.step<-y.step_all[1:(steps-1)]
y.step_plus1<-y.step_all[2:steps]
step.size<-W/(steps-1)

N_Sights<-dim(Dat_Sightings)[1]

#Vector of ones for the "ones trick" (used to code the hazard rate likelihood in JAGS)
ones.dist<-array(NA,N_Sights)
for (i in 1:N_Sights){
  ones.dist[i]<-1
}

#Beta prior on surface availability
c_Av.est=((Av.est*(1-Av.est))/((Av.est*Av_CV)^2))-1
a_Av.est=c_Av.est*Av.est
b_Av.est=c_Av.est*(1-Av.est)

#Run model in JAGS
#Name script
sink("Sim_Model.bugs")
cat("
model{

  ######
  # DETECTABILITY COMPONENT OF MODEL
  #####

  #Priors for MRDS parameters
  #MR parameters
  b1 ~ dnorm(0,0.0001)
  b2 ~ dnorm(0,0.0001)
  b.dist ~ dnorm(0,0.0001)

  #DS parameters (Hazard Rate)
  b.df.0 ~ dunif(0,10) #Hazard rate scale parameter
  b.df ~ dunif(0,10)   #Hazard rate shape parameter

  #Prior for power parameter of Tweedie distribution
  nu <- (2 - zeta)/(zeta - 1)
  zeta ~ dunif(1.01,2) #Needs to be truncated between (just above) 1 and 2

  #Informed prior for surface availability
  Av ~ dbeta(a_Av.est, b_Av.est)

  ##### BEGIN MODEL LOOP for MRDS analysis
  for(i in 1:N_Sights){

    ##################
    #MR Part
    ##################
    logit(p1[i])<-b1+b.dist*dist[i]
    logit(p2[i])<-b2+b.dist*dist[i]
    p.dot[i]<-p1[i]+p2[i]-p1[i]*p2[i]
    logit(p1.0[i])<-b1
    logit(p2.0[i])<-b2
    g0[i]<-p1.0[i]+p2.0[i]-p1.0[i]*p2.0[i]
    pi.1[i]<-p1[i]*(1-p2[i])/p.dot[i] #y10
    pi.2[i]<-(1-p1[i])*p2[i]/p.dot[i] #y01
    pi.3[i]<-p1[i]*p2[i]/p.dot[i]     #y11
    Obs[i,1]~dbern(pi.1[i])
    Obs[i,2]~dbern(pi.2[i])
    Obs[i,3]~dbern(pi.3[i])

    ####################
    #DS Part
    ###################
    mu.df[i] <- b.df.0 #Can add detection covariates here if you want

    #Numerically integrate the hazard rate function (trapezoid rule)
    esw_pred[i,1]<-0
    for (c in 1:(steps-1)){
      dx1[i,c]<- 1-exp(-((y.step[c]/mu.df[i])^(-b.df)))
      dx2[i,c]<- 1-exp(-((y.step_plus1[c]/mu.df[i])^(-b.df)))
      esw_pred[i,c+1]<-esw_pred[i,c]+0.5*step.size*(dx1[i,c]+dx2[i,c])
    }
    esw[i]<-esw_pred[i,steps]

    #Ones trick
    L.f0[i] <- (1-exp(-((dist[i]/mu.df[i])^(-b.df)))) * 1/esw[i] #hazard rate likelihood
    phi[i]<- L.f0[i]/C
    ones.dist[i] ~ dbern(phi[i])

  } #End detection function loop over observed distances (sightings)

  mean.esw <- mean(esw[]) #Calculate effective strip width
  mean.g0<-mean(g0)

  #Estimate detection probability for a grid cell (no detection covariates, so it is the same for all grid cells)
  P.grid<-mean.g0*mean.esw/W*Av

  ###################################################################################################################
  #GAM Stuff
  gamma1 ~ dunif(-100,100)
  gammaHabCov ~ dunif(-100,100)
  b.plusgamma[1]<-b[1]+gamma1
  b.plusgamma[2:5]<-b[2:5]+gammaHabCov
  b.minusgamma[1]<-b[1]-gamma1
  b.minusgamma[2:5]<-b[2:5]-gammaHabCov

  #Predictions on observed grid cells
  eta <- X %*% b ## linear predictor
  eta.plusgamma<-X %*% b.plusgamma
  eta.minusgamma<-X %*% b.minusgamma
  eta.pred <- X.pred %*% b ## Make predictions on all grid cells

  ## Parametric effect priors CHECK tau=1/22^2 is appropriate!
  for (i in 1:1) { b[i] ~ dnorm(0,0.0021) }

  ## prior for s(HabCov)...
  K1 <- S1[1:4,1:4] * lambda
  b[2:5] ~ dmnorm(zero[2:5],K1)
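  ## (Note: the parametric-effect prior above and the smoothing-parameter prior below
  ## follow the template that mgcv::jagam writes to the model file.)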
  ## smoothing parameter priors CHECK...
  for (i in 1:1) {
    lambda ~ dgamma(.05,.005)
    rho <- log(lambda)
  }

  ##### BEGIN MODEL LOOP for habitat function (GAM) over all observed grid cells
  for (t in 1:N_t) {
    effort_offset[t]<-Effort[t]*2*W
    mu[t] <- exp(eta[t]) ## expected density
    mu_g[t] <- exp(eta.plusgamma[t]/2)
    mu_p[t] <- exp(eta.minusgamma[t]/2)
    eff.mu_p[t]<-mu_p[t]*P.grid*effort_offset[t]
  }

  #Tweedie/CPG likelihood for observed sightings in grid cell t
  for (i in 1:Nnonzero){
    Sightings[NonZeroObs[i],1] ~ dgamma(psi[NonZeroObs[i]] * nu , nu / mu_g[NonZeroObs[i]])
    psi[NonZeroObs[i]] ~ dpois(eff.mu_p[NonZeroObs[i]])
  }
  for (i in 1:Nzero){
    Sightings[ZeroObs[i],1] ~ dpois(eff.mu_p[ZeroObs[i]])
    psi[ZeroObs[i]] ~ dpois(eff.mu_p[ZeroObs[i]])
  }

  #Estimate abundance for entire study area
  for (j in 1:N.total){
    N.grid[j]<-exp(eta.pred[j])*Area
  }
  pop.obs<-sum(mu[])*100 #multiply density by grid cell area (100 km^2)
  Pop<-sum(N.grid[])

} # end model
",fill=TRUE)
sink()

#Bundle data
Model.data <- list(
  N_Sights=dim(Dat_Sightings)[1],
  Sightings=Sightings,
  Obs=Dat_Sightings[,5:7],
  K=10000,
  C=10000,
  ones.dist=ones.dist,
  W=W,
  N_t=N.observed,   #Number of observed grid cells
  N.total=N.total,  #Number of all grid cells
  dist=c(Dat_Sightings[,4]),
  Area=Area,
  Effort=Trans.Length,
  a_Av.est=a_Av.est,
  b_Av.est=b_Av.est,
  steps=steps,
  y.step=y.step,
  y.step_plus1=y.step_plus1,
  step.size=step.size,
  #GAM data
  X=X,
  X.pred=X.pred,
  S1=S1,
  zero=zero,
  #Tweedie/CPG inputs
  NonZeroObs=NonZeroObs,
  Nnonzero=Nnonzero,
  ZeroObs=ZeroObs,
  Nzero=Nzero
)

#Initial starting values for MCMC sampling
inits <- function(){list(
  #Detection probability parameters
  b.df.0 = runif(1,1,3), #Scale parameter
  b.df = runif(1,0,1),   #Shape parameter
  b1 = runif(1,0,1),
  b2 = runif(1,0,1),
  b.dist = runif(1,0,1),
  #CPG/Tweedie inits
  psi=as.numeric(Sightings != 0),
  gamma1=0,
  gammaHabCov=0,
  #GAM inits
  b=b,
  lambda=lambda
)}

#Define parameters to be monitored
varsToMonitor<-c("b.df.0", "b.df", "b.dist", "b1", "b2", "mean.esw", "P.grid", "mu", "eff.mu_p",
                 "eta", "eta.pred", "lambda", "b", "rho", "mean.g0", "Av", "N.grid", "gamma1",
                 "gammaHabCov", "zeta", "nu", "effort_offset", "Pop", "pop.obs")

#MCMC parameters
nitt=50000
burnin=20000
chains=1
thin=50

#Sample model
jm <- jags.model("Sim_Model.bugs", data = Model.data, inits = inits, n.adapt=burnin, n.chains = chains)
output <- jags.samples(jm, varsToMonitor, n.iter = nitt, thin = thin)

#Make posterior predictions on all grid cells
jam <- sim2jam(output, jd$pregam)
pd <- data.frame(HabCov=HabCov.All)
Xp <- predict(jam, type = "lpmatrix", newdata = pd)
preds_mat <- Xp %*% output$b[, , 1]
preds<-exp(preds_mat)
N_mat<-preds*Area
Pop_vec<-colSums(N_mat)

#################
#Summarize posterior estimates of abundance
####################
Pop_est.BH<-median(Pop_vec)
Pop_mean.BH<-mean(Pop_vec)
Pop_sd.BH<-sd(Pop_vec)
Pop_cv.BH<-Pop_sd.BH/Pop_est.BH

sort_pop<-sort(Pop_vec) #For calculating 95% credible intervals
n.sims<-length(sort_pop)
upper.BH<-sort_pop[round(0.975*n.sims)]
lower.BH<-sort_pop[round(0.025*n.sims)]

#Calculate coverage (does the credible interval contain the true mean abundance?)
coverage.BH<-as.numeric(Mean.Pop<upper.BH & Mean.Pop>lower.BH)
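#Optional summary of this run's headline results for a side-by-side comparison of the two
#methods (illustrative only; the object name 'run_summary' is not part of the original
#script, which pre-allocates Dat_Mat for storing results across simulations).
run_summary <- c(Truth = Mean.Pop,
                 DSM.Nhat = Nhat1, DSM.CV = cv1, DSM.coverage = coverage1,
                 Bayes.Nhat = Pop_est.BH, Bayes.CV = Pop_cv.BH, Bayes.coverage = coverage.BH)
print(run_summary)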