#BRUSHY CREEK SPRING MANUSCRIPT PREP #IMPORT FILE BCS=read.csv(file.choose()) str(BCS) BCS$Date=as.Date(BCS$Date,tryFormats=c("%m/%d/%Y")) BCS$Sal_Count=as.numeric(BCS$Sal_Count) count=BCS$Sal_Count presence=rep(NA,length(count)) for(i in 1:length(count)){ presence[i]=ifelse(count[i] > 0,1,0) } BCS$presence=presence #create dataset that excludes the spring-run portion of the site. BCS_C=BCS[which(BCS$Location=="culvert"),] str(BCS_C) #identify outlier - This must be equipment failure or incorrectly reported in the field (it happens!! SORRY) which(BCS_C$DO==max(BCS_C$DO)) #[1] 31 BCS_C[31,"Date"] #[1] "2020-02-05" BCS_C[31,"DO"]=NA #REMOVE OUTLIER #identify outlier - this happens to be when there was an irregation leak upstream which(BCS_C$Cond==min(BCS_C$Cond)) #[1] 29 BCS_C[29,"Date"] #[1] "2019-10-03" BCS_C[29,"Cond"]=NA #REMOVE OUTLIER #####EXPLORE WATER CHEMISTRY DATA####################################################################################################### ##comepare run from culvert for these measures t.test(BCS[which(BCS$Location=="run"),"Temp"],BCS[which(BCS$Location=="culvert"),"Temp"]) t.test(BCS[which(BCS$Location=="run"),"DO"],BCS[which(BCS$Location=="culvert"),"DO"]) t.test(BCS[which(BCS$Location=="run"),"pH"],BCS[which(BCS$Location=="culvert"),"pH"]) #SIGNIFICANT t.test(BCS[which(BCS$Location=="run"),"Cond"],BCS[which(BCS$Location=="culvert"),"Cond"]) #SIGNIFICANT #this portion of the site is highly influenced by runoff. Unhelpful for our hypotheses. ##Compare salamander capture days with salamander non-capture days WITHIN the culvert t.test(BCS_C[which(BCS_C$Sal_Count>0),"Temp"],BCS_C[which(BCS_C$Sal_Count==0),"Temp"]) t.test(BCS_C[which(BCS_C$Sal_Count>0),"DO"],BCS_C[which(BCS_C$Sal_Count==0),"DO"]) t.test(BCS_C[which(BCS_C$Sal_Count>0),"pH"],BCS_C[which(BCS_C$Sal_Count==0),"pH"]) t.test(BCS_C[which(BCS_C$Sal_Count>0),"Cond"],BCS_C[which(BCS_C$Sal_Count==0),"Cond"]) #Strong indication Water Chemistry metrics are not good predictors of salamander occurrence. ######################################################################################################################################## #VISUALIZE DIFFERENCES AMONG WATER CHEMSITRY library(ggplot2) library(grid) library(gridBase) library(gtable) library(gridExtra) library(extrafont) loadfonts(device="win") library(Rmisc) #for function summarySE, used for grabbing CI's for group means #MAKE DATASETS FOR PLOTTING GROUP MEANS WITH CI's temp.df <- summarySE(BCS,measurevar = "Temp", groupvars = c("Location","presence"),na.rm=T) pH.df <- summarySE(BCS,measurevar = "pH", groupvars = c("Location","presence"),na.rm=T) DO.df <- summarySE(BCS,measurevar = "DO", groupvars = c("Location","presence"),na.rm=T) cond.df <- summarySE(BCS,measurevar = "Cond", groupvars = c("Location","presence"),na.rm=T) dodge=position_dodge(0.5) #this is handy, and allows the errorbars and points to move in unison temp.fig <- ggplot(temp.df, aes(x=presence, y=Temp,group=Location,color=Location)) + geom_errorbar(aes(ymin=Temp-ci, ymax=Temp+ci), width=.1, size=1, position=dodge, show.legend = F) + geom_point(size=4,position=dodge, show.legend = F) + xlab("Salamander Occurrence") + ylab("Temperature (°C)") + ylim(20, 24) + scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present")) + theme(axis.text.x = element_blank(), axis.text.y = element_text(family="Times New Roman",color = "black", size = 10), axis.title.x = element_blank(), axis.title.y = element_text(family="Times New Roman",color = "black", size = 12), legend.position = "right", legend.title = element_blank()) pH.fig <- ggplot(pH.df, aes(x=presence, y=pH,group=Location,color=Location)) + geom_errorbar(aes(ymin=pH-ci, ymax=pH+ci), width=.1, size=1, position=dodge, show.legend = F) + geom_point(size=4,position=dodge, show.legend = F) + xlab("Salamander Occurrence") + ylab("pH") + scale_y_continuous(limits = c(6.8,8.1), breaks = seq(6.8,8.1,0.2)) + scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present")) + theme(axis.text.x = element_blank(), axis.text.y = element_text(family="Times New Roman",color = "black", size = 10), axis.title.x = element_blank(), axis.title.y = element_text(family="Times New Roman",color = "black", size = 12)) DO.fig <- ggplot(DO.df, aes(x=presence, y=DO,group=Location,color=Location)) + geom_errorbar(aes(ymin=DO-ci, ymax=DO+ci), width=.1, size=1, position=dodge, show.legend = F) + geom_point(size=4, position=dodge, show.legend = F) + xlab("Salamander Occurrence") + ylab("Dissolved Oxygen (mg/L)") + ylim(4,9) + scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present")) + theme(axis.text.x = element_text(family="Times New Roman",color = "black", size = 10), axis.text.y = element_text(family="Times New Roman",color = "black", size = 10), axis.title.x = element_text(family="Times New Roman",color = "black", size = 12), axis.title.y = element_text(family="Times New Roman",color = "black", size = 12)) cond.fig <- ggplot(cond.df, aes(x=presence, y=Cond,group=Location,color=Location)) + geom_errorbar(aes(ymin=Cond-ci, ymax=Cond+ci), width=.1, size=1, position=dodge) + geom_point(size=4, position=dodge) + xlab("Salamander Occurrence") + ylab("Conductivity (µS/cm)") + ylim(750,950) + scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present")) + theme(axis.text.x = element_text(family="Times New Roman",color = "black", size = 10), axis.text.y = element_text(family="Times New Roman",color = "black", size = 10), axis.title.x = element_text(family="Times New Roman",color = "black", size = 12), axis.title.y = element_text(family="Times New Roman",color = "black", size = 12), legend.position = c(0.8,0.2), legend.title = element_blank(), legend.background = element_rect(color = "black", fill = NA ), legend.key = element_rect(color = NA, fill = NA), legend.text = element_text(family="Times New Roman")) # Convert plots to tables and set widths to be equal to get y axes aligned g.temp <- ggplotGrob(temp.fig) g.ph <- ggplotGrob(pH.fig) g.cond <- ggplotGrob(cond.fig) g.do <- ggplotGrob(DO.fig) # PLOT WITHOUT RAINFALL # row1 <- list(g.temp,g.ph) row2 <- list(g.do,g.cond) combine1 <- do.call(cbind, c(row1, size = "max")) combine2 <- do.call(cbind, c(row2, size = "max")) final.fig <- do.call(rbind, c(list(combine1,combine2), size = "max")) # Used "max" to accomodate largest figure #grid.draw(final.fig) png("Figure5_WaterChemistry.png",width = 7.5, height = 7.5, units = "in",res = 300) grid.draw(final.fig) dev.off() ###################################################################################################################################### #ANALYZE COUNTS WITH GLMS ############################################################################################################## hist(BCS$Sal_Count) #Perform the chi-square goodness of fit test library(vcd) gf <- goodfit(BCS_C$Sal_Count,type= "poisson",method= "MinChisq") summary(gf) plot(gf,main="Count data vs Poisson distribution") #this test seems to indicate that our data are poisson distributed BUT BARELY library(MASS) library(pscl) library(multcomp) library(MuMIn) library(msme) p1 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.1to30 , offset = log(Effort_obj), data=BCS_C, family=poisson) p2 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.1to60 , offset = log(Effort_obj), data=BCS_C, family=poisson) p3 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.1to90 , offset = log(Effort_obj), data=BCS_C, family=poisson) p4 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.31to60 , offset = log(Effort_obj), data=BCS_C, family=poisson) p5 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.31to90 , offset = log(Effort_obj), data=BCS_C, family=poisson) p6 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.31to120 , offset = log(Effort_obj), data=BCS_C, family=poisson) p7 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.61to90 , offset = log(Effort_obj), data=BCS_C, family=poisson) p8 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.61to120 , offset = log(Effort_obj), data=BCS_C, family=poisson) p9 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.61to150 , offset = log(Effort_obj), data=BCS_C, family=poisson) p10 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.91to120 , offset = log(Effort_obj), data=BCS_C, family=poisson) p11 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.91to150 , offset = log(Effort_obj), data=BCS_C, family=poisson) p12 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.91to180 , offset = log(Effort_obj), data=BCS_C, family=poisson) #Model Selection using AIC MS.p=model.sel(p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12) MS.p summary(p6) library(DescTools) PseudoR2(p6,"Nagelkerke") P__disp(p6) #OVERDISPERSED!!!!!!! ########################################################################################################## nb1=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.1to30 + offset(log(Effort_obj)), data=BCS_C) nb2=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.1to60 + offset(log(Effort_obj)), data=BCS_C) nb3=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.1to90 + offset(log(Effort_obj)), data=BCS_C) nb4=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.31to60 + offset(log(Effort_obj)), data=BCS_C) nb5=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.31to90 + offset(log(Effort_obj)), data=BCS_C) nb6=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.31to120 + offset(log(Effort_obj)), data=BCS_C) nb7=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.61to90 + offset(log(Effort_obj)), data=BCS_C) nb8=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.61to120 + offset(log(Effort_obj)), data=BCS_C) nb9=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.61to150 + offset(log(Effort_obj)), data=BCS_C) nb10=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.91to120 + offset(log(Effort_obj)), data=BCS_C) nb11=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.91to150 + offset(log(Effort_obj)), data=BCS_C) nb12=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.91to180 + offset(log(Effort_obj)), data=BCS_C) #Model Selection using AIC MS=model.sel(nb1,nb2,nb3,nb4,nb5,nb6,nb7,nb8,nb9,nb10,nb11,nb12) MS print(round(data.frame(MS$df, MS$AICc, MS$delta, MS$weight),3)) #two competing models #90 days of accumulation summary(nb6) #31-120 days #very large theta, maybe underdispersed. BUT! results identical to poisson version summary(nb3) #1-90 days PseudoR2(nb6,"Nagelkerke") PseudoR2(nb3,"Nagelkerke") PseudoR2(nb2,"Nagelkerke") PseudoR2(nb5,"Nagelkerke") PseudoR2(nb4,"Nagelkerke") PseudoR2(nb8,"Nagelkerke") PseudoR2(nb7,"Nagelkerke") PseudoR2(nb9,"Nagelkerke") PseudoR2(nb10,"Nagelkerke") PseudoR2(nb12,"Nagelkerke") PseudoR2(nb1,"Nagelkerke") PseudoR2(nb11,"Nagelkerke") ######################################################################################################################################### #######CREATE FIGURE OF PREDICTED VALUES################################## ## MODEL PREDICTIONS WITH CONFIDENCE INTERVALS AND DATA POINTS ## ## Models predicted with 60cm of rainfall ## pred.Day <- rep(c(mean(BCS_C[,"Day"])),times=61) pred.Rain.31to120 <- rep(c(0:60), times = 1) pred.Effort_obj <- rep(c(mean(BCS_C[,"Effort_obj"])),times=61) pred.data <- data.frame(pred.Day,pred.Rain.31to120,pred.Effort_obj) colnames(pred.data) <- c("Day","Rain.31to120","Effort_obj") ginv <- nb6$family$linkinv ## inverse link function pred.nb6 <- predict(nb6, newdata = pred.data, type = "link", se.fit=TRUE) pred.data$Sal_Count <- ginv(pred.nb6[[1]]) pred.data$lo <- ginv(pred.nb6[[1]] - 1.96 * pred.nb6[[2]]) pred.data$up <- ginv(pred.nb6[[1]] + 1.96 * pred.nb6[[2]]) #29 cm of rainfall to cause 1 or more salamanders to occur #plot of predicted values plot.nb6 <- ggplot(pred.data, aes(x = Rain.31to120, y = Sal_Count)) + geom_ribbon(aes(ymin=lo, ymax=up), fill = "gray85") + geom_line(size = 1.5) + geom_point(data = BCS_C, aes(x = Rain.31to120, y = Sal_Count), shape=19, size=2, position=position_jitter(w=0.1, h=0.05)) + xlab("Rainfall Accumulation (cm) \n 31-120 Days Prior to Survey") + ylab("Salamander Detections") + scale_x_continuous(breaks=seq(0,60,10)) + ylim(0,10) + coord_cartesian(ylim = c(0,4)) + geom_hline(yintercept=1, linetype = "solid", color = "red") + theme(axis.text.x = element_text(family="Times New Roman", color = "black", size = 14), axis.text.y = element_text(family="Times New Roman", color = "black", size = 14), axis.title.x = element_text(family="Times New Roman", color = "black", size = 16), axis.title.y = element_text(family="Times New Roman", color = "black", size = 16), panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_rect(color = "black", fill = NA, size = 1.25), legend.position = "bottom") #plot of raw salamander counts against rainfall accumulation 31-120 days prior to survey max(BCS$Rain.31to120)/3 #account for 3 being max number of salamander observations on secondary y axis c6=ggplot(BCS_C) + geom_col(aes(x=Date, y=Sal_Count*17), size = 1.5, color="red", fill="red", group = 1)+ geom_line(aes(x=Date, y=Rain.31to120), size = 1.5, color = "black") + scale_y_continuous(sec.axis = sec_axis(~./17, name = "Salamander Detections"))+ labs(y="Rainfall Accumulation (cm) \n 31-120 Days Prior to Survey", x="Date")+ theme(axis.text.x = element_text(family="Times New Roman", color = "black", size = 14), axis.text.y = element_text(family="Times New Roman", color = "black", size = 14), axis.title.x = element_text(family="Times New Roman", color = "black", size = 16), axis.title.y = element_text(family="Times New Roman", color = "black", size = 16), panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_rect(color = "black", fill = NA, size = 1.25), legend.position = "bottom") png("Figure6_Predictions.png", width = 7.5, height = 7.5, units = 'in',res = 300) grid.arrange(plot.nb6,c6) dev.off() ######################################################################################################################################### #ESTIMATE DETECTION AND SURVIVAL USING CJS library(RMark) BCS_CH=read.csv(file.choose(),header = F) BCS_CH.mat=as.matrix(BCS_CH[,2:ncol(BCS_CH)]) #function to read the matrix and create the capture history strings y=BCS_CH.mat #input site matrix here# k<-dim(y)[2] pasty<-function(x) { k<-ncol(x) n<-nrow(x) out<-array(dim=n) for (i in 1:n) { y<-(x[i,]>0)*1 out[i]<-paste(y,sep="",collapse="") } return(out) } #capture history data frame for RMark (FINALLY) capt.hist<-data.frame(ch=pasty(y)) BCS.proc=process.data(capt.hist,model="CJS") BCS.ddl=make.design.data(BCS.proc) complete_dataset_model<-mark(BCS.proc) complete_dataset_model$results$real #EXAMINE INFLUENCE OF SINGLE LONG TERM RECAPTURE EVENT minus.recap=ncol(JPS.mat)-1 #removes final column of capture history y=JPS.mat[,2:minus.recap] #RERUN LINES 9-26!!! reduced_dataset_model<-mark(BCS.proc) reduced_dataset_model$results$real #Goodness of fit tests using R2ucare library(R2ucare) overall_CJS(y,rep(1,nrow(y))) #no evidence for lack-of-fit (p>0.05) #Cannot diagnose sub-components given our sample size test2ct(y,rep(1,nrow(y))) test2cl(y,rep(1,nrow(y))) test3sr(y,rep(1,nrow(y))) test3sm(y,rep(1,nrow(y)))