#BRUSHY CREEK SPRING MANUSCRIPT PREP

#IMPORT FILE
BCS=read.csv(file.choose())

str(BCS)
BCS$Date=as.Date(BCS$Date,tryFormats=c("%m/%d/%Y"))
BCS$Sal_Count=as.numeric(BCS$Sal_Count)

count=BCS$Sal_Count
presence=rep(NA,length(count))
for(i in 1:length(count)){ 
  presence[i]=ifelse(count[i] > 0,1,0)
}
BCS$presence=presence

#create dataset that excludes the spring-run portion of the site.
BCS_C=BCS[which(BCS$Location=="culvert"),]
str(BCS_C)

#identify outlier  - This must be equipment failure or incorrectly reported in the field (it happens!! SORRY)
which(BCS_C$DO==max(BCS_C$DO))
#[1] 31
BCS_C[31,"Date"] 
#[1] "2020-02-05"
BCS_C[31,"DO"]=NA #REMOVE OUTLIER

#identify outlier  - this happens to be when there was an irregation leak upstream
which(BCS_C$Cond==min(BCS_C$Cond))
#[1] 29
BCS_C[29,"Date"] 
#[1] "2019-10-03" 
BCS_C[29,"Cond"]=NA #REMOVE OUTLIER

#####EXPLORE WATER CHEMISTRY DATA#######################################################################################################
##comepare run from culvert for these measures
t.test(BCS[which(BCS$Location=="run"),"Temp"],BCS[which(BCS$Location=="culvert"),"Temp"])
t.test(BCS[which(BCS$Location=="run"),"DO"],BCS[which(BCS$Location=="culvert"),"DO"]) 
t.test(BCS[which(BCS$Location=="run"),"pH"],BCS[which(BCS$Location=="culvert"),"pH"]) #SIGNIFICANT
t.test(BCS[which(BCS$Location=="run"),"Cond"],BCS[which(BCS$Location=="culvert"),"Cond"]) #SIGNIFICANT 
#this portion of the site is highly influenced by runoff. Unhelpful for our hypotheses.

##Compare salamander capture days with salamander non-capture days WITHIN the culvert
t.test(BCS_C[which(BCS_C$Sal_Count>0),"Temp"],BCS_C[which(BCS_C$Sal_Count==0),"Temp"])
t.test(BCS_C[which(BCS_C$Sal_Count>0),"DO"],BCS_C[which(BCS_C$Sal_Count==0),"DO"])
t.test(BCS_C[which(BCS_C$Sal_Count>0),"pH"],BCS_C[which(BCS_C$Sal_Count==0),"pH"])
t.test(BCS_C[which(BCS_C$Sal_Count>0),"Cond"],BCS_C[which(BCS_C$Sal_Count==0),"Cond"])
#Strong indication Water Chemistry metrics are not good predictors of salamander occurrence.

########################################################################################################################################
#VISUALIZE DIFFERENCES AMONG WATER CHEMSITRY
library(ggplot2)
library(grid)
library(gridBase)
library(gtable)
library(gridExtra)
library(extrafont)
loadfonts(device="win") 
library(Rmisc) #for function summarySE, used for grabbing CI's for group means

#MAKE DATASETS FOR PLOTTING GROUP MEANS WITH CI's
temp.df <- summarySE(BCS,measurevar = "Temp", groupvars = c("Location","presence"),na.rm=T)
pH.df <- summarySE(BCS,measurevar = "pH", groupvars = c("Location","presence"),na.rm=T)
DO.df <- summarySE(BCS,measurevar = "DO", groupvars = c("Location","presence"),na.rm=T)
cond.df <- summarySE(BCS,measurevar = "Cond", groupvars = c("Location","presence"),na.rm=T)

dodge=position_dodge(0.5) #this is handy, and allows the errorbars and points to move in unison

temp.fig <- ggplot(temp.df, aes(x=presence, y=Temp,group=Location,color=Location)) + 
  geom_errorbar(aes(ymin=Temp-ci, ymax=Temp+ci), width=.1, size=1, position=dodge, show.legend = F) +
  geom_point(size=4,position=dodge, show.legend = F) + 
  xlab("Salamander Occurrence") + 
  ylab("Temperature (蚓)") +
  ylim(20, 24) +
  scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present")) +
  theme(axis.text.x = element_blank(), 
        axis.text.y = element_text(family="Times New Roman",color = "black", size = 10),
        axis.title.x = element_blank(),
        axis.title.y = element_text(family="Times New Roman",color = "black", size = 12),
        legend.position = "right", legend.title = element_blank())

pH.fig <- ggplot(pH.df, aes(x=presence, y=pH,group=Location,color=Location)) + 
  geom_errorbar(aes(ymin=pH-ci, ymax=pH+ci), width=.1, size=1, position=dodge, show.legend = F) +
  geom_point(size=4,position=dodge, show.legend = F) + 
  xlab("Salamander Occurrence") + 
  ylab("pH") +
  scale_y_continuous(limits = c(6.8,8.1), breaks = seq(6.8,8.1,0.2)) +
  scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present"))  +
  theme(axis.text.x = element_blank(), 
        axis.text.y = element_text(family="Times New Roman",color = "black", size = 10),
        axis.title.x = element_blank(),
        axis.title.y = element_text(family="Times New Roman",color = "black", size = 12))

DO.fig <- ggplot(DO.df, aes(x=presence, y=DO,group=Location,color=Location)) + 
  geom_errorbar(aes(ymin=DO-ci, ymax=DO+ci), width=.1, size=1, position=dodge, show.legend = F) +
  geom_point(size=4, position=dodge, show.legend = F) + 
  xlab("Salamander Occurrence") + 
  ylab("Dissolved Oxygen (mg/L)") +
  ylim(4,9) +
  scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present"))  +
  theme(axis.text.x = element_text(family="Times New Roman",color = "black", size = 10), 
        axis.text.y = element_text(family="Times New Roman",color = "black", size = 10),
        axis.title.x = element_text(family="Times New Roman",color = "black", size = 12),
        axis.title.y = element_text(family="Times New Roman",color = "black", size = 12))

cond.fig <- ggplot(cond.df, aes(x=presence, y=Cond,group=Location,color=Location)) + 
  geom_errorbar(aes(ymin=Cond-ci, ymax=Cond+ci), width=.1, size=1, position=dodge) +
  geom_point(size=4, position=dodge) + 
  xlab("Salamander Occurrence") + 
  ylab("Conductivity (猶/cm)") +
  ylim(750,950) +
  scale_x_continuous(breaks=c(0,1),labels=c("Absent", "Present"))  +
  theme(axis.text.x = element_text(family="Times New Roman",color = "black", size = 10), 
        axis.text.y = element_text(family="Times New Roman",color = "black", size = 10),
        axis.title.x = element_text(family="Times New Roman",color = "black", size = 12),
        axis.title.y = element_text(family="Times New Roman",color = "black", size = 12),
        legend.position = c(0.8,0.2), legend.title = element_blank(),
        legend.background = element_rect(color = "black", fill = NA ),
        legend.key = element_rect(color = NA, fill = NA),
        legend.text = element_text(family="Times New Roman"))

# Convert plots to tables and set widths to be equal to get y axes aligned
g.temp <- ggplotGrob(temp.fig)
g.ph <- ggplotGrob(pH.fig)
g.cond <- ggplotGrob(cond.fig) 
g.do <- ggplotGrob(DO.fig)

# PLOT WITHOUT RAINFALL #
row1 <- list(g.temp,g.ph)
row2 <- list(g.do,g.cond)
combine1 <- do.call(cbind, c(row1, size = "max"))
combine2 <- do.call(cbind, c(row2, size = "max"))
final.fig <- do.call(rbind, c(list(combine1,combine2), size = "max")) # Used "max" to accomodate largest figure
#grid.draw(final.fig)

png("Figure5_WaterChemistry.png",width = 7.5, height = 7.5, units = "in",res = 300)
grid.draw(final.fig)
dev.off()

######################################################################################################################################
#ANALYZE COUNTS WITH GLMS ##############################################################################################################
hist(BCS$Sal_Count)

#Perform the chi-square goodness of fit test 
library(vcd)
gf <- goodfit(BCS_C$Sal_Count,type= "poisson",method= "MinChisq")
summary(gf)
plot(gf,main="Count data vs Poisson distribution")
#this test seems to indicate that our data are poisson distributed BUT BARELY

library(MASS)
library(pscl)
library(multcomp)
library(MuMIn)
library(msme)

p1 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.1to30 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p2 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.1to60 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p3 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.1to90 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p4 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.31to60 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p5 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.31to90 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p6 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.31to120 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p7 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.61to90 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p8 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.61to120 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p9 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.61to150 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p10 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.91to120 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p11 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.91to150 , offset = log(Effort_obj), data=BCS_C, family=poisson)
p12 <- glm(Sal_Count ~ Day + I(Day^2) + Rain.91to180 , offset = log(Effort_obj), data=BCS_C, family=poisson)

#Model Selection using AIC
MS.p=model.sel(p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12)
MS.p

summary(p6)

library(DescTools)
PseudoR2(p6,"Nagelkerke") 

P__disp(p6)
#OVERDISPERSED!!!!!!!
##########################################################################################################
nb1=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.1to30 +  offset(log(Effort_obj)), data=BCS_C)
nb2=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.1to60 +  offset(log(Effort_obj)), data=BCS_C)
nb3=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.1to90 +  offset(log(Effort_obj)), data=BCS_C)
nb4=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.31to60 +  offset(log(Effort_obj)), data=BCS_C)
nb5=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.31to90 +  offset(log(Effort_obj)), data=BCS_C)
nb6=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.31to120 +  offset(log(Effort_obj)), data=BCS_C)
nb7=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.61to90 +  offset(log(Effort_obj)), data=BCS_C)
nb8=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.61to120 +  offset(log(Effort_obj)), data=BCS_C)
nb9=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.61to150 +  offset(log(Effort_obj)), data=BCS_C)
nb10=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.91to120 +  offset(log(Effort_obj)), data=BCS_C)
nb11=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.91to150 +  offset(log(Effort_obj)), data=BCS_C)
nb12=glm.nb(Sal_Count ~ Day + I(Day^2) + Rain.91to180 +  offset(log(Effort_obj)), data=BCS_C)

#Model Selection using AIC
MS=model.sel(nb1,nb2,nb3,nb4,nb5,nb6,nb7,nb8,nb9,nb10,nb11,nb12)
MS
print(round(data.frame(MS$df, MS$AICc, MS$delta, MS$weight),3))
  
#two competing models
#90 days of accumulation
summary(nb6) #31-120 days #very large theta, maybe underdispersed. BUT! results identical to poisson version
summary(nb3) #1-90 days

PseudoR2(nb6,"Nagelkerke") 
PseudoR2(nb3,"Nagelkerke") 
PseudoR2(nb2,"Nagelkerke") 
PseudoR2(nb5,"Nagelkerke") 
PseudoR2(nb4,"Nagelkerke") 
PseudoR2(nb8,"Nagelkerke") 
PseudoR2(nb7,"Nagelkerke") 
PseudoR2(nb9,"Nagelkerke") 
PseudoR2(nb10,"Nagelkerke") 
PseudoR2(nb12,"Nagelkerke") 
PseudoR2(nb1,"Nagelkerke") 
PseudoR2(nb11,"Nagelkerke") 
#########################################################################################################################################
#######CREATE FIGURE OF PREDICTED VALUES##################################

## MODEL PREDICTIONS WITH CONFIDENCE INTERVALS AND DATA POINTS ##
## Models predicted with 60cm of rainfall ##
pred.Day <- rep(c(mean(BCS_C[,"Day"])),times=61)
pred.Rain.31to120 <- rep(c(0:60), times = 1)
pred.Effort_obj <- rep(c(mean(BCS_C[,"Effort_obj"])),times=61)

pred.data <- data.frame(pred.Day,pred.Rain.31to120,pred.Effort_obj)
colnames(pred.data) <- c("Day","Rain.31to120","Effort_obj")

ginv <- nb6$family$linkinv  ## inverse link function
pred.nb6 <- predict(nb6, newdata = pred.data, type = "link", se.fit=TRUE)
pred.data$Sal_Count <- ginv(pred.nb6[[1]])
pred.data$lo <- ginv(pred.nb6[[1]] - 1.96 * pred.nb6[[2]])
pred.data$up <- ginv(pred.nb6[[1]] + 1.96 * pred.nb6[[2]])

#29 cm of rainfall to cause 1 or more salamanders to occur

#plot of predicted values
plot.nb6 <- ggplot(pred.data, aes(x = Rain.31to120, y = Sal_Count)) +
  geom_ribbon(aes(ymin=lo, ymax=up), fill = "gray85") +
  geom_line(size = 1.5) +
  geom_point(data = BCS_C, aes(x = Rain.31to120, y = Sal_Count), shape=19, size=2, position=position_jitter(w=0.1, h=0.05)) +
  xlab("Rainfall Accumulation (cm) \n 31-120 Days Prior to Survey") +
  ylab("Salamander Detections") +
  scale_x_continuous(breaks=seq(0,60,10)) +
  ylim(0,10) + coord_cartesian(ylim = c(0,4)) +
  geom_hline(yintercept=1, linetype = "solid", color = "red") +
  theme(axis.text.x = element_text(family="Times New Roman", color = "black", size = 14), 
        axis.text.y = element_text(family="Times New Roman", color = "black", size = 14),                             
        axis.title.x = element_text(family="Times New Roman", color = "black", size = 16),
        axis.title.y = element_text(family="Times New Roman", color = "black", size = 16),        
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(color = "black", fill = NA, size = 1.25),
        legend.position = "bottom")

#plot of raw salamander counts against rainfall accumulation 31-120 days prior to survey
max(BCS$Rain.31to120)/3 #account for 3 being max number of salamander observations on secondary y axis
c6=ggplot(BCS_C) + 
  geom_col(aes(x=Date, y=Sal_Count*17), size = 1.5, color="red", fill="red", group = 1)+
  geom_line(aes(x=Date, y=Rain.31to120), size = 1.5, color = "black") +
  scale_y_continuous(sec.axis = sec_axis(~./17, name = "Salamander Detections"))+
  labs(y="Rainfall Accumulation (cm) \n 31-120 Days Prior to Survey", x="Date")+
  theme(axis.text.x = element_text(family="Times New Roman", color = "black", size = 14), 
        axis.text.y = element_text(family="Times New Roman", color = "black", size = 14),                             
        axis.title.x = element_text(family="Times New Roman", color = "black", size = 16),
        axis.title.y = element_text(family="Times New Roman", color = "black", size = 16),        
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(color = "black", fill = NA, size = 1.25),
        legend.position = "bottom")

png("Figure6_Predictions.png", width = 7.5, height = 7.5, units = 'in',res = 300)
grid.arrange(plot.nb6,c6)
dev.off()
#########################################################################################################################################
#ESTIMATE DETECTION AND SURVIVAL USING CJS 
library(RMark)
BCS_CH=read.csv(file.choose(),header = F)
BCS_CH.mat=as.matrix(BCS_CH[,2:ncol(BCS_CH)])

#function to read the matrix and create the capture history strings
y=BCS_CH.mat #input site matrix here#
k<-dim(y)[2]
pasty<-function(x) 
{
  k<-ncol(x)
  n<-nrow(x)
  out<-array(dim=n)
  for (i in 1:n)
  {
    y<-(x[i,]>0)*1
    out[i]<-paste(y,sep="",collapse="")
  }
  return(out)
}
#capture history data frame for RMark  (FINALLY) 
capt.hist<-data.frame(ch=pasty(y))

BCS.proc=process.data(capt.hist,model="CJS")
BCS.ddl=make.design.data(BCS.proc)

complete_dataset_model<-mark(BCS.proc)
complete_dataset_model$results$real

#EXAMINE INFLUENCE OF SINGLE LONG TERM RECAPTURE EVENT
minus.recap=ncol(JPS.mat)-1 #removes final column of capture history
y=JPS.mat[,2:minus.recap]

#RERUN LINES 9-26!!!

reduced_dataset_model<-mark(BCS.proc)
reduced_dataset_model$results$real

#Goodness of fit tests using R2ucare
library(R2ucare)
overall_CJS(y,rep(1,nrow(y))) #no evidence for lack-of-fit (p>0.05)

#Cannot diagnose sub-components given our sample size
test2ct(y,rep(1,nrow(y)))
test2cl(y,rep(1,nrow(y)))
test3sr(y,rep(1,nrow(y)))
test3sm(y,rep(1,nrow(y)))