# Note: The code for this analysis is in five parts, each of which is pasted below and demarcated by file names and "END" labels.
# For best results, paste each section into its own .r file and run separately.

# Regression_fit code
library(car)
library(ggplot2)
library(segmented)

# load data in long format here, assign to "datud"
# each hour should have two pairs of values, a pair of upstream photo and video counts,
# and a pair of downstream counts. Columns should be labeled "photo", "vid", and "direction" (case sensitive)
datud <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/photo_vid_data_se+mdw.csv") # type path to photo/vid data here
datud <- subset(datud, stream=="Southeast") # enter stream name here (needs to match case and spelling)

##################################################################
##################################################################
# making the simple linear models. The -1 causes the model to fit without an intercept (forces it through the origin)
lin.mod   <- lm(vid~photo-1, data=datud)
lin.mod.p <- lm(vid~photo+I(photo^2)-1, data=datud) # polynomial version

# fitting segmented models -- set the segmented control parameters to do no iterations (it.max=0)
# so the breakpoint stays constrained to the origin (psi=0) while still creating the segmented object
segmented.mod   <- segmented(lin.mod,   seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
segmented.mod.p <- segmented(lin.mod.p, seg.Z = ~photo, psi=0, control=seg.control(it.max=0)) # same but polynomial

# constructing AIC list with labels
aic <- c(round(AIC(lin.mod),2), round(AIC(lin.mod.p),2), round(AIC(segmented.mod),2), round(AIC(segmented.mod.p),2))
modnames <- c("lin.mod", "lin.mod.p", "segmented.mod", "segmented.mod.p")
data.frame(modnames[order(aic)], aic[order(aic)]) # shows table of models in order of AIC (lower is better)

# enter name of model to see details
summary(lin.mod.p)

###############################################################
# MODEL DIAGNOSTICS
# run the line of code for the model name you want to assess (only run the one you want)
mod <- lin.mod
mod <- lin.mod.p
mod <- segmented.mod
mod <- segmented.mod.p

par(mfrow=c(2,2))
plot(mod)

# assessing normality
qqPlot(mod, simulate=TRUE, main="Q-Q Plot")

# assessing linearity of relationship
# residplot function (only have to run this part once, then can use the "residplot()" function afterwards)
residplot <- function(fit, nbreaks=10) {
  z <- rstudent(fit)
  hist(z, breaks=nbreaks, freq=FALSE,
       xlab="Studentized Residual",
       main="Distribution of Errors")
  rug(jitter(z), col="brown")
  curve(dnorm(x, mean=mean(z), sd=sd(z)), add=TRUE, col="blue", lwd=2)
  lines(density(z)$x, density(z)$y, col="red", lwd=2, lty=2)
  legend("topright", legend = c("Normal Curve", "Kernel Density Curve"),
         lty=1:2, col=c("blue","red"), cex=.7)
}
residplot(mod)

# assessing constancy of error variance (homoskedasticity)
ncvTest(mod)
spreadLevelPlot(mod)

# influence and leverage tools
influence.measures(mod)
cooks.distance(mod)

######## video ~ photo simple linear (NOT segmented)
ggplot(data=datud, aes(x=photo, y=vid)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x-1, colour="black", linetype=1, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

######## video ~ photo simple linear (YES segmented)
ggplot(data=datud, aes(x=photo, y=vid, group=direction)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x-1, colour="black", linetype=1, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

######## video ~ photo polynomial (NOT segmented)
ggplot(data=datud, aes(x=photo, y=vid)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x+I(x^2)-1, colour="black", linetype=2, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

######## video ~ photo polynomial (YES segmented)
ggplot(data=datud, aes(x=photo, y=vid, group=direction)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x+I(x^2)-1, colour="black", linetype=2, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

########################## END of regression_fit


# escapement estimate_se+mdw script
################## this script calculates predicted salmon passage, estimated in-stream salmon abundance,
################## cumulative abundance (escapement), and confidence intervals on escapement
library(plyr)
library(ggplot2)
library(gridExtra)
library(segmented)
library(scales)

# first, load hourly data in long format
hourraw <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/salmon_TLcounts_SE+MDW.csv")
# read in calibration values (selected using the model selection procedure in regression_fit)
cals <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/cals_se+mdw.csv")
sites <- sort(unique(hourraw$stream)) # make vector of site names

# loop through the hourly time-lapse counts of salmon for each stream, multiplying each count by the model
# equation to get estimates of true salmon passage. The model equation is unique to each stream.
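# Illustrative sketch (an assumption about the expected cals layout, not part of the original workflow):
# cals should hold one row per stream with columns upslope, uppoly, downslope, and downpoly -- the
# coefficients selected in the regression_fit script. The calibration applied in the loop below is,
# for each hourly count,
#   fitted passage = count * slope + count^2 * poly
# A hypothetical helper expressing that equation for one direction:
cal.pass <- function(count, slope, poly) {
  count * slope + count^2 * poly
}
# e.g. cal.pass(temp.df$up, cals$upslope[cals$stream == sites[i]], cals$uppoly[cals$stream == sites[i]])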
hourly <- data.frame(NULL) # pre-load an empty dataframe
for (i in 1:length(unique(hourraw$stream))){
  temp.df <- subset(hourraw, stream==sites[i])          # select the data for site i
  temp.df$julian   <- floor(temp.df$date_x %%365-29)    ## julian date (whole days)
  temp.df$juliandt <- temp.df$date_x %%365-29           ## julian date including fraction of day
  temp.df$uppass   <- temp.df$up*cals$upslope[cals$stream==sites[i]] +       # fitted number of upward-passing fish
    ((temp.df$up^2)*cals$uppoly[cals$stream==sites[i]])
  temp.df$downpass <- temp.df$down*cals$downslope[cals$stream==sites[i]] +   # fitted number of downward-passing fish
    ((temp.df$down^2)*cals$downpoly[cals$stream==sites[i]])
  temp.df$pass    <- temp.df$uppass-temp.df$downpass    # up fish - down fish
  temp.df$cumpass <- cumsum(temp.df$pass)               # cumulative sum of fish passage
  hourly <- rbind(hourly, temp.df)                      # reconstitutes the dataframe
  print(sum(hourly$pass))                               # QAQC output as feedback to user
}

##### summarizing salmon passage data by day
daily.s    <- ddply(hourly, .(stream,julian), summarise, pass_daily=round(sum(pass),0))
escapement <- ddply(hourly, .(stream), summarise, total=round(max(cumpass),0))

###########################################################################################################
###########################################################################################################
#### Script to calculate bootstrapped 95% confidence intervals on escapement
library(boot)

passraw <- subset(hourraw, stream=="Meadow") # select stream by changing name here
pass.ud <- c(passraw$up, -1*passraw$down)    # reshape passraw so that up and down counts are considered together -- downs need to be negative
pass.ud.df <- data.frame(pass.ud)
names(pass.ud.df)[1] <- "photo"

######################################################################
######################################################################
datud <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/photo_vid_data_se+mdw.csv") # type path to photo/vid data here
datud <- subset(datud, stream=="Meadow") # enter stream name here (needs to match case and spelling)
#allstreams <- read.csv("C:/Bear 2014/2014 Data_9_14/Fish Datasheets/calcsvtest/allstreams.csv")
#datud<-allstreams[allstreams$stream=="mdw",]

tot <- function(data, indices, formula, pass.ud.df) {
  d <- data[indices,]              # allows boot to select a resample
  fit <- lm(formula, data=d)
  pred.vid <- predict(fit, pass.ud.df)
  return(sum(pred.vid))
}

################################################################################
# simple first order
# bootstrapping with 1000 replications
system.time(results <- boot(data=datud, statistic=tot, R=1000, formula=vid~photo-1, pass.ud.df=pass.ud.df))
plot(results)
mean(results$t0)              # should be close to escapement estimate
boot.ci(results, type="perc") # gives 95% CI values, using percentile method
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0) # gives averaged plus/minus amount

################################################################################
# simple polynomial
# make prediction df
pass.ud.df$photo2 <- pass.ud.df$photo^2
names(pass.ud.df)[2] <- "I(photo^2)"
# bootstrapping with 1000 replications
results <- boot(data=datud, statistic=tot, R=1000, formula=vid~photo+I(photo^2)-1, pass.ud.df=pass.ud.df)
plot(results)
mean(results$t0)
boot.ci(results, type="perc")
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0)
################################################################################
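# The percentile CI endpoints returned by boot.ci() sit in columns 4 and 5 of the $percent matrix,
# so the value reported above is half the CI width. A small helper (hypothetical name, sketch only)
# wrapping the repeated pattern:
ci.halfwidth <- function(boot.out) {
  ci <- boot.ci(boot.out, type = "perc")$percent
  round((ci[1, 5] - ci[1, 4]) / 2, 0)
}
# ci.halfwidth(results)  # same plus/minus amount as the line above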
# segmented first order
tot.seg <- function(data, indices, formula, pass.ud.df) {
  d <- data[indices,]              # allows boot to select a resample
  lin.mod <- lm(formula, data=d)
  fit <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
  pred.vid <- predict(fit, pass.ud.df)
  return(sum(pred.vid))
}

# make prediction df
pass.ud.df$photo2 <- pass.ud.df$photo^2
names(pass.ud.df)[2] <- "I(photo^2)"
pass.ud.df$U1.photo <- c(pass.ud.df$photo[1:(length(pass.ud.df$photo)/2)], rep(0, length(pass.ud.df$photo)/2))

# bootstrapping with R replications
results <- boot(data=datud, statistic=tot.seg, R=1000, formula=vid~photo-1, pass.ud.df=pass.ud.df)
plot(results)
mean(results$t0)
boot.ci(results, type="perc")
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0)

################################################################################
# segmented polynomial
results <- boot(data=datud, statistic=tot.seg, R=1000, formula=vid~photo+I(photo^2)-1, pass.ud.df=pass.ud.df)
plot(results)
mean(results$t0)
boot.ci(results, type="perc")
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0)

#########################################################################################################
#########################################################################################################
####### Script to calculate in-stream salmon abundance

### adding rows with no passage so that instream does not truncate at the end of the year
sites <- sort(unique(daily.s$stream))
temp.daily.s <- data.frame(NULL)
for (i in 1:length(unique(daily.s$stream))){
  temp.df3 <- subset(daily.s, stream==sites[i])
  stream <- c(rep(paste(sites[i]),20))
  julian <- c(seq(from=max(temp.df3$julian)+1, to=max(temp.df3$julian)+20))
  pass_daily <- c(rep(0,20))
  temp.df4 <- data.frame(stream, julian, pass_daily)
  temp.df3 <- rbind(temp.df3, temp.df4)
  temp.daily.s <- rbind(temp.daily.s, temp.df3)
}
daily.s <- temp.daily.s

datetest1 <- paste(floor(daily.s$julian), 2013) # year hard-coded to 2013
datetest <- strptime(datetest1, "%j %Y")
daily.s$posdate <- datetest

#################################################################################
##### calculating the number of salmon in the stream each day
# load the streamlife dataframe which gives the mean and sd of the stream life of a salmon in each stream
streamlife <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/streamlife_se+mdw.csv")
sites <- sort(unique(daily.s$stream))
day <- seq(1,40,1)
daysums <- NULL
tempdaysums <- NULL
instream <- NULL
for (j in 1:length(sites)){
  temp.df2 <- subset(daily.s, stream==sites[j])
  psurv <- pnorm(day, mean=streamlife$mu_sl[streamlife$stream==sites[j]],
                 sd=streamlife$sd_sl[streamlife$stream==sites[j]])
  psurv <- 1-rev(psurv)
  #### now create the vector of daily passage, padded with 39 zeros up front to give the 40-day window room to work
  pass <- temp.df2$pass_daily
  pass <- c(rep(0,39), pass)
  for (i in 1:(length(pass)-39)){
    tempdaysums[i] <- sum(pass[i:(i+39)]*(psurv))
  }
  daysums <- c(round(tempdaysums,0))
  tempdaysums <- NULL
  temp.df2$instream <- daysums
  instream <- rbind(instream, temp.df2)
}

###############################################################
############################################################### PLOTTING
hourly$posdate <- strptime(hourly$date, "%m/%e/%Y %k") # converting text datetime to POSIXlt date-time object
#instream<-subset(instream, julian<=270)

########################################################### Meadow Creek
mdw.is <- subset(instream, stream=="Meadow")
mdw.ho <- subset(hourly, stream=="Meadow")
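# Quick illustration of the survival-weight vector built in the in-stream loop above (a sketch with
# made-up numbers, not values used in the analysis): for a hypothetical mean stream life of 10 d and
# sd of 5 d, psurv[40] is the weight applied to today's passage and psurv[1] the weight applied to
# passage from 39 days ago.
toy.day   <- seq(1, 40, 1)
toy.psurv <- 1 - rev(pnorm(toy.day, mean = 10, sd = 5))
round(toy.psurv, 2)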
range(mdw.is$julian)
range(mdw.ho$julian)
#mdw.is<-subset(mdw.is, julian<=270)

###################################################
###################################################
jbreaks <- c(182,197,213,228)
jlims <- c(175,232)
p1 <- ggplot(mdw.ho, aes(x=juliandt, y=pass)) + geom_line(size=.6) + # HOURLY PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(-2000,3000, 1000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

datebreaks <- as.Date(c("2013-07-01","2013-07-16","2013-08-01","2013-08-16"))
datelims <- as.Date(c("2013-06-24", "2013-08-20"))
p2 <- ggplot(mdw.is, aes(x=as.Date(posdate), y=instream)) + geom_line(size=.7) + # MODEL OF SALMON IN STREAM
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,20000, 2000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

p3 <- ggplot(mdw.ho, aes(x=juliandt, y=cumpass)) + geom_line(size=.6) + # Cumulative hourly PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(0,30000, 5000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

grid.arrange(p1, p3, p2, ncol=1)

########################################################### Southeast Creek
se.is <- subset(instream, stream=="Southeast")
se.ho <- subset(hourly, stream=="Southeast")

###################################################
###################################################
jbreaks <- c(182,197,213,228,244,258)
jlims <- c(182,258)
s1 <- ggplot(se.ho, aes(x=juliandt, y=pass)) + geom_line(size=.6) + # HOURLY PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(-2000,2000, 500)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

datebreaks <- as.Date(c("2013-07-01","2013-07-16","2013-08-01","2013-08-16","2013-09-01","2013-09-15"))
datelims <- as.Date(c("2013-07-01", "2013-09-15"))
s2 <- ggplot(se.is, aes(x=as.Date(posdate), y=instream)) + geom_line(size=.7) + # MODEL OF SALMON IN STREAM
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,20000, 4000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

s3 <- ggplot(se.ho, aes(x=juliandt, y=cumpass)) + geom_line(size=.6) + # Cumulative hourly PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(0,90000, 10000)) +
  xlab("Date") +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))
grid.arrange(s1, s3, s2, ncol=1)
grid.arrange(p1, s1, p3, s3, p2, s2, ncol=2) # combo figure

############################# END of escapement estimate_se+mdw script


# escapement_model_validation script
# script for calculating validation metrics
library(segmented)
library(car)

vals <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/photo_vid_data_se+mdw.csv")
vals <- subset(vals, stream=="Southeast")

# VALIDATION OF SEGMENTED FIRST-ORDER MODEL
n <- 0
y.hat <- as.vector(0)
################################ Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo-1, data=vals[-n,]) # removes nth row
  seg.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
  if (vals$vid[n]>=0) {
    y.hat[i] <- (seg.mod$coefficients[1] + seg.mod$coefficients[2])*vals[n,4] # column 4 is the photo count
  } else {
    y.hat[i] <- seg.mod$coefficients[1]*vals[n,4]
  }
  pred <- data.frame(vals$photo[n])
  pred$U1.photo <- if(vals$vid[n]>=0){vals$photo[n]} else {0}
  names(pred) <- c("photo", "U1.photo")
  t2 <- predict(seg.mod, pred, se.fit=T, level=.95, interval="confidence")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi # TRUE when the video count falls inside the interval
sum(cov)/length(cov) # proportion of held-out observations covered

# model with all observations
lin.mod <- lm(vid~photo-1, data=vals)
segmented.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
plot(vals$photo, segmented.mod$fitted.values, type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- segmented.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

#####################################################################################################
#####################################################################################################
# VALIDATION OF SEGMENTED POLYNOMIAL MODEL
n <- 0
y.hat <- as.vector(0)
# Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals[-n,])
  seg.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
  if (vals$vid[n]>=0) {
    y.hat[i] <- ((seg.mod$coefficients[1] + seg.mod$coefficients[3])*vals[n,4]) + ((vals[n,4]^2)*seg.mod$coefficients[2])
  } else {
    y.hat[i] <- (seg.mod$coefficients[1]*vals[n,4]) + ((vals[n,4]^2)*seg.mod$coefficients[2])
  }
  pred <- data.frame(vals$photo[n])
  pred$dummy <- vals$photo[n]^2
  pred$U1.photo <- if(vals$vid[n]>=0){vals$photo[n]} else {0}
  names(pred) <- c("photo", "I(photo^2)", "U1.photo")
  t2 <- predict(seg.mod, pred, se.fit=T, level=.9, interval="prediction")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi
sum(cov)/length(cov)

# model with all observations
lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals)
segmented.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
plot(vals$photo, segmented.mod$fitted.values, type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- segmented.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

#####################################################################################################
#####################################################################################################
# VALIDATION OF SIMPLE FIRST-ORDER MODEL
n <- 0
y.hat <- as.vector(0)
# Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo-1, data=vals[-n,])
  y.hat[i] <- lin.mod$coefficients[1]*vals[n,4]
  newdata <- data.frame(vals$photo[n])
  names(newdata) <- "photo"
  t2 <- predict(lin.mod, newdata, se.fit=T, level=.95, interval="confidence")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi
sum(cov)/length(cov)

################# coverage test
newdata2 <- data.frame(seq(-150,120))
names(newdata2) <- "photo"
pred1 <- predict(lin.mod, newdata2, se.fit=T, level=.95, interval="prediction")
plot(newdata2$photo, pred1$fit[,1], type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
lines(newdata2$photo, pred1$fit[,2], lty=2, col=2)
lines(newdata2$photo, pred1$fit[,3], lty=2, col=2)

# model with all observations
lin.mod <- lm(vid~photo-1, data=vals)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- lin.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

#####################################################################################################
#####################################################################################################
# VALIDATION OF SIMPLE LINEAR POLYNOMIAL MODEL
n <- 0
y.hat <- as.vector(0)
# Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals[-n,])
  y.hat[i] <- lin.mod$coefficients[1]*vals[n,4] + ((vals[n,4]^2)*lin.mod$coefficients[2])
  newdata <- data.frame(vals$photo[n])
  names(newdata) <- "photo"
  t2 <- predict(lin.mod, newdata, se.fit=T, interval="confidence")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi
sum(cov)/length(cov)

# model with all observations
lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals)
plot(vals$photo, lin.mod$fitted.values, type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- lin.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

# END escapement_model_validation script


# streamlife_model_fit script
############################################################
################# STREAM LIFE
##############################################################
########################## modeling stream life as a function of width and depth from Carlson et al. 2007
## load packages
library(car)
library(ggplot2)
library(gridExtra)

#########################################################
carlsondat <- read.csv("C:/Bear 2015/Salmon Counting Paper/carlsondatforR.csv") # load Carlson data
cor.test(carlsondat$width, carlsondat$depth)    # width and depth are highly correlated, so we cannot use both in a model
cor.test(carlsondat$mean.sl, carlsondat$sd.sl)  # mean and sd of stream life are highly correlated, so we model only the mean
                                                # and assume the sd is .499 * mean
sum(carlsondat$sd.sl)/sum(carlsondat$mean.sl)   # where .499 comes from -- the ratio of sd to mean

# mean stream life models
clm1 <- lm(mean.sl~width+depth, data=carlsondat) # neither predictor is significant because of collinearity
vif(clm1) # VIFs over 4 signal bad multicollinearity

# just width
clmw <- lm(mean.sl~width, data=carlsondat)
summary(clmw)
AIC(clmw)

# just depth
clmd <- lm(mean.sl~depth, data=carlsondat)
summary(clmd) # shows details about model
AIC(clmd)
# well... looks like depth is a better predictor of mean stream life than width

#### plot of mean stream life as a function of width
ggplot(carlsondat, aes(x=width, y=mean.sl)) + geom_point(size=3) +
  stat_smooth(method="lm", se=F, color="black", size=1) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

## predicting mean stream life in Meadow and Southeast creeks from the depth model (clmd) coefficients
# Meadow
3.32046 + .28729*13.2          # Meadow mean stream life (mean depth is 13.2 cm)
(3.32046 + .28729*13.2)*.499   # Meadow sd of stream life
# Southeast
3.32046 + .28729*9.1           # Southeast mean stream life (mean depth is 9.1 cm)
(3.32046 + .28729*9.1)*.499    # Southeast sd of stream life

######## END streamlife_model_fit script


##### streamlife_sensitivity_analysis script
############################################################
################# STREAM LIFE SENSITIVITY TEST
##############################################################
library(plyr)
library(ggplot2)
library(scales)
library(gridExtra)

########################## First, calculate passage using the normal method.
# first, load hourly data in long format
hourraw <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/salmon_TLcounts_SE+MDW.csv")
# read in calibration values (selected using model selection procedure)
cals <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/cals_se+mdw.csv")
sites <- sort(unique(hourraw$stream)) # make vector of site names

# loop through the hourly time-lapse counts of salmon for each stream, multiplying each count by the model
# equation to get estimates of true salmon passage. The model equation is unique to each stream.
hourly <- data.frame(NULL) # pre-load an empty dataframe
for (i in 1:length(unique(hourraw$stream))){
  temp.df <- subset(hourraw, stream==sites[i])          # select the data for site i
  temp.df$julian   <- floor(temp.df$date_x %%365-29)    ## julian date (whole days)
  temp.df$juliandt <- temp.df$date_x %%365-29           ## julian date including fraction of day
  temp.df$uppass   <- temp.df$up*cals$upslope[cals$stream==sites[i]] +       # fitted number of upward-passing fish
    ((temp.df$up^2)*cals$uppoly[cals$stream==sites[i]])
  temp.df$downpass <- temp.df$down*cals$downslope[cals$stream==sites[i]] +   # fitted number of downward-passing fish
    ((temp.df$down^2)*cals$downpoly[cals$stream==sites[i]])
  temp.df$pass    <- temp.df$uppass-temp.df$downpass    # up fish - down fish
  temp.df$cumpass <- cumsum(temp.df$pass)               # cumulative sum of fish passage
  hourly <- rbind(hourly, temp.df)                      # reconstitutes the dataframe
  print(sum(hourly$pass))                               # QAQC output as feedback to user
}

##### summarizing salmon passage data by day
daily.s <- ddply(hourly, .(stream,julian), summarise, pass_daily=round(sum(pass),0))

### adding rows with no passage so that instream does not truncate at the end of the year
sites <- sort(unique(daily.s$stream))
temp.daily.s <- data.frame(NULL)
for (i in 1:length(unique(daily.s$stream))){
  temp.df3 <- subset(daily.s, stream==sites[i])
  stream <- c(rep(paste(sites[i]),20))
  julian <- c(seq(from=max(temp.df3$julian)+1, to=max(temp.df3$julian)+20))
  pass_daily <- c(rep(0,20))
  temp.df4 <- data.frame(stream, julian, pass_daily)
  temp.df3 <- rbind(temp.df3, temp.df4)
  temp.daily.s <- rbind(temp.daily.s, temp.df3)
}
daily.s <- temp.daily.s

datetest1 <- paste(floor(daily.s$julian), 2013)
datetest <- strptime(datetest1, "%j %Y")
daily.s$posdate <- datetest

###############################################################
############################################################### PERTURBING THE STREAM LIFE PARAMETERS
############################################################### TO DETERMINE SENSITIVITY
# first, load the streamlife dataframe which gives the mean and sd of the stream life of a salmon in each stream
streamlife <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/streamlife_se+mdw.csv")
stream <- "Southeast"                               # set stream here
daily.s.sub <- subset(daily.s, stream=="Southeast") # set stream here

###############################################################
############################################################### CHANGING MEAN
###############################################################
day <- seq(1,40,1)
daysums <- NULL
tempdaysums <- NULL
instream <- NULL
slmult <- seq(-4,4,2)
for (j in 1:length(slmult)){
  temp.df2 <- daily.s.sub
  psurv <- pnorm(day, mean=streamlife$mu_sl[streamlife$stream==stream]+slmult[j],
                 sd=(streamlife$mu_sl[streamlife$stream==stream]*.499))
  psurv <- 1-rev(psurv)
  #### now create the vector of daily passage, padded with 39 zeros up front to give the 40-day window room to work
  pass <- temp.df2$pass_daily
  pass <- c(rep(0,39), pass)
  for (i in 1:(length(pass)-39)){
    tempdaysums[i] <- sum(pass[i:(i+39)]*(psurv))
  }
  daysums <- c(round(tempdaysums,0))
  tempdaysums <- NULL
  temp.df2$instream <- daysums
  temp.df2$slmult <- paste(slmult[j])
  temp.df2$slmu <- paste(streamlife$mu_sl[streamlife$stream==stream]+slmult[j])
  temp.df2$slsd <- paste(streamlife$mu_sl[streamlife$stream==stream]*.499)
  instream <- rbind(instream, temp.df2)
}
se.m.df <- instream

datebreaks <- as.Date(c("2013-07-01","2013-07-16","2013-08-01","2013-08-16"))
datelims <- as.Date(c("2013-06-24", "2013-09-20"))
plot1 <- ggplot(instream, aes(x=as.Date(posdate), y=instream, colour=slmult)) + geom_line(size=0.7) +
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,30000, 5000), limits=c(0,23000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

###############################################################
############################################################### CHANGING SD
###############################################################
day <- seq(1,40,1)
daysums <- NULL
tempdaysums <- NULL
instream <- NULL
slmult <- seq(-4,4,2)
for (j in 1:length(slmult)){
  temp.df2 <- daily.s.sub
  psurv <- pnorm(day, mean=streamlife$mu_sl[streamlife$stream==stream],
                 sd=(streamlife$mu_sl[streamlife$stream==stream]+slmult[j])*.4999)
  psurv <- 1-rev(psurv)
  #### now create the vector of daily passage, padded with 39 zeros up front to give the 40-day window room to work
  pass <- temp.df2$pass_daily
  pass <- c(rep(0,39), pass)
  for (i in 1:(length(pass)-39)){
    tempdaysums[i] <- sum(pass[i:(i+39)]*(psurv))
  }
  daysums <- c(round(tempdaysums,0))
  tempdaysums <- NULL
  temp.df2$instream <- daysums
  temp.df2$slmult <- paste(slmult[j])
  temp.df2$slmu <- paste(streamlife$mu_sl[streamlife$stream==stream])
  temp.df2$slsd <- paste((streamlife$mu_sl[streamlife$stream==stream]+slmult[j])*.4999)
  instream <- rbind(instream, temp.df2)
}
se.s.df <- instream

plot2 <- ggplot(instream, aes(x=as.Date(posdate), y=instream, colour=slmult)) + geom_line(size=0.7) +
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,30000, 5000), limits=c(0,23000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

grid.arrange(plot1, plot2) # combo plot of all above
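# A minimal numeric companion to the sensitivity plots (sketch; assumes se.m.df and se.s.df built above):
# peak in-stream abundance under each perturbation of the mean and the sd of stream life.
ddply(se.m.df, .(slmult), summarise, peak_instream = max(instream)) # varying the mean
ddply(se.s.df, .(slmult), summarise, peak_instream = max(instream)) # varying the sd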