# Note: The code for this analysis is in five parts, each of which is pasted below and demarcated by file names and "END" labels.
# For best results, paste each section into its own .r file and run separately.

# Regression_fit code
library(car)
library(ggplot2)
library(segmented)

# load data in long format here, assign to "datud"
# each hour should have two pairs of values, a pair of upstream photo and video counts,
# and a pair of downstream counts. Columns should be labeled "photo", "vid", and "direction" (case sensitive)
datud <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/photo_vid_data_se+mdw.csv") # type path to photo/vid data here
datud <- subset(datud, stream=="Southeast") # enter stream name here (needs to match case and spelling)

##################################################################
##################################################################
# making the simple linear models. The -1 causes the model to fit without an intercept (forces it through the origin)
lin.mod   <- lm(vid~photo-1, data=datud)
lin.mod.p <- lm(vid~photo+I(photo^2)-1, data=datud) # polynomial version

# fitting segmented models -- set the segmented control parameters to do no iterations (it.max=0)
# so the breakpoint stays constrained to the origin (psi=0) while still creating the segmented object
segmented.mod   <- segmented(lin.mod,   seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
segmented.mod.p <- segmented(lin.mod.p, seg.Z = ~photo, psi=0, control=seg.control(it.max=0)) # same but polynomial

# constructing AIC list with labels
aic <- c(round(AIC(lin.mod),2), round(AIC(lin.mod.p),2), round(AIC(segmented.mod),2), round(AIC(segmented.mod.p),2))
modnames <- c("lin.mod", "lin.mod.p", "segmented.mod", "segmented.mod.p")
data.frame(modnames[order(aic)], aic[order(aic)]) # shows table of models in order of AIC (lower is better)

# enter name of model to see details
summary(lin.mod.p)

###############################################################
# MODEL DIAGNOSTICS
# run the line of code for the model name you want to assess (only run the one you want)
mod <- lin.mod
mod <- lin.mod.p
mod <- segmented.mod
mod <- segmented.mod.p

par(mfrow=c(2,2))
plot(mod)

# assessing normality
qqPlot(mod, simulate=TRUE, main="Q-Q Plot")

# assessing linearity of relationship
# residplot function (only have to run this part once, then can use the "residplot()" function afterwards)
residplot <- function(fit, nbreaks=10) {
  z <- rstudent(fit)
  hist(z, breaks=nbreaks, freq=FALSE,
       xlab="Studentized Residual",
       main="Distribution of Errors")
  rug(jitter(z), col="brown")
  curve(dnorm(x, mean=mean(z), sd=sd(z)), add=TRUE, col="blue", lwd=2)
  lines(density(z)$x, density(z)$y, col="red", lwd=2, lty=2)
  legend("topright", legend = c("Normal Curve", "Kernel Density Curve"),
         lty=1:2, col=c("blue","red"), cex=.7)
}
residplot(mod)

# assessing constancy of error variance (homoskedasticity)
ncvTest(mod)
spreadLevelPlot(mod)

# influence and leverage tools
influence.measures(mod)
cooks.distance(mod)

######## video ~ photo simple linear (NOT segmented)
ggplot(data=datud, aes(x=photo, y=vid)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x-1, colour="black", linetype=1, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

######## video ~ photo simple linear (YES segmented)
ggplot(data=datud, aes(x=photo, y=vid, group=direction)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x-1, colour="black", linetype=1, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

######## video ~ photo polynomial (NOT segmented)
ggplot(data=datud, aes(x=photo, y=vid)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x+I(x^2)-1, colour="black", linetype=2, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

######## video ~ photo polynomial (YES segmented)
ggplot(data=datud, aes(x=photo, y=vid, group=direction)) + geom_point(pch=16, size=1) +
  geom_smooth(method="lm", formula= y~x+I(x^2)-1, colour="black", linetype=2, size=.5, se=F) +
  #scale_y_continuous(breaks=seq(-50,450, 100)) +
  geom_hline(yintercept=0, size=.5, linetype=3) +
  geom_vline(xintercept=0, size=.5, linetype=3) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=14))

########################## END of regression_fit


# escapement estimate_se+mdw script
################## this script calculates predicted salmon passage, estimated in-stream salmon abundance,
################## cumulative abundance (escapement), and confidence intervals on escapement
library(plyr)
library(ggplot2)
library(gridExtra)
library(segmented)
library(scales)

# first, load hourly data in long format
hourraw <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/salmon_TLcounts_SE+MDW.csv")
# read in calibration values (selected using the model selection procedure in regression_fit)
cals <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/cals_se+mdw.csv")
sites <- sort(unique(hourraw$stream)) # make vector of site names

# loop through the hourly time-lapse counts of salmon for each stream, multiplying each count by the model
# equation to get estimates of true salmon passage. The model equation is unique to each stream.
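# Illustrative sketch (an assumption about the expected cals layout, not part of the original workflow):
# cals should hold one row per stream with columns upslope, uppoly, downslope, and downpoly -- the
# coefficients selected in the regression_fit script. The calibration applied in the loop below is,
# for each hourly count,
#   fitted passage = count * slope + count^2 * poly
# A hypothetical helper expressing that equation for one direction:
cal.pass <- function(count, slope, poly) {
  count * slope + count^2 * poly
}
# e.g. cal.pass(temp.df$up, cals$upslope[cals$stream == sites[i]], cals$uppoly[cals$stream == sites[i]])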
hourly <- data.frame(NULL) # pre-load an empty dataframe
for (i in 1:length(unique(hourraw$stream))){
  temp.df <- subset(hourraw, stream==sites[i])          # select the data for site i
  temp.df$julian   <- floor(temp.df$date_x %%365-29)    ## julian date (whole days)
  temp.df$juliandt <- temp.df$date_x %%365-29           ## julian date including fraction of day
  temp.df$uppass   <- temp.df$up*cals$upslope[cals$stream==sites[i]] +       # fitted number of upward-passing fish
    ((temp.df$up^2)*cals$uppoly[cals$stream==sites[i]])
  temp.df$downpass <- temp.df$down*cals$downslope[cals$stream==sites[i]] +   # fitted number of downward-passing fish
    ((temp.df$down^2)*cals$downpoly[cals$stream==sites[i]])
  temp.df$pass    <- temp.df$uppass-temp.df$downpass    # up fish - down fish
  temp.df$cumpass <- cumsum(temp.df$pass)               # cumulative sum of fish passage
  hourly <- rbind(hourly, temp.df)                      # reconstitutes the dataframe
  print(sum(hourly$pass))                               # QAQC output as feedback to user
}

##### summarizing salmon passage data by day
daily.s    <- ddply(hourly, .(stream,julian), summarise, pass_daily=round(sum(pass),0))
escapement <- ddply(hourly, .(stream), summarise, total=round(max(cumpass),0))

###########################################################################################################
###########################################################################################################
#### Script to calculate bootstrapped 95% confidence intervals on escapement
library(boot)

passraw <- subset(hourraw, stream=="Meadow") # select stream by changing name here
pass.ud <- c(passraw$up, -1*passraw$down)    # reshape passraw so that up and down counts are considered together -- downs need to be negative
pass.ud.df <- data.frame(pass.ud)
names(pass.ud.df)[1] <- "photo"

######################################################################
######################################################################
datud <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/photo_vid_data_se+mdw.csv") # type path to photo/vid data here
datud <- subset(datud, stream=="Meadow") # enter stream name here (needs to match case and spelling)
#allstreams <- read.csv("C:/Bear 2014/2014 Data_9_14/Fish Datasheets/calcsvtest/allstreams.csv")
#datud<-allstreams[allstreams$stream=="mdw",]

tot <- function(data, indices, formula, pass.ud.df) {
  d <- data[indices,]              # allows boot to select a resample
  fit <- lm(formula, data=d)
  pred.vid <- predict(fit, pass.ud.df)
  return(sum(pred.vid))
}

################################################################################
# simple first order
# bootstrapping with 1000 replications
system.time(results <- boot(data=datud, statistic=tot, R=1000, formula=vid~photo-1, pass.ud.df=pass.ud.df))
plot(results)
mean(results$t0)              # should be close to escapement estimate
boot.ci(results, type="perc") # gives 95% CI values, using percentile method
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0) # gives averaged plus/minus amount

################################################################################
# simple polynomial
# make prediction df
pass.ud.df$photo2 <- pass.ud.df$photo^2
names(pass.ud.df)[2] <- "I(photo^2)"
# bootstrapping with 1000 replications
results <- boot(data=datud, statistic=tot, R=1000, formula=vid~photo+I(photo^2)-1, pass.ud.df=pass.ud.df)
plot(results)
mean(results$t0)
boot.ci(results, type="perc")
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0)
################################################################################
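# The percentile CI endpoints returned by boot.ci() sit in columns 4 and 5 of the $percent matrix,
# so the value reported above is half the CI width. A small helper (hypothetical name, sketch only)
# wrapping the repeated pattern:
ci.halfwidth <- function(boot.out) {
  ci <- boot.ci(boot.out, type = "perc")$percent
  round((ci[1, 5] - ci[1, 4]) / 2, 0)
}
# ci.halfwidth(results)  # same plus/minus amount as the line above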
# segmented first order
tot.seg <- function(data, indices, formula, pass.ud.df) {
  d <- data[indices,]              # allows boot to select a resample
  lin.mod <- lm(formula, data=d)
  fit <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
  pred.vid <- predict(fit, pass.ud.df)
  return(sum(pred.vid))
}

# make prediction df
pass.ud.df$photo2 <- pass.ud.df$photo^2
names(pass.ud.df)[2] <- "I(photo^2)"
pass.ud.df$U1.photo <- c(pass.ud.df$photo[1:(length(pass.ud.df$photo)/2)], rep(0, length(pass.ud.df$photo)/2))

# bootstrapping with R replications
results <- boot(data=datud, statistic=tot.seg, R=1000, formula=vid~photo-1, pass.ud.df=pass.ud.df)
plot(results)
mean(results$t0)
boot.ci(results, type="perc")
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0)

################################################################################
# segmented polynomial
results <- boot(data=datud, statistic=tot.seg, R=1000, formula=vid~photo+I(photo^2)-1, pass.ud.df=pass.ud.df)
plot(results)
mean(results$t0)
boot.ci(results, type="perc")
round((boot.ci(results, type="perc")$percent[1,5]-boot.ci(results, type="perc")$percent[1,4])/2,0)

#########################################################################################################
#########################################################################################################
####### Script to calculate in-stream salmon abundance

### adding rows with no passage so that instream does not truncate at the end of the year
sites <- sort(unique(daily.s$stream))
temp.daily.s <- data.frame(NULL)
for (i in 1:length(unique(daily.s$stream))){
  temp.df3 <- subset(daily.s, stream==sites[i])
  stream <- c(rep(paste(sites[i]),20))
  julian <- c(seq(from=max(temp.df3$julian)+1, to=max(temp.df3$julian)+20))
  pass_daily <- c(rep(0,20))
  temp.df4 <- data.frame(stream, julian, pass_daily)
  temp.df3 <- rbind(temp.df3, temp.df4)
  temp.daily.s <- rbind(temp.daily.s, temp.df3)
}
daily.s <- temp.daily.s

datetest1 <- paste(floor(daily.s$julian), 2013) # year hard-coded to 2013
datetest <- strptime(datetest1, "%j %Y")
daily.s$posdate <- datetest

#################################################################################
##### calculating the number of salmon in the stream each day
# load the streamlife dataframe which gives the mean and sd of the stream life of a salmon in each stream
streamlife <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/streamlife_se+mdw.csv")
sites <- sort(unique(daily.s$stream))
day <- seq(1,40,1)
daysums <- NULL
tempdaysums <- NULL
instream <- NULL
for (j in 1:length(sites)){
  temp.df2 <- subset(daily.s, stream==sites[j])
  psurv <- pnorm(day, mean=streamlife$mu_sl[streamlife$stream==sites[j]],
                 sd=streamlife$sd_sl[streamlife$stream==sites[j]])
  psurv <- 1-rev(psurv)
  #### now create the vector of daily passage, padded with 39 zeros up front to give the 40-day window room to work
  pass <- temp.df2$pass_daily
  pass <- c(rep(0,39), pass)
  for (i in 1:(length(pass)-39)){
    tempdaysums[i] <- sum(pass[i:(i+39)]*(psurv))
  }
  daysums <- c(round(tempdaysums,0))
  tempdaysums <- NULL
  temp.df2$instream <- daysums
  instream <- rbind(instream, temp.df2)
}

###############################################################
############################################################### PLOTTING
hourly$posdate <- strptime(hourly$date, "%m/%e/%Y %k") # converting text datetime to POSIXlt date-time object
#instream<-subset(instream, julian<=270)

########################################################### Meadow Creek
mdw.is <- subset(instream, stream=="Meadow")
mdw.ho <- subset(hourly, stream=="Meadow")
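# Quick illustration of the survival-weight vector built in the in-stream loop above (a sketch with
# made-up numbers, not values used in the analysis): for a hypothetical mean stream life of 10 d and
# sd of 5 d, psurv[40] is the weight applied to today's passage and psurv[1] the weight applied to
# passage from 39 days ago.
toy.day   <- seq(1, 40, 1)
toy.psurv <- 1 - rev(pnorm(toy.day, mean = 10, sd = 5))
round(toy.psurv, 2)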
range(mdw.is$julian)
range(mdw.ho$julian)
#mdw.is<-subset(mdw.is, julian<=270)

###################################################
###################################################
jbreaks <- c(182,197,213,228)
jlims <- c(175,232)
p1 <- ggplot(mdw.ho, aes(x=juliandt, y=pass)) + geom_line(size=.6) + # HOURLY PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(-2000,3000, 1000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

datebreaks <- as.Date(c("2013-07-01","2013-07-16","2013-08-01","2013-08-16"))
datelims <- as.Date(c("2013-06-24", "2013-08-20"))
p2 <- ggplot(mdw.is, aes(x=as.Date(posdate), y=instream)) + geom_line(size=.7) + # MODEL OF SALMON IN STREAM
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,20000, 2000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

p3 <- ggplot(mdw.ho, aes(x=juliandt, y=cumpass)) + geom_line(size=.6) + # Cumulative hourly PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(0,30000, 5000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

grid.arrange(p1, p3, p2, ncol=1)

########################################################### Southeast Creek
se.is <- subset(instream, stream=="Southeast")
se.ho <- subset(hourly, stream=="Southeast")

###################################################
###################################################
jbreaks <- c(182,197,213,228,244,258)
jlims <- c(182,258)
s1 <- ggplot(se.ho, aes(x=juliandt, y=pass)) + geom_line(size=.6) + # HOURLY PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(-2000,2000, 500)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

datebreaks <- as.Date(c("2013-07-01","2013-07-16","2013-08-01","2013-08-16","2013-09-01","2013-09-15"))
datelims <- as.Date(c("2013-07-01", "2013-09-15"))
s2 <- ggplot(se.is, aes(x=as.Date(posdate), y=instream)) + geom_line(size=.7) + # MODEL OF SALMON IN STREAM
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,20000, 4000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

s3 <- ggplot(se.ho, aes(x=juliandt, y=cumpass)) + geom_line(size=.6) + # Cumulative hourly PASSAGE
  scale_x_continuous(breaks=jbreaks, limits=jlims) +
  scale_y_continuous(breaks=seq(0,90000, 10000)) +
  xlab("Date") +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))
grid.arrange(s1, s3, s2, ncol=1)
grid.arrange(p1, s1, p3, s3, p2, s2, ncol=2) # combo figure

############################# END of escapement estimate_se+mdw script


# escapement_model_validation script
# script for calculating validation metrics
library(segmented)
library(car)

vals <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/photo_vid_data_se+mdw.csv")
vals <- subset(vals, stream=="Southeast")

# VALIDATION OF SEGMENTED FIRST-ORDER MODEL
n <- 0
y.hat <- as.vector(0)
################################ Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo-1, data=vals[-n,]) # removes nth row
  seg.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
  if (vals$vid[n]>=0) {
    y.hat[i] <- (seg.mod$coefficients[1] + seg.mod$coefficients[2])*vals[n,4] # column 4 is the photo count
  } else {
    y.hat[i] <- seg.mod$coefficients[1]*vals[n,4]
  }
  pred <- data.frame(vals$photo[n])
  pred$U1.photo <- if(vals$vid[n]>=0){vals$photo[n]} else {0}
  names(pred) <- c("photo", "U1.photo")
  t2 <- predict(seg.mod, pred, se.fit=T, level=.95, interval="confidence")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi # TRUE when the video count falls inside the interval
sum(cov)/length(cov) # proportion of held-out observations covered

# model with all observations
lin.mod <- lm(vid~photo-1, data=vals)
segmented.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
plot(vals$photo, segmented.mod$fitted.values, type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- segmented.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

#####################################################################################################
#####################################################################################################
# VALIDATION OF SEGMENTED POLYNOMIAL MODEL
n <- 0
y.hat <- as.vector(0)
# Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals[-n,])
  seg.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
  if (vals$vid[n]>=0) {
    y.hat[i] <- ((seg.mod$coefficients[1] + seg.mod$coefficients[3])*vals[n,4]) + ((vals[n,4]^2)*seg.mod$coefficients[2])
  } else {
    y.hat[i] <- (seg.mod$coefficients[1]*vals[n,4]) + ((vals[n,4]^2)*seg.mod$coefficients[2])
  }
  pred <- data.frame(vals$photo[n])
  pred$dummy <- vals$photo[n]^2
  pred$U1.photo <- if(vals$vid[n]>=0){vals$photo[n]} else {0}
  names(pred) <- c("photo", "I(photo^2)", "U1.photo")
  t2 <- predict(seg.mod, pred, se.fit=T, level=.9, interval="prediction")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi
sum(cov)/length(cov)

# model with all observations
lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals)
segmented.mod <- segmented(lin.mod, seg.Z = ~photo, psi=0, control=seg.control(it.max=0))
plot(vals$photo, segmented.mod$fitted.values, type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- segmented.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

#####################################################################################################
#####################################################################################################
# VALIDATION OF SIMPLE FIRST-ORDER MODEL
n <- 0
y.hat <- as.vector(0)
# Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo-1, data=vals[-n,])
  y.hat[i] <- lin.mod$coefficients[1]*vals[n,4]
  newdata <- data.frame(vals$photo[n])
  names(newdata) <- "photo"
  t2 <- predict(lin.mod, newdata, se.fit=T, level=.95, interval="confidence")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi
sum(cov)/length(cov)

################# coverage test
newdata2 <- data.frame(seq(-150,120))
names(newdata2) <- "photo"
pred1 <- predict(lin.mod, newdata2, se.fit=T, level=.95, interval="prediction")
plot(newdata2$photo, pred1$fit[,1], type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
lines(newdata2$photo, pred1$fit[,2], lty=2, col=2)
lines(newdata2$photo, pred1$fit[,3], lty=2, col=2)

# model with all observations
lin.mod <- lm(vid~photo-1, data=vals)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- lin.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

#####################################################################################################
#####################################################################################################
# VALIDATION OF SIMPLE LINEAR POLYNOMIAL MODEL
n <- 0
y.hat <- as.vector(0)
# Running the LOOCV iteratively ('for loop').
for (i in 1:nrow(vals)){
  n <- 1 + n
  lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals[-n,])
  y.hat[i] <- lin.mod$coefficients[1]*vals[n,4] + ((vals[n,4]^2)*lin.mod$coefficients[2])
  newdata <- data.frame(vals$photo[n])
  names(newdata) <- "photo"
  t2 <- predict(lin.mod, newdata, se.fit=T, interval="confidence")
  vals$ci.low[i] <- t2$fit[1,2]
  vals$ci.hi[i] <- t2$fit[1,3]
}

####################################### coverage
cov <- vals$vid >= vals$ci.low & vals$vid <= vals$ci.hi
sum(cov)/length(cov)

# model with all observations
lin.mod <- lm(vid~photo+I(photo^2)-1, data=vals)
plot(vals$photo, lin.mod$fitted.values, type="l")
points(vals$photo, vals$vid, pch=20, cex=.5)
vals$Xval.yhat <- y.hat # putting values predicted using LOOCV into dataframe for easy visual inspection
vals$y1 <- lin.mod$fitted.values # values predicted using all data in original model

########## overfit errors
ofit.errors <- vals$y1 - y.hat
summary(ofit.errors)
hist(ofit.errors)
cor(y.hat, vals$y1)
scatterplot(y.hat, vals$y1)
of.ss <- sum((vals$y1 - y.hat)^2)/nrow(vals)
of.ss

########## LOOCV prediction errors
# calculating the errors and taking a look at them
errors <- vals$vid - y.hat
summary(errors)
hist(errors)
cor(y.hat, vals$vid)
scatterplot(y.hat, vals$vid)
# average cross-validation sum of squares
cv.ss <- sum((vals$vid - y.hat)^2)/nrow(vals)
cv.ss

# Percent escapement error using LOOCV estimates:
# percent by which predicted escapement exceeds the escapement known from video counts over the 70 validation hours
(sum(vals$Xval.yhat)-sum(vals$vid))/sum(vals$Xval.yhat)*100

# END escapement_model_validation script


# streamlife_model_fit script
############################################################
################# STREAM LIFE
##############################################################
########################## modeling stream life as a function of width and depth from Carlson et al. 2007
## load packages
library(car)
library(ggplot2)
library(gridExtra)

#########################################################
carlsondat <- read.csv("C:/Bear 2015/Salmon Counting Paper/carlsondatforR.csv") # load Carlson data
cor.test(carlsondat$width, carlsondat$depth)    # width and depth are highly correlated, so we cannot use both in a model
cor.test(carlsondat$mean.sl, carlsondat$sd.sl)  # mean and sd of stream life are highly correlated, so we model only the mean
                                                # and assume the sd is .499 * mean
sum(carlsondat$sd.sl)/sum(carlsondat$mean.sl)   # where .499 comes from -- the ratio of sd to mean

# mean stream life models
clm1 <- lm(mean.sl~width+depth, data=carlsondat) # neither predictor is significant because of collinearity
vif(clm1) # VIFs over 4 signal bad multicollinearity

# just width
clmw <- lm(mean.sl~width, data=carlsondat)
summary(clmw)
AIC(clmw)

# just depth
clmd <- lm(mean.sl~depth, data=carlsondat)
summary(clmd) # shows details about model
AIC(clmd)
# well... looks like depth is a better predictor of mean stream life than width

#### plot of mean stream life as a function of width
ggplot(carlsondat, aes(x=width, y=mean.sl)) + geom_point(size=3) +
  stat_smooth(method="lm", se=F, color="black", size=1) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

## predicting mean stream life in Meadow and Southeast creeks from the depth model (clmd) coefficients
# Meadow
3.32046 + .28729*13.2          # Meadow mean stream life (mean depth is 13.2 cm)
(3.32046 + .28729*13.2)*.499   # Meadow sd of stream life
# Southeast
3.32046 + .28729*9.1           # Southeast mean stream life (mean depth is 9.1 cm)
(3.32046 + .28729*9.1)*.499    # Southeast sd of stream life

######## END streamlife_model_fit script


##### streamlife_sensitivity_analysis script
############################################################
################# STREAM LIFE SENSITIVITY TEST
##############################################################
library(plyr)
library(ggplot2)
library(scales)
library(gridExtra)

########################## First, calculate passage using the normal method.
# first, load hourly data in long format
hourraw <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/salmon_TLcounts_SE+MDW.csv")
# read in calibration values (selected using model selection procedure)
cals <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/cals_se+mdw.csv")
sites <- sort(unique(hourraw$stream)) # make vector of site names

# loop through the hourly time-lapse counts of salmon for each stream, multiplying each count by the model
# equation to get estimates of true salmon passage. The model equation is unique to each stream.
hourly <- data.frame(NULL) # pre-load an empty dataframe
for (i in 1:length(unique(hourraw$stream))){
  temp.df <- subset(hourraw, stream==sites[i])          # select the data for site i
  temp.df$julian   <- floor(temp.df$date_x %%365-29)    ## julian date (whole days)
  temp.df$juliandt <- temp.df$date_x %%365-29           ## julian date including fraction of day
  temp.df$uppass   <- temp.df$up*cals$upslope[cals$stream==sites[i]] +       # fitted number of upward-passing fish
    ((temp.df$up^2)*cals$uppoly[cals$stream==sites[i]])
  temp.df$downpass <- temp.df$down*cals$downslope[cals$stream==sites[i]] +   # fitted number of downward-passing fish
    ((temp.df$down^2)*cals$downpoly[cals$stream==sites[i]])
  temp.df$pass    <- temp.df$uppass-temp.df$downpass    # up fish - down fish
  temp.df$cumpass <- cumsum(temp.df$pass)               # cumulative sum of fish passage
  hourly <- rbind(hourly, temp.df)                      # reconstitutes the dataframe
  print(sum(hourly$pass))                               # QAQC output as feedback to user
}

##### summarizing salmon passage data by day
daily.s <- ddply(hourly, .(stream,julian), summarise, pass_daily=round(sum(pass),0))

### adding rows with no passage so that instream does not truncate at the end of the year
sites <- sort(unique(daily.s$stream))
temp.daily.s <- data.frame(NULL)
for (i in 1:length(unique(daily.s$stream))){
  temp.df3 <- subset(daily.s, stream==sites[i])
  stream <- c(rep(paste(sites[i]),20))
  julian <- c(seq(from=max(temp.df3$julian)+1, to=max(temp.df3$julian)+20))
  pass_daily <- c(rep(0,20))
  temp.df4 <- data.frame(stream, julian, pass_daily)
  temp.df3 <- rbind(temp.df3, temp.df4)
  temp.daily.s <- rbind(temp.daily.s, temp.df3)
}
daily.s <- temp.daily.s

datetest1 <- paste(floor(daily.s$julian), 2013)
datetest <- strptime(datetest1, "%j %Y")
daily.s$posdate <- datetest

###############################################################
############################################################### PERTURBING THE STREAM LIFE PARAMETERS
############################################################### TO DETERMINE SENSITIVITY
# first, load the streamlife dataframe which gives the mean and sd of the stream life of a salmon in each stream
streamlife <- read.csv("C:/Bear 2015/Salmon Data summary/code for pubs/streamlife_se+mdw.csv")
stream <- "Southeast"                               # set stream here
daily.s.sub <- subset(daily.s, stream=="Southeast") # set stream here

###############################################################
############################################################### CHANGING MEAN
###############################################################
day <- seq(1,40,1)
daysums <- NULL
tempdaysums <- NULL
instream <- NULL
slmult <- seq(-4,4,2)
for (j in 1:length(slmult)){
  temp.df2 <- daily.s.sub
  psurv <- pnorm(day, mean=streamlife$mu_sl[streamlife$stream==stream]+slmult[j],
                 sd=(streamlife$mu_sl[streamlife$stream==stream]*.499))
  psurv <- 1-rev(psurv)
  #### now create the vector of daily passage, padded with 39 zeros up front to give the 40-day window room to work
  pass <- temp.df2$pass_daily
  pass <- c(rep(0,39), pass)
  for (i in 1:(length(pass)-39)){
    tempdaysums[i] <- sum(pass[i:(i+39)]*(psurv))
  }
  daysums <- c(round(tempdaysums,0))
  tempdaysums <- NULL
  temp.df2$instream <- daysums
  temp.df2$slmult <- paste(slmult[j])
  temp.df2$slmu <- paste(streamlife$mu_sl[streamlife$stream==stream]+slmult[j])
  temp.df2$slsd <- paste(streamlife$mu_sl[streamlife$stream==stream]*.499)
  instream <- rbind(instream, temp.df2)
}
se.m.df <- instream

datebreaks <- as.Date(c("2013-07-01","2013-07-16","2013-08-01","2013-08-16"))
datelims <- as.Date(c("2013-06-24", "2013-09-20"))
plot1 <- ggplot(instream, aes(x=as.Date(posdate), y=instream, colour=slmult)) + geom_line(size=0.7) +
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,30000, 5000), limits=c(0,23000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

###############################################################
############################################################### CHANGING SD
###############################################################
day <- seq(1,40,1)
daysums <- NULL
tempdaysums <- NULL
instream <- NULL
slmult <- seq(-4,4,2)
for (j in 1:length(slmult)){
  temp.df2 <- daily.s.sub
  psurv <- pnorm(day, mean=streamlife$mu_sl[streamlife$stream==stream],
                 sd=(streamlife$mu_sl[streamlife$stream==stream]+slmult[j])*.4999)
  psurv <- 1-rev(psurv)
  #### now create the vector of daily passage, padded with 39 zeros up front to give the 40-day window room to work
  pass <- temp.df2$pass_daily
  pass <- c(rep(0,39), pass)
  for (i in 1:(length(pass)-39)){
    tempdaysums[i] <- sum(pass[i:(i+39)]*(psurv))
  }
  daysums <- c(round(tempdaysums,0))
  tempdaysums <- NULL
  temp.df2$instream <- daysums
  temp.df2$slmult <- paste(slmult[j])
  temp.df2$slmu <- paste(streamlife$mu_sl[streamlife$stream==stream])
  temp.df2$slsd <- paste((streamlife$mu_sl[streamlife$stream==stream]+slmult[j])*.4999)
  instream <- rbind(instream, temp.df2)
}
se.s.df <- instream

plot2 <- ggplot(instream, aes(x=as.Date(posdate), y=instream, colour=slmult)) + geom_line(size=0.7) +
  scale_x_date(breaks=datebreaks, limits=datelims, labels=date_format("%b %e")) +
  scale_y_continuous(breaks=seq(0,30000, 5000), limits=c(0,23000)) +
  theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
                     axis.title.y=element_text(size=16, lineheight=.9),
                     axis.title.x=element_text(size=16, lineheight=.9, hjust=.46),
                     axis.text=element_text(size=12))

grid.arrange(plot1, plot2) # combo plot of all above
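# A minimal numeric companion to the sensitivity plots (sketch; assumes se.m.df and se.s.df built above):
# peak in-stream abundance under each perturbation of the mean and the sd of stream life.
ddply(se.m.df, .(slmult), summarise, peak_instream = max(instream)) # varying the mean
ddply(se.s.df, .(slmult), summarise, peak_instream = max(instream)) # varying the sd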