############################################## ### BERGSHOEFF ET AL. - DEPLOYMENT STATS ### ### Last Updated: 11/27/17 ### ############################################## ##################### ### Load packages ### ##################### require("plyr") require("tidyr") require("magrittr") require("beanplot") require("plotrix") require("lme4") require("glmm") require("nlme") require("psych") require("pastecs") require("lubridate") require("ggplot2") require("dplyr") require("car") require("MASS") require("Hmisc") require("doBy") ############################# ### Set working directiry ### ############################# setwd("C:/Users/Jon/Dropbox/MSc Documents/Green Crab Project/Thesis/Chapter 1/MS1 - PeerJ/PeerJ - Version 1/SI") ####################### ### SET DEFAULT PAR ### ####################### par() def <- par() ###################### ### DATA WRANGLING ### ###################### # Bring in the raw data raw_data <- read.csv("Bergshoeff et al Deployment Data.csv", nrows = 79)[1:37] # Remove DEP 21 because it does not have a cam:no-cam pair, only no-cam raw_data <- raw_data[!(raw_data$DEP==21),] # Turn DUR_HR_DEC into an r-readable time format raw_data$DATE_TIME_DEP <- mdy_hm(raw_data$DATE_TIME_DEP) raw_data$DATE_TIME_RETR <- mdy_hm(raw_data$DATE_TIME_RETR) # DEP and RETR date + times are now set as "instants" in DF and can be manipulated is.instant(raw_data$DATE_TIME_DEP) is.instant(raw_data$DATE_TIME_RETR) # How long were they deployed for? raw_data$DUR <- raw_data$DATE_TIME_RETR - raw_data$DATE_TIME_DEP # Round to 2 decimal places raw_data$DUR <- round(raw_data$DUR, digits = 2) #Change from difftime object to numeric object raw_data$DUR <- as.numeric(raw_data$DUR) is.numeric(raw_data$DUR) #True ##### NEW VARIABLE ##### #New camera presence variable, should be an easier way to do this using an ifelse statement raw_data <- raw_data %>% mutate(CAM_PRES = ifelse(TRAP == 1 | TRAP == 2, "Y", "N")) #ifelse statement worked out with Brett raw_data$CAM_PRES <- as.factor(raw_data$CAM_PRES) is.factor(raw_data$CAM_PRES) raw_data$CAM_PRES <- as.factor(raw_data$CAM_PRES) is.factor(raw_data$CAM_PRES) ##### SUBSET DATAFRAME ##### # Make a combined Fair Haven location raw_data <- raw_data %>% mutate(LOCAL_COMB = LOCAL) raw_data$LOCAL_COMB[raw_data$LOCAL_COMB == "Fair Haven (2)"] <- "Fair Haven" # Subset Fair Haven location FH <- raw_data FH <- subset(FH, LOCAL_COMB == "Fair Haven") # Subset Port Harmon location PH <- raw_data PH <- subset(PH, LOCAL_COMB == "Port Harmon") # Subset for Port Harmon and Fair Haven combined FH_PH <- raw_data FH_PH <- subset(FH_PH, LOCAL_COMB == "Fair Haven" | LOCAL_COMB == "Port Harmon") ######################### ### EXPLORATORY PLOTS ### ######################### # ALL SITES plot(raw_data$DUR, raw_data$GC_TOTAL, xlab = "Duration (Hr)", ylab = "Total Green Crab (#)", main = "Green crab catch vs. duration for all sites") #plot all sites vs. catch ALL_DUR_PLOT <- ggplot(raw_data, aes(DUR, GC_TOTAL)) ALL_DUR_PLOT + geom_point(aes(colour = factor(LOCAL_COMB), size = 10)) + labs( x = "Duration (hrs)", y = "Total Green Crab Catch", title = "Duration vs. Catch - All Sites") FH_PH_DUR_PLOT <- ggplot(FH_PH, aes(DUR, GC_TOTAL)) FH_PH_DUR_PLOT + geom_point(aes(colour = factor(LOCAL_COMB), size = 10)) + labs( x = "Duration (hrs)", y = "Total Green Crab Catch", title = "Duration vs. Catch - Fair Haven and Port Harmon, NL") #FAIR HAVEN (COMBINED) plot(FH$DUR, FH$GC_TOTAL, xlab = "Duration (Hr)", ylab = "Total Green Crab (#)", main = "Green crab catch vs. duration for Fair Haven, NL") #Fair haven 1 vs 2 (To investigate whether clustering was due to different sampling times) plot(FH$LOCAL, FH$GC_TOTAL, xlab = "Duration (Hr)", ylab = "Total Green Crab (#)", main = "Green crab catch vs. duration for Fair Haven, NL") FH_DUR_PLOT <- ggplot(FH, aes(DUR, GC_TOTAL)) FH_DUR_PLOT + geom_point(aes(colour = factor(LOCAL_COMB), size = 10)) + labs( x = "Duration (hrs)", y = "Total Green Crab Catch", title = "Duration vs. Catch - Fair Haven, NL") #PORT HARMON plot(PH$DUR, PH$GC_TOTAL, xlab = "Duration (Hr)", ylab = "Total Green Crab (#)", main = "Green crab catch vs. duration for Port Harmon, NL") PH_DUR_PLOT <- ggplot(PH, aes(DUR, GC_TOTAL)) PH_DUR_PLOT + geom_point(aes(colour = factor(LOCAL_COMB), size = 10)) + labs( x = "Duration (hrs)", y = "Total Green Crab Catch", title = "Duration vs. Catch - Port Harmon, NL") #PORT HARMON AND FAIR HAVEN par(mar = c(4.5, 4.5, 1, 1)) par(cex = 1.3) palette(c("grey50", "red", "firebrick")) plot(FH_PH$DUR, FH_PH$GC_TOTAL, xlab = "Duration (Hr)", ylab = "Number of green crab captured", col =FH_PH$LOCAL_COMB, pch = 19, cex = 1.2) axis(1, at=c(0:25)) legend(x=8, y= 300, legend=c("Port Harmon, NL", "Fair Haven, NL"), bty = "n", pch = 19, pt.cex = 1.2, col=c("grey50","firebrick"), xpd = NA, cex = 1.0) ##################################################################################################### ####################### ### MS 1 - FIGURE 4 ### ####################### par(def) # START # png(filename="Figure 4.png", width=1900, height=1100, res=200) catch_site_plot <- filter(raw_data, LOCAL_COMB == "Fair Haven" | LOCAL_COMB == "Port Harmon") catch_site_plot <- droplevels(catch_site_plot) #Drop unused factor labels that showed up on plot despite no longer being in dataframe levels(catch_site_plot$LOCAL_COMB) catch_site_plot$LOCAL_COMB <- revalue(catch_site_plot$LOCAL_COMB, c("Fair Haven" = "Fair Haven, NL", "Port Harmon" = "Little Port Harmon, NL")) par(mfrow = c(2,2)) par(mar = c(3.0, 4.0, 1, 1)) par(mgp=c(2,1,0)) par(cex = 1.1) ##### A. Duration vs. Catch Plot Revised - FH ##### #FAIR HAVEN palette(c("grey50", "red", "firebrick")) plot(FH$DUR, FH$GC_TOTAL, xlab = "Duration (h)", ylab = "Green crabs / trap", col = "firebrick", pch = 19, axes = FALSE) title("A", line = -1.8, adj = 0.8, col.main= "red", cex.main = 2) axis(side=1, at=c(21:25, by = 1)) axis(side=2, at=seq(0, 300, by=50)) box() # n = 16 ##### B. Duration vs. Catch Plot Revised - PH ##### #PORT HARMON palette(c("grey50", "red", "firebrick")) plot(PH$DUR, PH$GC_TOTAL, xlab = "Duration (h)", ylab = "Green crabs / trap", col = "grey50", pch = 19, cex = 1.2, axes = FALSE) title("B", line = -1.8, adj = 0.8, col.main= "red", cex.main = 2) axis(side=1, at=c(7:15, by = 1)) axis(side=2, at=seq(0, 100, by=20)) box() # n = 13 ##### C. Plot of Site (x) vs. Duration (Y) ######## plot(catch_site_plot$LOCAL_COMB, catch_site_plot$DUR, ylab = "Duration (h)", col = c("firebrick", "grey50")) title("C", line = -1.8, adj = 0.8, col.main= "red", cex.main = 2) ##### D. Plot of catch vs. Site ############ plot(catch_site_plot$LOCAL_COMB, catch_site_plot$GC_TOTAL, ylab = "Green crabs / trap", col = c("firebrick", "grey50", "white", "white", "white", "white")) title("D", line = -1.8, adj = 0.8, col.main= "red", cex.main = 2) dev.off() # END # ################################################################################# ################################ ### EXPLORATORY PLOTS CONT'D ### ################################ par(def) #ALL SITES hist(raw_data$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for all sites", breaks = 10) hist(raw_data$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for all sites", breaks = 20) #FAIR HAVEN (COMBINED) hist(FH$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for Fair Haven, NL", breaks = 10) hist(FH$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for Fair Haven, NL", breaks = 20) #PORT HARMON hist(PH$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for Port Harmon, NL", breaks = 10) hist(PH$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for Port Harmon, NL", breaks = 20) #PORT HARMON AND FAIR HAVEN hist(FH_PH$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for Port Harmon & Fair Haven, NL", breaks = 10) hist(FH_PH$GC_TOTAL, xlab = "Total Green Crab (#)", ylab = "Frequency", main = "Green crab catch frequencies for Port Harmon & Fair Haven, NL", breaks = 20) #Camera vs. No-Camera #FAIR HAVEN (COMBINED) beanplot(FH$GC_TOTAL ~ FH$CAM_PRES, data = FH, ylim=c(0,500), xlab = "Camera Presence", ylab = "Green Crab Catch / Pot", col=c("gray", "black", "black", "blue")) beanplot(FH$GC_TOTAL ~ FH$CAM_PRES, data = FH) #PORT HARMON beanplot(PH$GC_TOTAL ~ PH$CAM_PRES, data = PH, ylim=c(0,200), xlab = "Camera Presence", ylab = "Green Crab Catch / Pot", col=c("gray", "black", "black", "blue")) beanplot(PH$GC_TOTAL ~ PH$CAM_PRES, data = PH) #PORT HARMON & FAIR HAVEN beanplot(FH_PH$GC_TOTAL ~ FH_PH$CAM_PRES, data = PH, xlab = "Camera Presence", ylab = "Green Crab Catch / Pot", main = "Camera vs. No Camera in Port Harmon & Fair Haven, NL") #To visualize differences in mean catch between the two sites... beanplot(FH_PH$GC_TOTAL ~ FH_PH$LOCAL_COMB, xlab = "Camera Presence", ylab = "Green Crab Catch / Pot", main = "Camera vs. No Camera in Port Harmon & Fair Haven, NL") #Make an asym. beanplot as an all-in-one graph depicting: # 1) make the point about the camera vs. no-camera comparison # 2) Show how the sites vary ASYM_BEAN <- FH_PH ASYM_BEAN <- subset(ASYM_BEAN, select = c("GC_TOTAL", "CAM_PRES", "LOCAL_COMB")) ASYM_BEAN ASYM_BEAN <- droplevels(ASYM_BEAN) ASYM_BEAN$LOCAL_COMB ASYM_BEAN <- ASYM_BEAN %>% mutate(PROPER_NAME = ifelse(LOCAL_COMB == "Fair Haven", "Fair Haven, NL", "Little Port Harmon, NL")) #ifelse statement worked out with Brett #Subset dataframes based on site and camera presence FH_NC <- ASYM_BEAN$GC_TOTAL[ASYM_BEAN$LOCAL_COMB == "Fair Haven" & ASYM_BEAN$CAM_PRES == "N"] FH_C <- ASYM_BEAN$GC_TOTAL[ASYM_BEAN$LOCAL_COMB == "Fair Haven" & ASYM_BEAN$CAM_PRES == "Y"] PH_NC <- ASYM_BEAN$GC_TOTAL[ASYM_BEAN$LOCAL_COMB == "Port Harmon" & ASYM_BEAN$CAM_PRES == "N"] PH_C <- ASYM_BEAN$GC_TOTAL[ASYM_BEAN$LOCAL_COMB == "Port Harmon" & ASYM_BEAN$CAM_PRES == "Y"] ### SPLIT BEAN PLOT OF CAMERA VS. NO CAMERA CATCH ##### par(lend = 0, mai = c(0.8, 1.4, 0.2, 0.5)) beanplot(GC_TOTAL ~ CAM_PRES + PROPER_NAME, data = ASYM_BEAN, ylab = "Total Green Crab / Pot", ylim=c(0,420), side = "both", cex.lab = 1.2, cex.axis = 1.2, border = "black", innerborder ="black", beanlinewd = 3.5, overallline = "mean", col = list("grey50", c("firebrick", "black"))) minor.tick(nx=0, ny=5, tick.ratio = 0.5) legend(2.0, 420, border = "black", bty = "n", cex = 1.2, fill = c("grey50", "firebrick"), legend = c("No Camera", "Camera")) ####################### ### MS 1 - FIGURE 6 ### ####################### # START # png(filename="Figure 6.png", width=1500, height=1000, res=200) par(def) par(lend = 0, mai = c(0.8, 1.4, 0.2, 0.5)) par(mar = c(3.0, 4.8, 1, 1)) beanplot(GC_TOTAL ~ CAM_PRES + PROPER_NAME, data = ASYM_BEAN, ylab = "Total green crabs / trap", ylim=c(0,420), side = "both", what = c(0,0,0,0), cex.lab = 1.5, cex.axis = 1.5, border = NA, innerborder = NA, beanlinewd = 3.5, overallline = "mean", col = list("grey50", c("firebrick", "black"))) abline(h = mean(ASYM_BEAN$GC_TOTAL), col="black", lty=5, lwd=2) beanplot(GC_TOTAL ~ CAM_PRES + PROPER_NAME, data = ASYM_BEAN, ylab = "Total green crabs / trap", ylim=c(0,420), side = "both", add = T, what = c(0,1,0,0), cex.lab = 1.5, cex.axis = 1.5, border = NA, innerborder = NA, beanlinewd = 3.5, overallline = "mean", col = list("grey50", c("firebrick", "black"))) beanplot(GC_TOTAL ~ CAM_PRES + PROPER_NAME, data = ASYM_BEAN, ylab = "Total green crabs / trap", ylim=c(0,420), side = "both", add = T, what = c(0,0,1,1), cex.lab = 1.5, cex.axis = 1.5, border = NA, innerborder = NA, beanlinewd = 3.5, overallline = "mean", col = list("grey50", c("firebrick", "black"))) minor.tick(nx=0, ny=5, tick.ratio = 0.5) legend(1.5, 400, border = NA, x.intersp = 0.5, bty = "n", cex = 1.5, fill = c("grey50", "firebrick"), legend = c("Camera absent", "Camera present")) dev.off() # END # par(def) ############################### ### CAMERA EFFECTS - MODELS ### ############################### ### MODEL 1 - ORIGINAL COMBINED ### model1 <- lme(data = FH_PH, GC_TOTAL ~ CAM_PRES + LOCAL_COMB, random = ~ 1 | DEP) #final model combining both sites into one model summary(model1) plot(model1) res1 <- resid(model1) fit1<-fitted(model1) plot(fit1,res1) qqnorm(res1) qqline(res1) hist(res1) ### MODEL - FAIR HAVEN MODEL ### FH_model <- lme(data = FH, GC_TOTAL ~ CAM_PRES, random = ~ 1 | DEP) summary(FH_model) plot(FH_model) res_FH <- resid(FH_model) fit_FH <-fitted(FH_model) plot(fit_FH, res_FH) qqnorm(res_FH) qqline(res_FH) hist(res_FH) #### PORT HARMON MODEL #### PH_model <- lme(data = PH, GC_TOTAL ~ CAM_PRES, random = ~ 1 | DEP) summary(PH_model) plot(PH_model) res_PH <- resid(PH_model) fit_PH <-fitted(PH_model) plot(fit_PH, res_PH) qqnorm(res_PH) qqline(res_PH) hist(res_PH) ###### TRY ADDING IN DURATION ###### ########################################### #### MS1 - CAMERA EFFECTS FINAL MODELS #### ########################################### # Mean and SD deployment time for FH and PH # Fair Haven mean(FH$DUR) # mean = 22.9 sd(FH$DUR) # 0.8 psych::describe(FH$DUR) # range = 21.8 - 24.3 # Port Harmon mean(PH$DUR) # 11.1 sd(PH$DUR) #2.8 psych::describe(PH$DUR) # 7.4 - 14.1 # START # ### FAIR HAVEN MODEL 2 ### FH_model2 <- lme(data = FH, GC_TOTAL ~ CAM_PRES + DUR, random = ~ 1 | DEP) summary(FH_model2) # Intercept is the number of green crab captured per deployment when no camera is present (CAM_PRES = N, comes before Y in the alphabet) # Therefore, when no camera is present at Fair Haven, there are 46.5 gc/trap # When the camera is present (CAM_PRES = Y), there are (+)19.4 gc/trap more than the intercept # https://www.youtube.com/watch?v=VhMWPkTbXoY plot(FH_model2) res_FH2 <- resid(FH_model2) fit_FH2 <-fitted(FH_model2) plot(fit_FH2, res_FH2) qqnorm(res_FH2) qqline(res_FH2) hist(res_FH2) #### PORT HARMON MODEL 2 #### PH_model2 <- lme(data = PH, GC_TOTAL ~ CAM_PRES + DUR, random = ~ 1 | DEP) summary(PH_model2) plot(PH_model2) res_PH2 <- resid(PH_model2) fit_PH2 <-fitted(PH_model2) plot(fit_PH2, res_PH2) qqnorm(res_PH2) qqline(res_PH2) hist(res_PH2) # END # # Start of new code added on 11/12/2017 ################################################################################### ### Try adding the the effect of time of day to model in the form of video type ### ################################################################################### PH_model3 <- lme(data = PH, GC_TOTAL ~ CAM_PRES + DUR + SET_TYPE, random = ~ 1 | DEP) summary(PH_model3) plot(PH_model3) res_PH3 <- resid(PH_model3) fit_PH3 <-fitted(PH_model3) plot(fit_PH3, res_PH3) qqnorm(res_PH3) qqline(res_PH3) hist(res_PH3) # End of new code added on 11/12/2017 ##################################################################### ##################################################################### ##### Descriptive statistics for various values reported in MS1 ##### ##################################################################### ##################################################################### # Standard error function std <- function(x) sd(x)/sqrt(length(x)) #Made a new variable called REC_DUR that only had camera traps, and any deployment duration over 13 hours was marked as a record time of 13 raw_data <- raw_data %>% mutate(REC_DUR = DUR) raw_data$REC_DUR[raw_data$REC_DUR > 13] <- 13 raw_data$REC_DUR[raw_data$CAM_PRES == "N"] <- NA ##### Number of camera deployments vs. non-camera deployments ##### plyr::count(raw_data$CAM_PRES) # N = 39 # Y = 39 ##### Deployment duration stats #### range(raw_data$DUR) # range = 2.7 to 24.4 psych::describe(raw_data$DUR) # mean = 14.2 sd(raw_data$DUR) #SD = 6.1 std.error(raw_data$DUR) # S.E. = 0.7 ##### Recording duration stats ##### # Mean record time psych::describe(raw_data$REC_DUR) # record duration 11.22 +/- 0.4 ##### GC catch stats at all sites #### describe(raw_data$GC_TOTAL) #mean, min, max, se etc. # summaryBy Function guide # # Summarized_Data <- summaryBy(Summarize Column A + Column B + Column N... ~ According to Column X, data = Base_Data, FUN = c(Function 1, Function 2...)) ########################### ### CONTENT FOR TABLE 1 ### ########################### gc_catch_summary <- summaryBy(GC_TOTAL ~ LOCAL_COMB, data = raw_data, FUN = c(mean, std, sd, min, max, sum)) gc_catch_summary ### FH Green crab catch summary ### FH_DESC <- raw_data$GC_TOTAL[raw_data$LOCAL_COMB == "Fair Haven"] #GC total catch at just fair haven psych::describe(FH_DESC) sum(FH_DESC) ### PH Green crab catch summary ### PH_DESC <- raw_data$GC_TOTAL[raw_data$LOCAL_COMB == "Port Harmon"] #GC total catch at just port harmon psych::describe(PH_DESC) sum(PH_DESC) ### BB Green crab catch summary ### BB_DESC <- raw_data$GC_TOTAL[raw_data$LOCAL_COMB == "Bonne Bay"] #GC total catch at just Bonne Bay psych::describe(BB_DESC) sum(BB_DESC) ### LHE Green crab catch summary ### LHE_DESC <- raw_data$GC_TOTAL[raw_data$LOCAL_COMB == "Little Harbour East"] # GC total ...... psych::describe(LHE_DESC) sum(LHE_DESC) ### BH Green crab catch summary ### BH_DESC <- raw_data$GC_TOTAL[raw_data$LOCAL_COMB == "Boat Harbour"] # GC total......... psych::describe(BH_DESC) sum(BH_DESC) ### PA Green crab catch summary ### PA_DESC <- raw_data$GC_TOTAL[raw_data$LOCAL_COMB == "Penguin Arm"] #Gc total....... PA_DESC psych::describe(PA_DESC) ### ALL SITES Green crab catch summary ### psych::describe(raw_data$GC_TOTAL) #grand total of all GC caught sum(raw_data$GC_TOTAL) ################################################ #### camera vs. no camera descriptive stats #### ################################################ FH_CAM <- dplyr::filter(FH, CAM_PRES == "Y") FH_NO_CAM <- dplyr::filter(FH, CAM_PRES == "N") PH_CAM <- dplyr::filter(PH, CAM_PRES == "Y") PH_NO_CAM <- dplyr::filter(PH, CAM_PRES == "N") psych::describe(FH_CAM$GC_TOTAL) # mean = 140.9, SD = 99.6 psych::describe(FH_NO_CAM$GC_TOTAL) # mean = 122.6, SD = 80.9 psych::describe(PH_CAM$GC_TOTAL) # mean = 26.3, SD =26.3 psych::describe(PH_NO_CAM$GC_TOTAL) # mean = 42.5, SD = 34.4 describe(FH_PH$GC_TOTAL) # ALL IN ONE TABLE FH_PH_CAM_NOCAM_STATS <- summaryBy(GC_TOTAL ~ CAM_PRES + LOCAL_COMB, data = FH_PH, FUN = c(mean, std)) FH_PH_CAM_NOCAM_STATS ####################################### ### Table 2 - Bycatch investigation ### ####################################### Bycatch <- summaryBy(BC_RC + BC_FL + BC_CUN + BC_SC + BC_AE +GC_TOTAL ~ LOCAL_COMB, data = raw_data, FUN = sum) Bycatch <- t(Bycatch) Bycatch