# Code for Cost-effort analysis of ## # Baited Remote Underwater Video (BRUV) and ## # environmental DNA (eDNA) in monitoring marine ## # ecological communities ## #Alice Julia Clark, Sophie Atkinson, Valentina Scarponi, Tim Cane, Nathan R. Geraldi, Ian Hendy, Reuben Shipway, Mika Robert Peck ### check spatial autocorrelation #### #libraries library (vegan) # 2.6.4 library (fossil) # 0.4.0 library(adespatial) # 0.3.21 library(ade4) # 1.7.22 #load df with all data df <- read.csv("eDNA_allvariables.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T) #subset dataset into GPS coordinates, species and env variables site.xy <- df[,7:8] spe <- df[, 10:81] env <- df[,2:6] spe.hel <- decostand(spe, "hel") #step 1 construct matrix of dbMEM variables kelp.dbmem.tmp <- dbmem(site.xy, silent = F) #silent = F allows fc to display the truncation level on screen, this level is a bit larger than the largest distance among sites that keeps all sites connected in a min spanning tree kelp.dbmem <- as.data.frame(kelp.dbmem.tmp) #truncation distance used above (thr <- give.thresh(dist(site.xy))) #3.01 #display and count the eigenvalues attributes(kelp.dbmem.tmp)$values length(attributes(kelp.dbmem.tmp)$values) # as we can see, there are 5 dbMEM eigenvectors with positive spatial correlation. Prior to our RDA we will apply forward selection with Blanchet et al double stopping criterion ##Step 2: run the global dbMEM analysis on the detrended hellinger-transformed data (kelp.dbmem.rda <- rda(spe.hel ~ ., kelp.dbmem)) anova(kelp.dbmem.rda) #p=0.029 ##Step 3 since the R-asqure is (almost) sig, compute the adjusted R2 and run a forward selection of the dbmem variables (kelp.R2a <- RsquareAdj(kelp.dbmem.rda)$adj.r.squared) #0.04791317 (kelp.dbmem.fwd <- forward.sel(spe.hel, as.matrix(kelp.dbmem), adjR2thresh = kelp.R2a)) (nd.sig.dbmem <- nrow(kelp.dbmem.fwd)) # number of signif. dbMEM = 1 #identity of the significant dbMEM in increasing order (dbmem.sign <- sort(kelp.dbmem.fwd[ ,2])) #4 #write the sig dbMEM to a new object dbmem.red <- kelp.dbmem[, c(dbmem.sign)] ## Step 4. New dbMEM analysis with 8 sig dbMEM variables ## adjusted R-square after forward selection: R2adj = (kelp.dbmem.rda2 <- rda(spe.hel ~ ., data = dbmem.red)) (kelp.fwd.R2a <- RsquareAdj(kelp.dbmem.rda2)$adj.r.squared) #0.04 anova(kelp.dbmem.rda2) (axes.test <- anova(kelp.dbmem.rda2, by = "axis")) #number sig axes (nb.ax <- length(which(axes.test[ , ncol(axes.test)] <= 0.05))) ## Step 5. Plot the significant canonical axes kelp.rda2.axes <- scores(kelp.dbmem.rda2, choices = c(1:nb.ax), display = "lc", scaling = 1) par(mfrow = c(1, nb.ax)) for(i in 1:nb.ax){ s.value(site.xy, kelp.rda2.axes[ ,i], sub = paste("RDA",i), csub = 2) } #now check if variation is related to environmental variables using function lm and shapiro test #interpreting the spatial variation: regression of the significant canonical axes on the environmental variables, with Shapiro-Wilk normality tests of residuals kelp.rda2.axis1.env <- lm(kelp.rda2.axes[,1] ~ ., data = env) shapiro.test(resid(kelp.rda2.axis1.env)) # p=0.8117 summary(kelp.rda.axis1.env) #none are sig ##other axes weren't significant, therefore not explained by env variables # scalog function source('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/scripts/NumE colR2/scalog.R') scalog(kelp.dbmem.rda) ##works now so dont need this... ## Step 4. New dbMEM analysis with 8 sig dbMEM variables ## adjusteed R-square after forward selection: R2adj = #(kelp.fwd.R2a <- RsquareAdj(kelp.dbmem.rda)$adj.r.squared) #0.04 #anova(kelp.dbmem.rda) #p=0.0055 #(axes.test <- anova(kelp.dbmem.rda, by = "axis")) #first axis almost sig #(nb.ax <- length(which(axes.test[ , ncol(axes.test)] <= 0.05))) #none of axes are significant ## Step 5. Plot the significant canonical axes #kelp.rda.axes <- # scores(kelp.dbmem.rda, # choices = c(1:nb.ax), # display = "lc", # scaling = 1) #par(mfrow = c(1, nb.ax)) #for(i in 1:nb.ax){ # s.value(site.xy, kelp.rda.axes[ ,i], # sub = paste("RDA2",i), # csub = 2) #} #now check if variation is related to environmental variables using function lm and shapiro test #interpreting the spatial variation: regression of the significant canonical axes on the environmental variables, with Shapiro-Wilk normality tests of residuals kelp.rda.axis1.env <- lm(kelp.rda.axes[,1] ~ ., data = env) shapiro.test(resid(kelp.rda.axis1.env)) # p=0.089 summary(kelp.rda.axis1.env) #none are sig ##other axes weren't significant ##above process is very long so instead can use quickMEM #The function takes the data and tests if there is a significant trend, if it finds one it detrends the data and it goes on and then after that it computes the dbMEM eigenfunctions and retains those with positive Morans I (i.e. positive spatial correlation) #then it tests RDA with all the dbMEM, if not significant then it stops there - so no significant spatial structure in your data so it will stop there - no need to carry on #if it is significant then it continues and runs a forward selection of the dbMEM with the Blancher et al double stopping criterion so takes adjusted R2 of global RDA. When it has selected some variables in runs a new RDA with the significant dbMEM and it tests the RDA with those ones and specifically it tests the axes to see how many of those axes are significant and then it draws the maps and the output object contains all the results! Results of RDAs and forward selection etc. #if you want to split dbMEM into groups have to do that manually! # quickMEM source('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/scripts/NumE colR2/quickMEM.R') # doesnt work anymore #but downloading and saving funcion has worked!! download.file('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/scripts/NumEcolR2/quickMEM.R', destfile = 'quickMEM.R') source('quickMEM.R') quick1 <- quickMEM(spe.hel, site.xy) quick1 #Truncation level = 0.1110089 #Time to compute dbMEMs = 0.000000 sec ------------------------------------------------------------------ # *** Procedure stopped *** # p-value of global test: 0.936 #No significant spatial structure detected by global dbMEM analysis. #Selection of dbMEM variables would lead to spurious model. #### now with just BRUV data ## rm(list = ls()) #load data spp.list <- read.csv("spe_BRUV_eDNA.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T) #load env data env <- read.csv("env_spa_BRUV_eDNA.csv", stringsAsFactors = TRUE, fileEncoding = "UTF-8-BOM") BRUVspp <- spp.list[1:27,3:78] #remove columns with sum=0 library(tidyverse) spe <- BRUVspp %>% select_if(~sum(.) !=0) BRUVenv <- env[1:27,3:7] spa <- env[1:27, 10:11] #helinger transform BRUV data BRUV.hel <- decostand(spe, "hel") # quickMEM source('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/scripts/NumE colR2/quickMEM.R') quick2 <- quickMEM(BRUV.hel, spa) #with full dataset## rm(list = ls()) #load data spp.list <- read.csv("spe_BRUV_eDNA.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T) #load env data env <- read.csv("env_spa_BRUV_eDNA.csv", stringsAsFactors = TRUE, fileEncoding = "UTF-8-BOM") site.xy = env[,10:11] envv = env[, 3:7] spp.list <- spp.list[, 3:78] #helinger transform BRUV data ALL.hel <- decostand(spp.list, "hel") # quickMEM source('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/scripts/NumE colR2/quickMEM.R') quickAll <- quickMEM(ALL.hel, site.xy) ### eDNA vs BRUV iNEXT (q=0) #### #clear R rm(list = ls()) #libraries library(ggplot2) # 3.4.2 library(iNEXT) # 3.0.0 #Load in data #just bruv BRUV <- read.csv("iNEXT_BRUV.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T, row.names = 1) #just eDNA eDNA <- read.csv("iNEXT_eDNA.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T, row.names = 1) #make matrix from BRUV as integer BRUV_mat <- as.matrix(sapply(BRUV, as.integer)) #make eDNA a matrix eDNA_mat <- as.matrix(sapply(eDNA, as.integer)) #create list of the matrices listy = list("BRUV" = BRUV_mat, "eDNA" = eDNA_mat) # have a look at the raw data with species richness q=0 out.raw <- iNEXT(listy, q=0, datatype="incidence_raw") out.raw #plot accumulation/rarefaction curve ggiNEXT(out.raw, type = 1) + theme_bw() ## fig. 3 #plot with sample coverage as percentage (appendix fig1) ggiNEXT(out.raw, type = 2) + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + theme_bw() out.raw$DataInfo # Assemblage T U S.obs SC Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 #1 BRUV 27 90 25 0.9402 6 9 1 1 2 3 0 0 1 1 #2 eDNA 27 553 72 0.9630 21 7 5 4 1 3 4 4 0 2 out.raw$AsyEst #Assemblage Diversity Observed Estimator s.e. LCL UCL #1 BRUV Species richness 25.00000 26.92593 6.529798 25.00000 39.72410 #2 BRUV Shannon diversity 18.52058 21.47778 1.931048 17.69299 25.26256 #3 BRUV Simpson diversity 14.72727 16.95652 1.878721 13.27430 20.63875 #4 eDNA Species richness 72.00000 102.33333 19.959368 72.00000 141.45298 #5 eDNA Shannon diversity 45.23898 48.99568 1.882363 45.30632 52.68504 #6 eDNA Simpson diversity 36.27197 37.38039 1.267206 34.89672 39.86407 ##compute species diversity with estimateD estimateD(listy, q=0, datatype = "incidence_raw", base="coverage") estimateD(listy, q=0, datatype = "incidence_raw", base="size", level = NULL) ### ### Primer comparison #### library(vegan) pri <- read.csv("MiFish_Valsecchi.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T, header = T) nmds <- metaMDS(pri[1:54,3:89]) #method of calculating species scores plot(nmds) ## APPENDIX FIGURE 2 # Plot NMDS points colored by treatment ordiplot(nmds, type = "n") # plot the nmds axes first points(nmds, display = c("sites"), #add the site points col = as.numeric(as.factor(pri$Primer))) #colour the sites by treatment #add legend to the plot ordihull(nmds, groups = pri$Primer, draw = "polygon", lty = 1, col = "lightgrey") legend("bottomright", #legend position legend = levels(factor(pri$Primer)), #what the legend is showing col = 1:length(levels(factor(pri$Primer))), #what it is coloured by pch = 1) #the shape of the points #species plot ordiplot(nmds, type = "n") # plot the nmds axes first points(nmds, display = c("species"), #add the site points col = as.numeric(as.factor(pri$Primer))) #add legend to the plot legend("bottomright", #legend position legend = levels(factor(pri$Primer)), #what the legend is showing col = 1:length(levels(factor(pri$Primer))), #what it is coloured by pch = 1) #the shape of the points plot(nmds, type = "t") #betadisper ### ## Bray-Curtis distances between samples dis <- vegdist(pri[1:54,3:89], method = "jaccard") ## Calculate multivariate dispersions mod <- betadisper(dis, pri[1:54,]$Primer) #this gives output but not super helpful mod #so can check significance with function anova anova(mod) # there is significant variation in the beta dispersion between the methods ## Permutation test for F - same result but on a permutation based test - there is variation between method types permutest(mod, pairwise = TRUE, permutations = 99) ## Tukey's Honest Significant Differences (mod.HSD <- TukeyHSD(mod)) plot(mod.HSD) ## Plot the groups and distances to centroids on the ## first two PCoA axes plot(mod) ## APPENDIX Figure 3 ## can also specify which axes to plot, ordering respected plot(mod, axes = c(3,1)) ## Draw a boxplot of the distances to centroid for each group boxplot(mod) pri2 <- read.csv("MiFish_Valsecchi.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T, header = T) #many glm on method library(mvabund) ## check sig #convert pres/abs data to mvabund object abun <- mvabund(pri2[,3:89]) #manyglm fits a generalised linear model to multivariate data many1 <- manyglm(abun ~ pri2$Primer, family = "negative.binomial") #plot resuduals vs fit model plot(many1) #check summary stats summary(many1) #looking to see if sig diff in method anova(many1) ### CCA BRUV vs eDNA ##### #load libraries library(tidyverse) library(vegan) #load in data #spe_BRUV_eDNA.csv is 2021 BRUV data and eDNA data - combined fish and vert assay with Swanage site spe <- read.csv("spe_BRUV_eDNA_29sites.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T) #load env data env <- read.csv("env_BRUV_eDNA_29sites.csv", stringsAsFactors = TRUE, fileEncoding = "UTF-8-BOM") #check if data is normal shapiro.test(env$DEPTH) shapiro.test(env$Tidal_stream) hist(env$Tidal_stream) #check row sums >0 rowSums(spe[,3:83]) #look at spe richness and chao diveresity index specpool(spe[,3:83]) str(spe) str(env) #set as numeric env$Northing <- as.numeric(env$Northing) env$Easting <- as.numeric(env$Easting) # run cca (c.cca <- cca(spe[,3:83] ~ METHOD + TREATMENT + log(Tidal_stream) + BIOTYPE + MACROALGAE + log(DEPTH) + Easting + Northing, env)) #summary of cca summary(c.cca) #check variation inflation factors vif.cca(c.cca) #check significance of cca anova.cca(c.cca) #non sig model #check significance of terms anova.cca(c.cca, by = "terms") #method & treatment sig #check which axis significance anova.cca(c.cca, by = "axis") #no axes are sig #ordistep for combined data mod0 <- cca(spe[,3:83] ~ 1, env) # Model with intercept only ## ordistep - forwards and backwards ## (With scope present, the default direction is "both") mod <- ordistep(mod0, scope = formula(c.cca), direction = "forward", pstep=1000) #parsimonious model = spe[, 3:36] ~ MEHTOD + MACROALGAE ##parsimonious model is with method + macroalgae (spe.cca.pars <- cca(spe[,3:83] ~ METHOD + MACROALGAE, data = env)) #check significance anova.cca(spe.cca.pars, step = 1000) #sig p=0.001** anova.cca(spe.cca.pars, step = 1000, by = "axis") #first 2 axes is sig anova.cca(spe.cca.pars, step = 1000, by = "term") # macroalgae significant, p=0.011** #method sig, p=0.001 # final check comparing if any significant difference between full model and parsinomious model anova.cca(spe.cca.pars, c.cca)#not sig so keep simpler model = parsi model! #graph for parimonious model plot(spe.cca.pars, type = "point", main = "Triplot CCA spe ~ env2 - scaling 2") #text plot(spe.cca.pars, display = "species", type = "t", main = "Triplot CCA spe ~ env2 - scaling 2") # flag for BRUV sites Methods <- spe$METHOD == "BRUV" ## FIGURE 4 ## blank plot for parsi cca plot ## plot(spe.cca.pars, type="n", xlab="CCA Axis 1", ylab= "CCA Axis 2", main= "CCA BRUV & eDNA") # show sites and colour by method points(spe.cca.pars, display= c("sites"), col=c("#56B4E9","#D55E00")[Methods + 1], pch=16, cex=1) text(spe.cca.pars, display = "cn") legend("bottomright", pch=c(16,16), col=c("#D55E00", "#56B4E9"), legend=c("BRUV", "eDNA"), bty = "n") #betadisper ## ## Bray-Curtis distances between samples dis <- vegdist(spe[,3:83], method = "jaccard") ## Calculate multivariate dispersions mod <- betadisper(dis, env$METHOD) #this gives output but not super helpful mod #so can check significance with function anova anova(mod) # there is significant variation in the beta dispersion between the methods ## Permutation test for F - same result but on a permutation based test - there is variation between method types permutest(mod, pairwise = TRUE, permutations = 99) ## Tukey's Honest Significant Differences (mod.HSD <- TukeyHSD(mod)) plot(mod.HSD) ## Plot the groups and distances to centroids on the ## first two PCoA axes plot(mod) ## can also specify which axes to plot, ordering respected plot(mod, axes = c(3,1)) ## Draw a boxplot of the distances to centroid for each group boxplot(mod) ### Valsecchi replicates analysis iNEXT + CCA #### ## load in data as 3 seperate dataframes and then combine them as a list rm(list = ls()) library(iNEXT) library(vegan) #dataset with 1st rep rep1 <- read.csv("verts_1rep.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T, row.names = 1) #dataset with 1st 2 reps rep2 <- read.csv("verts_2reps.csv", fileEncoding = 'UTF-8-BOM', stringsAsFactors = T, row.names = 1) #dataset with all 3 reps rep3 <- read.csv("verts_3reps.csv", fileEncoding = "UTF-8-BOM", stringsAsFactors = T, row.names = 1) #make 3reps dataset a matrix as integers rep1_mat <- as.matrix(sapply(rep1, as.integer)) #make 2reps dataset a matrix as integers rep2_mat <- as.matrix(sapply(rep2, as.integer)) #make 3reps dataset a matrix as integers rep3_mat <- as.matrix(sapply(rep3, as.integer)) #create list of the matrices repslist = list("1_rep" = rep1_mat, "2_reps" = rep2_mat, "3_reps" = rep3_mat) # have a look at the raw data with species richness q=0 rep.out.raw <- iNEXT(repslist, q=0, datatype="incidence_raw") getOption("max.print") rep.out.raw # FIGURE 5 #plot accumulation/rarefaction curve ggiNEXT(rep.out.raw, type = 1) + theme_bw() #sample completeness curve ggiNEXT(rep.out.raw, type = 2) #sample completeness curve as percentage ggiNEXT(rep.out.raw, type = 2) + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + theme_bw() rep.out.raw$iNextEst$size_based rep.out.raw$iNextEst$coverage_based. #coverage-based R/E sampling curves ggiNEXT(rep.out.raw, type = 3) ## CCA valsecchi rm(list = ls()) #load libraries library(vegan) library(ade4) library(adespatial) library(mvabund) library(tidyverse) library(ggplot2) #load data #species data verts21 <- read.csv("Verts_21_evenreps.csv", header = T, fileEncoding = "UTF-8-BOM") #environmental dataset envverts <- read.csv("env_spatial_verts21.csv", header = T, fileEncoding = "UTF-8-BOM") #species richness specpool(verts21[,3:65]) #shannons index between treatment types for all data diversity(verts21[,3:65], index = "shannon", groups = envverts$BYELAW) # inside MCZ_K MCZ_S outside Swanage #3.168987 2.895669 3.308376 3.220883 3.165672 ##spatial autocorrelation # quickMEM source('https://raw.githubusercontent.com/zdealveindy/anadat-r/master/scripts/NumE colR2/quickMEM.R') spe <- verts21[, 3:65] %>% select_if(~sum(.) !=0) #helinger transform data hel <- decostand(spe, "hel") source('quickMEM.R') quick <- quickMEM(hel, envverts[, 9:10]) #p-value of global test: 0.14 #No significant spatial structure detected by global dbMEM analysis. #Selection of dbMEM variables would lead to spurious model. #looking at correlation between byelaw and depth kruskal.test(envverts$BYELAW~envverts$DEPTH) #boxplot looking at depth variability between treatment zones ggplot(envverts) + geom_boxplot(aes(BYELAW, DEPTH)) # full cca (c.cca <- cca(verts21[,3:65] ~ BYELAW + TIDAL_COEFF + BIOTYPE + MACROALGAE + DEPTH + Easting + Northing, envverts)) anova.cca(c.cca) # sig model p=0.001 anova.cca(c.cca, by = "terms") #byelaw and tidal coeff are sig and depth almost anova.cca(c.cca, by = "axis") #first axis is sig #check variation inflation factors vif.cca(c.cca) (c.cca2 <- cca(verts21[,3:65] ~ BYELAW + TIDAL_COEFF + BIOTYPE + MACROALGAE + DEPTH, envverts)) #as full model is significant we can check parsimonious model #check parsi model mod0 <- cca(verts21[,3:65] ~ 1, envverts[,c(4:8,11,12)]) #ordistep to find parsimonious model mod <- ordistep(mod0, scope = formula(c.cca2), direction = "forward", pstep=1000) #running parsimonious model found from last code (parsi <- cca(verts21[, 3:65] ~ DEPTH + MACROALGAE + BYELAW, envverts)) anova.cca(parsi) # sig model p=0.001 anova.cca(parsi, by = "terms") #depth p = 0.001, byelaw p=0.006 anova.cca(parsi, by = "axis") #first axis is sig #check variation inflation factors of parsimonious model vif.cca(parsi) ##plot triplot plot(parsi, type = "point", main = "Triplot CCA spe ~ Treatment + Depth, env2 - scaling 2") ## FIGURE 6 ordiplot(parsi, type = "n") # plot the axes first points(parsi, display = "sites", #add the site points col = as.numeric(as.factor(envverts$BYELAW))) #colour the sites by treatment #add hulls ordihull(parsi, groups = envverts$BYELAW, draw = "polygon", lty = 1, col = "lightgrey") #add legend to the plot legend("bottomright", #legend position legend = levels(factor(envverts$BYELAW)), #what the legend is showing col = 1:length(levels(factor(envverts$BYELAW))), #what it is coloured by pch = 1, #the shape of the points title = "Treatment") ## check sig #convert pres/abs data to mvabund object library(mvabund) abu <- mvabund(verts21[,3:65]) mod_1 <- manyglm(abu ~ envverts$BYELAW, family = "negative.binomial") #plot resuduals vs fit model plot(mod_1) #check summary stats summary(mod_1) #looking to see which treatment types are different to one another anova(mod_1) #betadisper ## ## Bray-Curtis distances between samples dis <- vegdist(verts21[,3:65], method = "jaccard") ## Calculate multivariate dispersions mod <- betadisper(dis, envverts$BYELAW) #this gives output but not super helpful mod #so can check significance with function anova anova(mod) # there is not significant variation in the beta dispersion between the treatments ## Permutation test for F - same result but on a permutation based test - there is variation between method types permutest(mod, pairwise = TRUE, permutations = 99) ## Tukey's Honest Significant Differences (mod.HSD <- TukeyHSD(mod)) plot(mod.HSD) ## Plot the groups and distances to centroids on the ## first two PCoA axes plot(mod) ## can also specify which axes to plot, ordering respected plot(mod, axes = c(3,1)) ## Draw a boxplot of the distances to centroid for each group boxplot(mod)