# Data analysis script for „Rainforest conversion to rubber and oil palm reduces abundances and diversity of canopy spiders„ submitted to PeerJ May 2022 # spider diversity in Efforts Indonesian project Z02 # samples collected by from Jochen Drescher # identification by Daniel Ramos and Mayanda Lia # data initially analysed in R 3.6.2 # script finalised 19 May 2022 in R 4.1.2 # assumes a project structure with data files located within the same directory as the script in a folder named 'Data' and saves output to a similar folder named 'Figures' # library ----------------------------------------------------------------- # devtools::install_github("mikabr/ggpirate") # devtools::install_github("tamarahartke/RankAbund") # devtools::install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis") # BiocManager::install("phyloseq") # dependency of metagMisc which needs to be installed separately # devtools::install_github("vmikk/metagMisc") # remotes::install_github("gavinsimpson/ggvegan") Packages <- c( "tidyverse", # v 1.3.1 "fitdistrplus", # v1.1-8 "outliers", # v0.15 "multcomp", # v1.4-19 "ggpirate", #v 0.1.2 "RankAbund", # v0.2.0 "vegan", # v2.6-2 "reshape2", # v1.4.4 "car", # v3.0-13 "DHARMa", # v0.4.5 "MuMIn", # v1.46.0 "betapart", # v1.5.6 "pairwiseAdonis", # v0.4 "metagMisc", # v0.0.4 "betareg", # v3.1-4 "lmtest", # v0.9-40 "tableone", # v0.13.2 "GGally", # v2.1.2 "ggvegan", # v2.1.2 "ggpubr", # v0.4.0 "emmeans", # v1.7.4-1 "patchwork" # v1.1.1 ) lapply(Packages, library, character.only= T) # data --------------------------------------------------- # community matrix with plot ID in the first column. Remaining columns contain the total number of individuals (adults and juveniles) from each taxon which were caught in canopy fogging traps situated beneath two target canopies per research plot, ie a total of 32, 1m² traps. There are two dataframes: one at the family level, one at the morphospecies level #### family-level dataset #### spiders_fam <- read.csv2("Data/EFForTS-Z02_Canopy.Spiders.2013_Abundance_by_family.csv") # add row names rownames(spiders_fam) <- spiders_fam$Row.Labels # Extracting landuse systems & landscape plot names LandUse <- factor(substr(spiders_fam$Row.Labels, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) LandScape <- factor(substr(spiders_fam$Row.Labels, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) LSLU <- factor(substr(spiders_fam$Row.Labels, 1, 2), levels = c("BF", "BJ", "BR", "BO", "HF", "HJ", "HR", "HO")) spiders_fam <- spiders_fam[ ,-c(1)] # drop plot ID column #### morphospecies dataset #### # this includes all morphospecies without the juveniles which were identified only to family level spiders_spp <- read.csv("Data/EFForTS-Z02_Canopy.Spiders.2013_Abundance_by_morphospecies.csv") # add row names rownames(spiders_spp) <- spiders_spp$Row.Labels rownames(spiders_fam) == rownames(spiders_spp) # rows (plots) are in the same order; we do not need to create new landuse and landscape vectors spiders_spp <- spiders_spp[,-c(1)] # drop plot ID column # total (calculated) biomass of all spiders in mg/m^2, by plot biomass.df <- read.csv("Data/Biomass.csv") colnames(biomass.df)[2] <- 'TBM' # shorten the column name # plots are in the same order as the other dataframes biomass.df$LU <- factor(substr(biomass.df$Plot, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) biomass.df$LS <- factor(substr(biomass.df$Plot, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # housekeeping ------------------------------------------- # set random seed, so we'll get the same results if we run everything again set.seed(15) # system abbreviations, colors system_col = c("#15983DFF", "#0C5BB0FF", "#FEC10BFF", "#EE0011FF") system_col_double <- c(system_col, system_col) system_names = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm") LU_abb = c("F", "J", "R", "O") double_col <- rep(system_col, each = 2) # specs for a nicer-looking ggplot cleanup <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.text = element_text(colour = "black"), #axis.line = element_line(color = "black"), panel.border = element_rect(fill = NA), plot.margin = unit(c(4,4,4,4), "pt")) # Abundance analysis------------------------------------------------------ abund <- rowSums(spiders_fam)/32 # divide by number of 1m² traps to get individuals/m² med_abund <- tapply(abund, LandUse, median) mean_abund <- tapply(abund, LandUse, mean) sd_abund <- tapply(abund, LandUse, sd) # check distribution of the data descdist(abund, discrete = F) fit.norm <- fitdist(abund, "norm") fit.lnorm <- fitdist(abund, "lnorm") plot(fit.norm) plot(fit.lnorm) fit.norm$aic # 208.4912 fit.lnorm$aic # 202.7879 ## normal enough, but variance not homogeneous shapiro.test(abund) # p.value = 0,05184 qqnorm(abund) qqline(abund) bartlett.test(abund ~ LandUse) # p-value = 0.01753 bartlett.test(abund ~ LandScape) # p-value = 0.6469 bartlett.test(abund ~ interaction(LandUse,LandScape)) # p-value = 0.002458 ## log transformation improves the homogeneity of variance -> glm with log link function qqnorm(log(abund)) qqline(log(abund)) shapiro.test(log(abund)) # p.value = 0.747 bartlett.test(log(abund) ~ LandUse) # p-value = 0.353 bartlett.test(log(abund) ~ LandScape) # p-value = 0.1719 bartlett.test(log(abund) ~ interaction(LandUse,LandScape)) # p-value = 0.05636 # we'll include the interaction in the initial model interaction.plot(LandScape, LandUse, log(abund)) boxplot(log(abund) ~ LandUse * LandScape) #### step-wise simplification to find minimal adequate model #### abund_glm <- glm(abund~LandUse * LandScape, family = gaussian(link = log)) summary(abund_glm) # overdispersed? residual deviance >> degrees of freedom # model checking with DHARMa: dispersion tests look fine, so will continue anyway abu_sim <- simulateResiduals(abund_glm) testResiduals(abu_sim) abund_glm2 <- glm(abund~LandUse + LandScape, family = gaussian(link = log)) anova(abund_glm2, test = "F") abund_glm3 <- glm(abund~LandUse, family = gaussian(link = log)) anova(abund_glm3, test = "F") # does including landscape improve the model? no anova(abund_glm2, abund_glm3, test = 'Chisq') # save the results for writing the paper writeLines(capture.output(anova(abund_glm3, test = "F")), con="Figures/abund_glm3.txt") #### model checking #### plot(abund_glm3) # plots suggest potential problems with obser. BJ3, BJ5, HF4 # DHARMa abu_sim <- simulateResiduals(abund_glm3) testResiduals(abu_sim) plot(abu_sim) plotResiduals(abu_sim$scaledResiduals, LandUse) #### Abundance without influential observation HF4 #### # plot of the model suggests plot HF4 may skew our results, so we will compare previous model to one without HF4 (row 20). abu_out <- abund[-20] LS <- LandScape[-20] LU <- LandUse[-20] med_abund_out <- tapply(abu_out, LU, median) mean_abund_out <- tapply(abu_out, LU, mean) sd_abund_out <- tapply(abu_out, LU, sd) # does this change the distribution of the data? descdist(abu_out, discrete = F) fit.norm <- fitdist(abu_out, "norm") fit.lnorm <- fitdist(abu_out, "lnorm") plot(fit.norm) plot(fit.lnorm) fit.norm$aic # 193.6862 fit.lnorm$aic # 191.7096 ## variance problems shapiro.test(abu_out) # p-value = 0.155 qqnorm(abu_out) qqline(abu_out) bartlett.test(abu_out ~ LU) # p-value = 0.0568 bartlett.test(abu_out ~ LS) # p-value = 0.339 bartlett.test(abu_out ~ interaction(LU, LS)) # p-value = 0.02394 ## log transformation improves the homogeneity of variance -> glm with log link function shapiro.test(log(abu_out)) # p-val = 0.3908 bartlett.test(log(abu_out) ~ LU) # p-value = 0.2129 bartlett.test(log(abu_out) ~ LS) # p-value = 0.3199 bartlett.test(log(abu_out) ~ interaction(LU, LS)) # p-val = 0.06018 interaction.plot(LS, LU, abu_out) boxplot(abu_out~LU*LS) # testing differences between land uses and landscapes without HF4 abund_glm_out <- glm(abu_out ~ LU * LS, family = gaussian(link = log)) summary(abund_glm_out) anova(abund_glm_out, test = "F") ###### stepwise model simplification without H4 #### abund_glm2_out <- glm(abu_out~LU + LS, family = gaussian(link = log)) summary(abund_glm2_out) anova(abund_glm2_out, test = "F") # save output of final model without HF4 writeLines(capture.output(summary(abund_glm2_out)), con="Figures/abund_glm2_without_HF4_summary.txt") writeLines(capture.output(anova(abund_glm2_out, test = "F")), con="Figures/abund_glm2_without_HF4.txt") ###### checking model without HF4 #### plot(abund_glm2_out) # plots highlight observations BJ3, BJ5, (HJ4, HR2) # DHARMa abu_sim <- simulateResiduals(abund_glm2_out) testResiduals(abu_sim) plot(abu_sim) plotResiduals(abu_sim$scaledResiduals, LU) ## the potentially influential observations are extreme as HF4 was, so leave them in # save summary table with and without HF4 abund_summary <- rbind(mean_abund, med_abund, sd_abund, mean_abund_out, med_abund_out, sd_abund_out) write.csv(abund_summary, "Figures/abund_means_table.csv") #### Abundance Multiple comparisons #### # model without HF4 # no interaction between the factors, so set up the model with hypothesis for multiple comparisons abund_model <- glht(model = abund_glm2_out, linfct = mcp(LU = "Tukey")) # pairwise t.test with holm correction abund_HSD <- summary(abund_model, adjusted(type = c(p.adjust.methods = c("holm")))) write.csv(cbind(z_value = abund_HSD$test$tstat, p_value = abund_HSD$test$pvalues), "Figures/abund_HSD_withoutHF4.csv") # save test values for manuscript # for comparison, mult comps on model GLM2 (complete dataset): there is no difference in the pattern # no interaction between the factors, so set up the model with hypothesis for multiple comparisons abund_model_all <- glht(model = abund_glm3, linfct = mcp(LandUse = "Tukey")) # pairwise t.test with holm correction abund_HSD_all <- summary(abund_model_all, adjusted(type = c(p.adjust.methods = c("holm")))) write.csv(cbind(z_value = abund_HSD$test$tstat, p_value = abund_HSD$test$pvalues), "Figures/abund_HSD_fulldataset.csv") # save test values for manuscript # get the letters to add to the plot abund_lu_cld <- cld(abund_HSD, decreasing = T) # want forest first, which has highest values abu_cld <- abund_lu_cld$mcletters$Letters # and do this for landscape to see the estimated contrast and its std error abund_ls_model <- glht(model = abund_glm2_out, linfct = mcp(LS = "Tukey")) # pairwise t.test with holm correction abund_ls_HSD <- summary(abund_ls_model, adjusted(type = c(p.adjust.methods = c("holm")))) # Abundance plot ---------------------------------------------------------- # we use the plot without HF4 # make data ggplot2 friendly abu.df <- as.data.frame(abu_out) abu.df$LU <- LU abu.df$LS <- LS # pirate plot dissected: horizontal line = means, boxes = 95% confidence intervals, violins = density # landscapes pooled abu_pirate_pooled <- ggplot(abu.df, aes(x = LU, y = abu_out)) + cleanup + scale_color_manual(values = system_col) + geom_pirate(aes(colour = LU, fill = LU), violins = T, bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) + labs(y = expression("abundance, per "~m^2), x = NULL) + scale_x_discrete(labels = LU_abb) + theme(strip.background = element_rect(fill = "white", colour = "black")) + annotate( geom = "text", x = c(1:4), y = max(abu.df$abu_out)*1.05, label = abu_cld) ggsave(abu_pirate_pooled, filename = "Figures/abund_pooled.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) #### but for comparison, here is the plot with HF4 # make data ggplot2 friendly abund.df <- as.data.frame(abund) abund.df$LU <- LandUse abund.df$LS <- LandScape # pirate plot dissected: horizontal line = means, boxes = 95% confidence intervals, violins = density # landscapes pooled abu_pirate_withHF4 <- ggplot(abund.df, aes(x = LU, y = abund)) + cleanup + scale_color_manual(values = system_col) + geom_pirate(aes(colour = LU, fill = LU), violins = T, bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) + labs(y = expression("abundance, per "~m^2), x = NULL) + scale_x_discrete(labels = LU_abb) + theme(strip.background = element_rect(fill = "white", colour = "black")) # Rank Abundance --------------------------------------------------- ranks_all <- spec.ranks(spiders_fam, LandUse) ra_plot_all <- ra.plot(ranks_all, system_col) ggsave(ra_plot_all, filename = "Figures/rank_abund_all.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) # radfit fits the data from each plot to each of five different models. The model with the lowest AIC best explains the data observed in that plot. rad <- radfit(spiders_fam) # non-convergence warnings -- it is a relatively small data set rad plot(rad) # compare the best model between land use systems rad_dev <- sapply(rad, function(x) unlist(lapply(x$models, deviance))) rad_dev <- t(rad_dev) rad_dev_long <- melt(rad_dev) rad_dev_long$LandUse <- factor(substr(rad_dev_long$Var1, 2, 2), # extract land use systems from Plot Names levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) rad_dev_long$LandScape <- factor(substr(rad_dev_long$Var1, 1, 1), # extract land use systems from Plot Names levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # differences in residual deviance between models by land use rad_aov <- lm(value ~ LandUse*LandScape + Var2, data=rad_dev_long) # value = residual deviance, Var2 = each of the potential models summary(rad_aov) summary(rad_aov)$fstatistic anova(rad_aov, test = 'F') rad_aov2 <- lm(value ~ LandUse+LandScape + Var2, data=rad_dev_long) # value = residual deviance, Var2 = each of the potential models anova(rad_aov2, test = 'F') rad_aov3 <- lm(value ~ LandUse + Var2, data=rad_dev_long) # value = residual deviance, Var2 = each of the potential models anova(rad_aov3, test = 'F') anova(rad_aov2, rad_aov3, test = "Chisq") # models do not differ significantly, so use the simpler # set up the model with hypothesis for multiple comparisons rad_model <- glht(model = rad_aov3, linfct = mcp(LandUse = "Tukey")) # pairwise t.test with holm correction rad_HSD <- summary(rad_model, adjusted(type = c(p.adjust.methods = c("holm")))) ### RESULT: rad_HSD # no differences between oil palm and rubber or Jungle rubber and forest (same models fit them equally well) # those two groups are significantly different from each other in the remaining pairwise comparisons writeLines(capture.output(rad_HSD), con="Figures/rad_HSD.txt") # save test values for manuscript # Richness ---------------------------------------------------------------- # number of species in each landscape numb_spider <- specnumber(spiders_spp) med_rich <- tapply(numb_spider, LandUse, median) mean_rich <- tapply(numb_spider, LandUse, mean) sd_rich <- tapply(numb_spider, LandUse, sd) rich_summary <- rbind(mean_rich, med_rich, sd_rich) write.csv(abund_summary, "Figures/rich_means_table.csv") # data frame for plotting rich.df <- as.data.frame(numb_spider) colnames(rich.df) <- c("rich") rich.df$LandUse <- LandUse rich.df$LandScape <- LandScape rich.df$LSLU <- LSLU ## check distribution of data descdist(numb_spider, discrete = T) descdist(numb_spider, discrete = F) fit.nbin <- fitdist(numb_spider, "nbinom") fit.pois <- fitdist(numb_spider, "pois") fit.norm <- fitdist(numb_spider, "norm") fit.lnorm <- fitdist(numb_spider, "lnorm") fit.gamma <- fitdist(numb_spider, "gamma") plot(fit.nbin) qqp(numb_spider, "nbinom", size = fit.nbin$estimate[[1]], mu = fit.nbin$estimate[[2]]) plot(fit.pois) qqp(numb_spider, "pois", lambda=fit.pois$estimate) plot(fit.norm) plot(fit.lnorm) plot(fit.gamma) #lowest aic indicates best fit fit.nbin$aic fit.pois$aic fit.norm$aic fit.lnorm$aic fit.gamma$aic # maybe it's close enough to normal? shapiro.test(numb_spider) # p = 0.1496 bartlett.test(numb_spider ~ interaction(LandUse, LandScape)) # p-value= 0.1555 bartlett.test(numb_spider ~ LandUse) # p-value = 0.1766 bartlett.test(numb_spider ~ LandScape) # p-value = 0.4637 interaction.plot(LandScape, LandUse, numb_spider) boxplot(numb_spider ~ LandUse * LandScape) #### testing for differences between land uses and landscapes #### rich_lm <- lm(numb_spider~LandUse * LandScape) summary(rich_lm) anova(rich_lm, test = "F") rich_lm2 <- lm(numb_spider~LandUse + LandScape) summary(rich_lm2) anova(rich_lm2, test = "F") rich_lm3 <- lm(numb_spider~LandUse) summary(rich_lm3) anova(rich_lm3, test = "F") anova(rich_lm2, rich_lm3) # final model: landuse only writeLines(capture.output(summary(rich_lm3)), con="Figures/rich_lm_summary.txt") # save test values for manuscript writeLines(capture.output(anova(rich_lm3, test = "F")), con="Figures/rich_lm.txt") # save test values for manuscript #### model checking with DHARMa: everything looks fine #### rich_sim <- simulateResiduals(rich_lm3) testResiduals(rich_sim) plot(rich_sim) plotResiduals(rich_sim$scaledResiduals, rich.df$LandUse) plot(rich_lm3) # plots suggest potentially influential observations BJ3, BJ5, HF4 #### multiple comparisons #### # no interaction between the factors, so set up the model with hypothesis for multiple comparisons rich_model1 <- glht(model = rich_lm3, linfct = mcp(LandUse = "Tukey")) # pairwise t.test with holm correction rich_HSD <- summary(rich_model1, adjusted(type = c(p.adjust.methods = c("holm")))) writeLines(capture.output(rich_HSD), con="Figures/rich_hsd.txt") # save test values for manuscript # get the letters for the plot rich_lu_cld <- cld(rich_HSD, decreasing = T) # want forest first, which has highest values rich_cld <- rich_lu_cld$mcletters$Letters # Richness plot ---------------------------------------------------------- # landscapes pooled rich_pirate_pooled <- ggplot(rich.df, aes(x = LandUse, y = rich)) + cleanup + scale_color_manual(values = system_col) + geom_pirate(aes(colour = LandUse, fill = LandUse), violins = T, bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) + labs(y = "species richness", x = NULL) + scale_x_discrete(labels = LU_abb) + theme(strip.background = element_rect(fill = "white", colour = "black")) ggsave(rich_pirate_pooled, filename = "Figures/richness_pooled.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) # Biomass ----------------------------------------------------------------- #### data exploration #### med_biom <- tapply(biomass.df$TBM, LandUse, median) mean_biom <- tapply(biomass.df$TBM, LandUse, mean) sd_biom <- tapply(biomass.df$TBM, LandUse, sd) biomass_summary <- rbind(mean_biom, med_biom, sd_biom) write.csv(biomass_summary, "Figures/biomass_means_table.csv") descdist(biomass.df$TBM, discrete = F) shapiro.test(biomass.df$TBM) # data is probably not normal enough bartlett.test(biomass.df$TBM, biomass.df$LU) # and homogeneity of variance is not good enough bartlett.test(biomass.df$TBM, biomass.df$LS) bartlett.test(biomass.df$TBM, interaction(biomass.df$LU, biomass.df$LS)) shapiro.test(log(biomass.df$TBM)) # log-transformed data is normal enough bartlett.test(log(biomass.df$TBM), biomass.df$LU) # and homogeneity of variance is fine --> gaussian glm log link bartlett.test(log(biomass.df$TBM), biomass.df$LS) bartlett.test(log(biomass.df$TBM), interaction(biomass.df$LU, biomass.df$LS)) #### glm #### # gaussian error distribution with log link function biomass.glm <- glm(TBM ~ LU * LS, data = biomass.df, family = gaussian(link = 'log')) summary(biomass.glm) # residual deviance >> df but wait and see what DHARMa says anova(biomass.glm, test = 'F') biomass.glm2 <- glm(TBM ~ LU + LS, data = biomass.df, family = gaussian(link = 'log')) anova(biomass.glm2, test = 'F') biomass.glm3 <- glm(TBM ~ LU, data = biomass.df, family = gaussian(link = 'log')) anova(biomass.glm3, test = 'F') # no significant difference between models with and without landscape, so we use the simpler model anova(biomass.glm2, biomass.glm3, test = 'Chi') AICc(biomass.glm2, biomass.glm3) writeLines(capture.output(anova(biomass.glm3)), con="Figures/biomass_glm.txt") writeLines(capture.output(summary(biomass.glm3)), con="Figures/biomass_glm_summary.txt") # model checking with DHARMa bio_sim <- simulateResiduals(biomass.glm3) testResiduals(bio_sim) # no complaints about overdispersion, even though residual dis >> df! plot(bio_sim) # warning here, but everything else looks fine plotResiduals(bio_sim$scaledResiduals, biomass.df$LU) #### multiple comparisons #### biomass.mc <- glht(model = biomass.glm3, linfct = mcp(LU = "Tukey")) # pairwise t.test with holm correction biomass.HSD <- summary(biomass.mc, adjusted(type = c(p.adjust.methods = c("holm")))) writeLines(capture.output(biomass.HSD), con="Figures/biomass_Tukey.txt") # save the letters for plotting biomass.cld <- cld(biomass.HSD, decreasing = T) # want forest first biomass.annotation <- biomass.cld$mcletters$Letters #### plotting #### biomass.pirate <- ggplot(biomass.df, aes(x = LU, y = TBM)) + cleanup + scale_color_manual(values = system_col) + geom_pirate(aes(colour = LU, fill = LU), bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) + labs(y = expression("biomass, mg per "~m^2), x = NULL) + scale_x_discrete(labels = LU_abb) + theme(strip.background = element_rect(fill = "white", colour = "black")) + annotate( geom = "text", x = c(1:4), y = max(biomass.df$TBM)*1.05, label = biomass.annotation) ggsave(biomass.pirate, filename = "Figures/biomass_pirate.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) # Alpha Diversity --------------------------------------------------- # inverse Simpson Index for alpha diversity div_spiders <- vegan::diversity(spiders_spp, "inv") # calculate the index med_alpha <- tapply(vegan::diversity(spiders_spp, "inv"), LandUse, median) mean_alpha <- tapply(vegan::diversity(spiders_spp, "inv"), LandUse, mean) sd_alpha <- tapply(vegan::diversity(spiders_spp, "inv"), LandUse, sd) adiv_summary <- rbind(mean_alpha, med_alpha, sd_alpha) write.csv(abund_summary, "Figures/inv_simp_means_table.csv") # check distribution of data descdist(div_spiders, discrete = F) fit.norm <- fitdist(div_spiders, "norm") fit.lnorm <- fitdist(div_spiders, "lnorm") fit.gamma <- fitdist(div_spiders, "gamma") plot(fit.norm) plot(fit.lnorm) plot(fit.gamma) #lowest aic indicates best fit fit.norm$aic fit.lnorm$aic # lowest AIC fit.gamma$aic # close enough to normal? yes shapiro.test(div_spiders) # p-value = 0.05555 bartlett.test(div_spiders ~ LandUse) # p-value = 0.1505 bartlett.test(div_spiders ~ LandScape) # p-value = 0.8129 bartlett.test(div_spiders ~ interaction(LandUse, LandScape)) # p-value = 0.2712 interaction.plot(LandScape, LandUse, div_spiders) boxplot(div_spiders ~ LandUse * LandScape) #### testing for differences between land uses and landscapes #### alpha_lm <- lm(div_spiders ~ LandUse * LandScape) summary(alpha_lm) anova(alpha_lm, test = "F") # step-wise simplification alpha_lm2 <- lm(div_spiders~LandUse + LandScape) summary(alpha_lm2) anova(alpha_lm2, test = "F") alpha_lm3 <- lm(div_spiders~LandUse) summary(alpha_lm3) anova(alpha_lm3, test = "F") anova(alpha_lm2, alpha_lm3, test = "Chisq") AICc(alpha_lm2, alpha_lm3) #### model checking with DHARMa: everything looks fine #### adiv_sim <- simulateResiduals(alpha_lm2) testResiduals(adiv_sim) plot(adiv_sim) plotResiduals(adiv_sim$scaledResiduals, LandUse) plot(alpha_lm2) # plots suggest potentially influential observations HF3, HJ2, BJ4 # explanatory power of both variables is low, but they do increase the explanatory power of the model so we'll save the additive model for writing the text writeLines(capture.output(summary(alpha_lm2)),con="Figures/inv_sim_lm2_summary.txt") writeLines(capture.output(anova(alpha_lm2, test = "F")),con="Figures/inv_sim_lm2_anova.txt") # Alpha diversity plot ---------------------------------------------------- ## not included in the paper, but here it is anyway :) alpha.df <- as.data.frame(div_spiders) alpha.df$LandUse <- LandUse alpha.df$LandScape <- LandScape # LandScapes pooled alpha_pirate <- ggplot(alpha.df, aes(x = LandUse, y = div_spiders)) + cleanup + scale_color_manual(values = system_col) + geom_pirate(aes(colour = LandUse, fill = LandUse), violins = T, bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) + labs(y = "Inverse Simpson's Index", x = NULL) + scale_x_discrete(labels = LU_abb) ggsave(alpha_pirate, filename = "Figures/alpha_pooled.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) # partitioning beta diversity --------------------------------------------- # function to process dist2list dataframes, keeping only rows comparing the same LU within the same LS and # adding names for plotting. Requires post-dist2list dataframe (converted distance matrix), returns trimmed df # with factor names. trim_dist <- function(dis_list) { dis_list$lu1 <- factor(substr(dis_list$col, 2, 2)) # extract land use systems from Plot Names dis_list$lu2 <- factor(substr(dis_list$row, 2, 2)) df <- dis_list[dis_list$lu1 == dis_list$lu2, ] # keep only rows which compare the same land use system df$LU <- factor( df$lu1, levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm") ) # correct order for plotting df$ls1 <- factor(substr(df$col, 1, 1), labels = c("Bukit Duabelas", "Harapan")) # extract landscapes from Plot Names df$ls2 <- factor(substr(df$row, 1, 1), labels = c("Bukit Duabelas", "Harapan")) df$LS <- factor(paste(substr(df$col, 1, 1), substr(df$row, 1, 1), sep = "")) df <- df[df$ls1==df$ls2,] return(df) } # convert community matrix to presence/absence comm_pa<-ifelse(spiders_spp > 0, 1, 0) # calculate index and partitions. beta.pair returns three matrices: [1] turnover, [2] nestedness, [3] overall beta diversity dist<-beta.pair(comm_pa, # index.family="jaccard") index.family="sorensen") #### step 1 within group variability #### # compare multivariate dispersions from beta diversity distance matrix for the aspect of interest (turnover, nestedness, overall beta diversity) grouped by the factor. How variable are replicates within each factor level (distance to group centroid)? # start with total dissimilarity measured as Jaccard/Sorenesen pair-wise dissimilarity # both factors together (AxB) bd_LSLU<-betadisper(dist[[3]], LSLU) # double [] are important here plot(bd_LSLU) # plots the first two axes of the principal coordinates analysis (PCoA) boxplot(bd_LSLU) bSor_LSLU <- anova(bd_LSLU) # test for differences using the spatial median as the group centroid. Are there any significant differences in within-group variability? no betadisper_df <- tibble(component = "total dissimilarity", factor = "LS x LU", f_value = bSor_LSLU$'F value'[1], p_value = bSor_LSLU$'Pr(>F)'[1]) # save test values for later # for LS only bd_LS<-betadisper(dist[[3]],LandScape) plot(bd_LS) boxplot(bd_LS) bSor_LS <- anova(bd_LS) # no sig differences betadisper_df <- add_row(betadisper_df, component = "total dissimilarity", factor = "LS", f_value = bSor_LS$'F value'[1], p_value = bSor_LS$'Pr(>F)'[1]) # for LU only bd_LU<-betadisper(dist[[3]],LandUse) plot(bd_LU) boxplot(bd_LU) bSor_LU <- anova(bd_LU) # no sig differences betadisper_df <- add_row(betadisper_df, component = "total dissimilarity", factor = "LU", f_value = bSor_LU$'F value'[1], p_value = bSor_LU$'Pr(>F)'[1]) # next spatial turnover, measured as the turnover fraction of Jaccard pair-wise dissimility/Simpson pair-wise dissimilarity # both factors together to_bd_LSLU<-betadisper(dist[[1]],LSLU) plot(to_bd_LSLU) boxplot(to_bd_LSLU) turn_LSLU <- anova(to_bd_LSLU) # no sig diffs betadisper_df <- add_row(betadisper_df, component = "turnover", factor = "LS x LU", f_value = turn_LSLU$'F value'[1], p_value = turn_LSLU$'Pr(>F)'[1]) # for LS only to_bd_LS<-betadisper(dist[[1]],LandScape) plot(to_bd_LS) boxplot(to_bd_LS) turn_LS <- anova(to_bd_LS) # no sig differences betadisper_df <- add_row(betadisper_df, component = "turnover", factor = "LS", f_value = turn_LS$'F value'[1], p_value = turn_LS$'Pr(>F)'[1]) # for LU only to_bd_LU<-betadisper(dist[[1]],LandUse) plot(to_bd_LU) boxplot(to_bd_LU) turn_LU <- anova(to_bd_LU) # sig differences between variability of replicates within land uses betadisper_df <- add_row(betadisper_df, component = "turnover", factor = "LU", f_value = turn_LU$'F value'[1], p_value = turn_LU$'Pr(>F)'[1]) # and finally nestedness-resultant dissimilarity, measured as the nestedness fraction of Jaccard/Sorensen pair-wise dissimilarity # both factors together nest_bd_LSLU<-betadisper(dist[[2]],LSLU) plot(nest_bd_LSLU) boxplot(nest_bd_LSLU) nest_LSLU <- anova(nest_bd_LSLU) # no sig diffs betadisper_df <- add_row(betadisper_df, component = "nestedness", factor = "LS x LU", f_value = nest_LSLU$'F value'[1], p_value = nest_LSLU$'Pr(>F)'[1]) # for LS only nest_bd_LS<-betadisper(dist[[2]],LandScape) plot(nest_bd_LS) boxplot(nest_bd_LS) nest_LS <- anova(nest_bd_LS) # sig differences between variability of replicates within landscapes betadisper_df <- add_row(betadisper_df, component = "nestedness", factor = "LS", f_value = nest_LS$'F value'[1], p_value = nest_LS$'Pr(>F)'[1]) # for LU only nest_bd_LU<-betadisper(dist[[2]],LandUse) plot(nest_bd_LU) boxplot(nest_bd_LU) nest_LU <- anova(nest_bd_LU) # no differences between variability of replicates within landuses betadisper_df <- add_row(betadisper_df, component = "nestedness", factor = "LU", f_value = nest_LU$'F value'[1], p_value = nest_LU$'Pr(>F)'[1]) write.csv2(format(as.data.frame(betadisper_df), digits = 3), file = "Figures/beta_part_Sorensen_disp.csv") #### Step 2: adonis #### # compare differences in community composition, ie differences in the locations of the centroids in multi-dimensional space. # done separately for each of the 3 beta diversity components. test interactions, simplify model. # overall beta diversity ado_overall <- adonis2(dist[[3]] ~ LandUse * LandScape) # interaction p = 0.007 pwa_overall_LSLU <- pairwise.adonis(dist[[3]], LSLU, p.adjust.m = "fdr") # most comparisons significant # pwa_overall_LS <- pairwise.adonis(dist[[3]], LandScape, p.adjust.m = "fdr") # pwa_overall_LU <- pairwise.adonis(dist[[3]], LandUse, p.adjust.m = "fdr") # turnover ado_to <- adonis2(dist[[1]]~ LandUse * LandScape) # interaction p = 0.002 pwa_to_LSLU <- pairwise.adonis(dist[[1]], LSLU, p.adjust.m = "fdr") # most comparisons significant # pwa_to_LS <- pairwise.adonis(dist[[1]], LandScape, p.adjust.m = "fdr") # pwa_to_LU <- pairwise.adonis(dist[[1]], LandUse, p.adjust.m = "fdr") # nestedness ado_n <- adonis2(dist[[2]] ~ LandUse * LandScape) # interaction p = 0.753 ado_n2 <- adonis2(dist[[2]] ~ LandUse + LandScape) # landscape p = 0.999 ado_n3 <- adonis2(dist[[2]] ~ LandUse) # pwa_n_LSLU <- pairwise.adonis(dist[[2]], LSLU, p.adjust.m = "fdr") # pwa_n_LS <- pairwise.adonis(dist[[2]], LandScape, p.adjust.m = "fdr") pwa_n_LU <- pairwise.adonis(dist[[2]], LandUse, p.adjust.m = "fdr") # forest and jungle rubber significantly different from oil palm and rubber # based on these results, we will do the nmds later with interaction of landuse and landscape for overall betadiversity and turnover, just landuse for nestedness #### Step 3: Differences by groups #### # is the mean beta diversity, turnover, nestedness different between each landuse and landscape category? # turn the distance matrices into dataframes values for within LS and LU comparisons dis_o <- dist2list(dist[[3]]) # <3 metagMisc <3 within_o <- trim_dist(dis_o) # my processing function above dis_t <- dist2list(dist[[1]]) within_t <- trim_dist(dis_t) dis_n <- dist2list(dist[[2]]) within_n <- trim_dist(dis_n) # (g)lm approach: overall descdist(within_o$value, discrete = F) shapiro.test (within_o$value) bartlett.test(within_o$value ~ within_o$lu1) bartlett.test(within_o$value ~ within_o$LS) # meets requirements of lm bartlett.test(within_o$value ~ interaction(within_o$lu1, within_o$LS)) fit_o <- lm(value~LU*LS, data = within_o) fit_o2 <- step(fit_o) summary(fit_o2) anova(fit_o2) # no significant differences by landscape or landuse # (g)lm approach: turnover descdist(within_t$value, discrete = F) shapiro.test (within_t$value) bartlett.test(within_t$value ~ within_t$lu1) bartlett.test(within_t$value ~ within_t$LS) # meets requirements of lm bartlett.test(within_t$value ~ interaction(within_t$lu1, within_t$LS)) fit_t <- lm(value ~ LU * LS, data = within_t) fit_t2 <- step(fit_t) summary(fit_t2) anova(fit_t2) # significant interaction between landscape and land use # (g)lm approach: turnover descdist(within_n$value, discrete = F) fit.norm <- fitdist(within_n$value, "norm") fit.lnorm <- fitdist(within_n$value + 0.00001, "lnorm") # there are two 0s in the data, which it doesn't like fit.gamma <- fitdist(within_n$value + 0.00001, "gamma") fit.beta <- fitdist(within_n$value + 0.00001, "beta") fit.norm$aic fit.lnorm$aic fit.gamma$aic fit.beta$aic # much better shapiro.test (within_n$value) bartlett.test(within_n$value ~ within_n$lu1) bartlett.test(within_n$value ~ within_n$LS) # does not meet requirements of lm bartlett.test(within_n$value ~ interaction(within_n$lu1, within_n$LS)) shapiro.test (log(within_n$value+1)) bartlett.test(log(within_n$value+1) ~ within_n$lu1) bartlett.test(log(within_n$value+1) ~ within_n$LS) # a bit dodgy still bartlett.test(log(within_n$value+1) ~ interaction(within_n$lu1, within_n$LS)) # then let's pad the data to get rid of the 0s and use beta error distribution within_n$padded <- within_n$value+0.00001 fit_n <- betareg(padded ~ LU * LS, data = within_n) summary(fit_n) fit_n2 <- betareg(padded ~ LU + LS, data = within_n) summary(fit_n2) lrtest(fit_n, fit_n2) AICc(fit_n, fit_n2) # model with interaction is better than the additive model # kruskal-wallis tests -- same results within_o$LSLU <- as.factor(paste(within_o$LU, within_o$LS, sep = "_")) within_t$LSLU <- as.factor(paste(within_t$LU, within_t$LS, sep = "_")) within_n$LSLU <- as.factor(paste(within_n$LU, within_n$LS, sep = "_")) kruskal.test(value~LSLU, data = within_o) #nsd kruskal.test(value~LS, data = within_o) #nsd kruskal.test(value~LU, data = within_o) #nsd kruskal.test(value~LSLU, data = within_t) # p-value = 0.0323 # kruskal.test(value~LS, data = within_t) # kruskal.test(value~LU, data = within_t) kruskal.test(value~LSLU, data = within_n) # p-value = 0.04865 # kruskal.test(value~LS, data = within_n) # kruskal.test(value~LU, data = within_n) #### Step 4: visualisation #### # we show nmds (below) in the paper, but this is a common way of visualising it, so we just made these figures for our own information. beta_overall <- ggplot(within_o, aes(x = LU, y = value)) + cleanup + scale_color_manual(values = system_col) + geom_pirate(aes(colour = LU, fill = LU), bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) + labs(y = "overall Beta diversity (Sorensen)", x = NULL) beta_turnover <- ggplot(within_t, aes(x = LU, y = value)) + #interaction landuse and landscape cleanup + scale_color_manual(values = system_col_double) + geom_pirate(aes(colour = ls1), bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col_double, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) + labs(y = "Beta diversity, turnover (pairwise Simpson)", x = NULL) beta_nestedness <- ggplot(within_n, aes(x = LU, y = value)) + #interaction landuse and landscape cleanup + scale_color_manual(values = system_col_double) + geom_pirate(aes(colour = ls1), bars = F, points_params = list(shape = 16, size = 1), lines_params = list(size = 0.5, width = 0.5), cis_params = list(fill = system_col_double, size = 0.6, alpha = 0.5, width = 0.5), violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) + labs(y = "Beta diversity, nestedness", x = NULL) # ggsave(beta_overall, filename = "./figures/Beta_diversity/beta_overall.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) # ggsave(beta_turnover, filename = "./figures/Beta_diversity/beta_turnover.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) # ggsave(beta_nestedness, filename = "./figures/Beta_diversity/beta_nestedness.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) #### Step 5: create means table #### # combine all aspects into one data frame dis_all <- as.data.frame(cbind(overall = within_o$value, turnover = within_t$value, nestness = within_n$value, within_o[6], LS = within_o[,7], within_o[10]) ) summary_LULS <- CreateTableOne(c("overall", "turnover", "nestness"), c("LU", "LS"), dis_all, test = F) summary_LU <- CreateTableOne(c("overall", "turnover", "nestness"), c("LU"), dis_all, test = F) # shows on the screen, but also saves as csv file write.csv2(print(summary_LU), file = "Figures/summary_betapart_LU.csv") write.csv2(print(summary_LULS), file = "Figures/summary_betapart_LULS.csv") # nmds for each beta diversity partition -------- # based on adonis results above, include interaction of LU and LS for overall and turnover, just LU in nestedness # Helper-function from Ralf Schaefer to check for stress and dimensions for NMDS # x must be a data-object which can be coerced into a matrix; # max is the maximum number of dimensions # dist_meas is the distance measure scree_values <- function(x, max, dist_meas, binary=F) # dist_meas can use any of the options in vegdist. { xx <- as.matrix(x) scree.points = NULL scree.temp = 1 for(i in 1:max) { sol.mds = NULL sol.mds <- replicate(10, metaMDS(xx, k = i, trymax = 30, distance = dist_meas, binary)$stress, simplify = TRUE) scree.points <- append(scree.points, sol.mds) } return(scree.points) } # Calculate relationship between dimensions and stress # turnover = 5 dimensions scree.res <- scree_values(as.matrix(dist[[1]]), max = 10, dist_meas = "bray", binary=F) # vectorise for plotting replicated runs of NMDS plot_vec <- as.vector(sapply(1:(length(scree.res)/10), function(x) rep(x,10), simplify = "vector")) # Plot dimensions vs. stress # Red line indicates threshold for "good" Stress value, in this case 5. # Above the line is too few dimensions, below is more than necessary. # We have chosen our stress value to be close to 0.1 to reduce the number of minimally helpful dimensions, # which we cannot visualise anyway. The closer the stress gets to 0.05 the better, but it is a case of diminishing returns. plot(plot_vec, scree.res, ylab = "Stress", xlab = "Dimensions", main = "Stress vs. Dimensions") lines(1:(length(scree.res)/10), as.vector((by(scree.res, plot_vec, mean)))) abline(h = 0.10, col = "red", lty = "dashed") # nestedness = 3 dimensions scree.res <- scree_values(as.matrix(dist[[2]]), max = 10, dist_meas = "bray", binary=F) plot_vec <- as.vector(sapply(1:(length(scree.res)/10), function(x) rep(x,10), simplify = "vector")) plot(plot_vec, scree.res, ylab = "Stress", xlab = "Dimensions", main = "Stress vs. Dimensions") lines(1:(length(scree.res)/10), as.vector((by(scree.res, plot_vec, mean)))) abline(h = 0.10, col = "red", lty = "dashed") # overall = 4 dimensions scree.res <- scree_values(as.matrix(dist[[3]]), max = 10, dist_meas = "bray", binary=F) plot_vec <- as.vector(sapply(1:(length(scree.res)/10), function(x) rep(x,10), simplify = "vector")) plot(plot_vec, scree.res, ylab = "Stress", xlab = "Dimensions", main = "Stress vs. Dimensions") lines(1:(length(scree.res)/10), as.vector((by(scree.res, plot_vec, mean)))) abline(h = 0.10, col = "red", lty = "dashed") #### run ndms #### NMDS_turnover <- metaMDS(dist[[1]], k = 5, trymax = 50) stressplot(NMDS_turnover, main = paste("k = ", NMDS_turnover$ndim, ", Stress =", round(NMDS_turnover$stress, 3))) NMDS_nestedness <- metaMDS(dist[[2]], k = 3) stressplot(NMDS_nestedness, main = paste("k = ", NMDS_nestedness$ndim, ", Stress =", round(NMDS_nestedness$stress, 3))) NMDS_overall <- metaMDS(dist[[3]], k = 4) stressplot(NMDS_overall, main = paste("k = ", NMDS_overall$ndim, ", Stress =", round(NMDS_overall$stress, 3))) #### plot nmds and manova #### # based on adonis results above, include interaction of LU and LS for overall and turnover, just LU in nestedness ##### overall: interaction landuse and landscape ##### nmds_scores_o <- cbind(vegan::scores(NMDS_overall, display = 'sites')) comm.lda <- lda(nmds_scores_o, interaction(LandUse, LandScape)) # multivariate anova; does the variable significantly predict variability in nmds scores? comm_man_o <- manova(nmds_scores_o ~ LandUse * LandScape) sum_man_o <- summary(comm_man_o, test = 'Wilks') writeLines(capture.output(sum_man_o), con="Figures/betadiv_part_overall_manova.txt") # pairwise comparisons o_man_emmeans <- emmeans(comm_man_o, ~ LandUse * LandScape) pw.o <- test(contrast(o_man_emmeans, "pairwise", adjust = "fdr", side = "=")) writeLines(capture.output(pw.o), con="Figures/betadiv_part_overall_manova_pwc.txt") # centroids for annotation nmds_scores_o <- as.data.frame(nmds_scores_o) nmds_scores_o$LSLU <- LSLU site.centroid_o <- as.data.frame(nmds_scores_o) %>% group_by(System = LSLU) %>% summarise(NMDS1.center = mean(NMDS1), NMDS2.center = mean(NMDS2)) # and finally the plot! ordi_overall <- ggplot(data = nmds_scores_o, mapping = aes(x = NMDS1, y = NMDS2)) + cleanup + scale_color_manual(values = system_col_double) + scale_fill_manual(values = alpha(c(system_col_double), (rep(c(0.65, 0.25), each = 4)))) + stat_ellipse( # 95% confidence interval, centroids are means of NMDS1 and NMDS2 for each LandUse geom = "polygon", level = 0.75, aes(fill = LSLU, color = LSLU) ) + geom_point( size = 1, shape = c(21, 24)[LandScape], # circle Bukit Duabelas, triangle Harapan stroke = 0.5, fill = system_col[LandUse], show.legend = F ) + annotate( geom = "text", x = c( site.centroid_o$NMDS1.center[1] - 0.03, #BF site.centroid_o$NMDS1.center[2] + 0.1, #BJ site.centroid_o$NMDS1.center[3] + 0.06, #BR site.centroid_o$NMDS1.center[4], #BO site.centroid_o$NMDS1.center[5], #HF site.centroid_o$NMDS1.center[6] , #HJ site.centroid_o$NMDS1.center[7], #HR site.centroid_o$NMDS1.center[8] + 0.03#HO ), y = c( site.centroid_o$NMDS2.center[1] + 0.1, #BF site.centroid_o$NMDS2.center[2], #BJ site.centroid_o$NMDS2.center[3] + 0.06, #BR site.centroid_o$NMDS2.center[4] , #BO site.centroid_o$NMDS2.center[5] , #HF site.centroid_o$NMDS2.center[6] , #HJ site.centroid_o$NMDS2.center[7] + 0.05, #HR site.centroid_o$NMDS2.center[8] + 0.1 #HO ), label = levels(LSLU), fontface = "bold", size = 3 ) + # annotate(geom = "text", # x = min(nmds_cent_o$NMDS1) - 0.1, # y = max(nmds_cent_o$NMDS2) + 0.1, # label = paste("Landuse: Wilks =", round(comm_man_o_LSLU$stats["LandUse", "Wilks"], 3), # "F =", round(comm_man_o_LSLU$stats["LandUse", "approx F"], 3), # "p < 0.001", # "\nLandscape: Wilks =", round(comm_man_o_LSLU$stats["LandScape", "Wilks"], 3), # "F =", round(comm_man_o_LSLU$stats["LandScape", "approx F"], 3), # "p < 0.001", # "\nInteraction: Wilks =", round(comm_man_o_LSLU$stats["LandUse:LandScape", "Wilks"], 3), # "F =", round(comm_man_o_LSLU$stats["LandUse:LandScape", "approx F"], 3), # "p =", round(comm_man_o_LSLU$stats["LandUse:LandScape", "Pr(>F)"], 4) # ), # fontface = "bold", # size = 2, # hjust = 0)+ theme(legend.position = "none") ggsave(ordi_overall, filename = "Figures/nmds_betapart_overall_naked.pdf", device = "pdf", width = 164, height = 164, units = "mm", dpi = 600) ##### turnover: interaction landuse and landscape ##### nmds_scores_t <- cbind(vegan::scores(NMDS_turnover, display = 'sites')) comm.lda<-lda(nmds_scores_t, interaction(LandUse, LandScape)) comm_man_t <- manova(nmds_scores_t ~ LandUse * LandScape) sum_man_t <- summary(comm_man_t, test = 'Wilks') writeLines(capture.output(sum_man_t), con="Figures/betadiv_part_turnover_manova.txt") # pairwise comparisons t_man_emmeans <- emmeans(comm_man_t, ~ LandUse * LandScape) pw.t <- test(contrast(t_man_emmeans, "pairwise", adjust = "fdr", side = "=")) writeLines(capture.output(pw.t), con="Figures/betadiv_part_turnover_manova_pwc.txt") # centroids for annotation nmds_scores_t <- as.data.frame(nmds_scores_t) nmds_scores_t$LSLU <- LSLU site.centroid_t <- nmds_scores_t %>% group_by(System = LSLU) %>% summarise(NMDS1.center = mean(NMDS1), NMDS2.center = mean(NMDS2)) # and finally the plot! ordi_turnover <- ggplot(data = nmds_scores_t, mapping = aes(x = NMDS1, y = NMDS2)) + cleanup + scale_color_manual(values = system_col_double) + scale_fill_manual(values = alpha(c(system_col_double), (rep(c(0.65, 0.25), each = 4)))) + stat_ellipse( # 95% confidence interval, centroids are means of NMDS1 and NMDS2 for each LandUse geom = "polygon", level = 0.75, aes(fill = LSLU, color = LSLU) ) + geom_point( size = 1, shape = c(21, 24)[LandScape], # circle Bukit Duabelas, triangle Harapan stroke = 0.5, fill = system_col[LandUse], show.legend = ) + annotate( geom = "text", x = c( site.centroid_t$NMDS1.center[1] - 0.02, #BF site.centroid_t$NMDS1.center[2] - 0.05, #BJ site.centroid_t$NMDS1.center[3] - 0.05, #BR site.centroid_t$NMDS1.center[4], #BO site.centroid_t$NMDS1.center[5] + 0.05, #HF site.centroid_t$NMDS1.center[6] + 0.03, #HJ site.centroid_t$NMDS1.center[7], #HR site.centroid_t$NMDS1.center[8] +0.03 #HO ), y = c( site.centroid_t$NMDS2.center[1] + 0.1, #BF site.centroid_t$NMDS2.center[2] - 0.05, #BJ site.centroid_t$NMDS2.center[3], #BR site.centroid_t$NMDS2.center[4] , #BO site.centroid_t$NMDS2.center[5] , #HF site.centroid_t$NMDS2.center[6], #HJ site.centroid_t$NMDS2.center[7] , #HR site.centroid_t$NMDS2.center[8] - 0.04 #HO ), label = levels(LSLU), fontface = "bold", size = 3 ) + # annotate(geom = "text", # x = max(nmds_cent_t$NMDS1) - 0.3, # y = max(nmds_cent_t$NMDS2) + 0.05, # label = paste("Landuse: Wilks =", round(comm_man_t_LSLU$stats["LandUse", "Wilks"], 3), # "F =", round(comm_man_t_LSLU$stats["LandUse", "approx F"], 3), # "p < 0.001", # "\nLandscape: Wilks =", round(comm_man_t_LSLU$stats["LandScape", "Wilks"], 3), # "F =", round(comm_man_t_LSLU$stats["LandScape", "approx F"], 3), # "p =", round(comm_man_t_LSLU$stats["LandScape", "Pr(>F)"], 4), # "\nInteraction: Wilks =", round(comm_man_t_LSLU$stats["LandUse:LandScape", "Wilks"], 3), # "F =", round(comm_man_t_LSLU$stats["LandUse:LandScape", "approx F"], 3), # "p =", round(comm_man_t_LSLU$stats["LandUse:LandScape", "Pr(>F)"], 4) # ), # fontface = "bold", # size = 1.5, # hjust = 0) + theme(legend.position = "none") ggsave(ordi_turnover, filename = "Figures/nmds_betapart_turnover_naked.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) ##### nestedness: just Landuse ##### nmds_scores_n <- cbind(vegan::scores(NMDS_nestedness, display = 'sites')) comm.lda<-lda(nmds_scores_n, interaction(LandUse, LandScape)) # like adonis, interaction and landscape not significant in manova comm_man_n <- summary(manova(nmds_scores_n ~ LandUse * LandScape), test = "Wilks") comm_man_n2 <- manova(nmds_scores_n ~ LandUse) sum_man_n <- summary(comm_man_n2, test = 'Wilks') writeLines(capture.output(sum_man_n), con="Figures/betadiv_part_nestedness_manova.txt") # pairwise comparisons n_man_emmeans <- emmeans(comm_man_n2, ~ LandUse) pw.n <- test(contrast(n_man_emmeans, "pairwise", adjust = "fdr", side = "=")) writeLines(capture.output(pw.n), con="Figures/betadiv_part_nestedness_manova_pwc.txt") # centroids for annotation nmds_scores_n <- as.data.frame(nmds_scores_n) nmds_scores_n$LandUse <- LandUse site.centroid_n <- nmds_scores_n %>% group_by(System = LandUse) %>% summarise(NMDS1.center = mean(NMDS1), NMDS2.center = mean(NMDS2)) # and finally the plot! ordi_nestedness <- ggplot(data = nmds_scores_n, mapping = aes(x = NMDS1, y = NMDS2)) + cleanup + scale_color_manual(values = system_col) + scale_fill_manual(values = system_col) + stat_ellipse( # 95% confidence interval, centroids are means of NMDS1 and NMDS2 for each LandUse data = nmds_scores_n, geom = "polygon", level = 0.75, alpha = 0.75, aes(fill = LandUse, color = LandUse) ) + geom_point( data = nmds_scores_n, size = 1, shape = c(21, 24)[LandScape], # circle Bukit Duabelas, triangle Harapan stroke = 0.5, fill = system_col[LandUse], show.legend = ) + # annotate(geom = "text", # x = max(nmds_scores_n$NMDS1), # y = max(nmds_scores_n$NMDS2), # label = paste("Wilks = ", round(comm_man_n$stats["LandUse", "Wilks"], 3), # "\nF = ", round(comm_man_n$stats["LandUse", "approx F"], 3), # "\np = <0.001"), # fontface = "bold", # size = 2)+ theme(legend.position = "none") ggsave(ordi_nestedness, filename = "Figures/nmds_part_nestedness_naked.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) #### nmds turnover + nestednesss combo plot #### nmds_tn_plot <- ordi_turnover + ordi_nestedness + plot_annotation(tag_levels = 'A') ggsave(nmds_tn_plot, filename = "Figures/nmds_tn_combined.pdf", device = "pdf", width = 340, height = 160, units = "mm", dpi = 600) # overall - turnover - nestedness graphic --------------------------------- # not in the paper but an overview for ourselves div_parts <- data.frame(LU = within_o$LU, LS = within_o$ls1, overall = within_o$value, turnover = within_t$value, nestedness = within_n$value) div_parts_m <- melt(div_parts) parts_plot <- ggplot(data = div_parts_m, aes(x = value)) + cleanup + scale_color_manual(values = system_col) + facet_grid(variable~.) + labs(x = "Beta diversity") + theme(legend.position = "none") parts_LS <- parts_plot + # Bukit solid Harapan dashed lines geom_freqpoly(aes(colour = LU, linetype = LS), binwidth = 0.05) parts_pooled <- parts_plot + # landscapes pooled geom_freqpoly(aes(colour = LU), binwidth = 0.05) ggsave(parts_LS, filename = "Figures/parts_LS_Bukit_solid.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) ggsave(parts_pooled, filename = "Figures/parts_LUonly.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) # CCA ---------------------------------------------------------- # requires community matrix and environmental data ("ordination.data" below) # community matrix is spiders_spp or spiders_fam #### environmental data #### ordination.data <- read.csv(file ="Data/EnvironmentalVariables.csv") summary(ordination.data) rownames(ordination.data) <- ordination.data$PlotID # extract land use systems and landscapes from Plot Names ordination.data$LU <- factor(substr(ordination.data$PlotID, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) ordination.data$LS <- factor(substr(ordination.data$PlotID, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) ordination.data2 <- ordination.data[,-c(1,9:10)] # data frame without metadata summary(ordination.data2) ##### Test for correlations among env variables ##### # data standardised to range 0 ... 1 Environment_comparison2 <- decostand(ordination.data2, method="range") # Function to test for (Pearson) correlation of environmental variables. my_custom_cor <- function(data, mapping, color = I("grey50"), sizeRange = c(1, 5), ...) { # extract the x and y data from the standardised environmental data x <- GGally::eval_data_col(data, mapping$x) y <- GGally::eval_data_col(data, mapping$y) ct <- cor.test(x,y, method = "pearson") sig <- symnum( ct$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", " ") ) r <- unname(ct$estimate) rt <- format(r, digits=2)[1] # since we can't print it to get the strsize, just use the max size range cex <- max(sizeRange) # helper function to calculate a useable size percent_of_range <- function(percent, range) { percent * diff(range) + min(range, na.rm = TRUE) } # plot the cor value ggally_text( label = as.character(rt), mapping = aes(), xP = 0.5, yP = 0.5, size = I(percent_of_range(cex * abs(r), sizeRange)), cex=2, color = color, ... ) + # add the sig stars geom_text( aes_string( x = 0.8, y = 0.8 ), label = sig, size = I(cex), color = color, ... ) + # remove all the background stuff and wrap it with a dashed line theme_classic() + theme( panel.background = element_rect( color = color, linetype = "longdash" ), axis.line = element_blank(), axis.ticks = element_blank(), axis.text.y = element_blank(), axis.text.x = element_blank() ) } my_custom_smooth <- function(data, mapping, ...) { ggplot(data = data, mapping = mapping) + geom_point(color = I("blue")) + geom_smooth(method = "lm", color = I("black"), ...) } # show correlations between the environmental variables ggpairs(data = Environment_comparison2, upper = list(continuous = wrap(my_custom_cor, sizeRange = c(1,3))), lower = list(continuous = my_custom_smooth), axisLabels = "internal") # significant correlations between most of the variables. all are <|95|%, but several are >|80|%. #### msp CCA with all environmental variables #### # CCA accordning to numerical ecology: NO HELLINGER TRANSFORMATION # CCAs are more biased by rare species. We did not check that here. # set up full model with all environmental variables cca_raw_msp <- cca(spiders_spp ~ ., ordination.data2) vif.cca(cca_raw_msp) r2adjust_raw_msp <- RsquareAdj(cca_raw_msp)$adj.r.squared # set up null model with no environmental variables cca_0_msp<-cca(spiders_spp ~ 1, data=ordination.data2) # ordiR2step uses P value and the adjusted R^2 of your raw cca to select which variable to add to the model; ?ordiR2step explains quite well why its better than usual ordistep function or see: https://www.davidzeleny.net/anadat-r/doku.php/en:forward_sel_examples cca_stepforward_msp <- ordiR2step(cca_0_msp, scope = formula(cca_raw_msp), R2scope = r2adjust_raw_msp, direction = "forward", permutations = 999) vif.cca(cca_stepforward_msp) summary(cca_stepforward_msp) anova.cca(cca_stepforward_msp, step = 1000, by = "axis") anova(cca_stepforward_msp, by = "term", permutations = 999) write.csv(cca_stepforward_msp$CCA$eig, "Figures/cca_msp_eigenvalues.csv") writeLines(capture.output(anova.cca(cca_stepforward_msp,step=1000,by="axis")),con="Figures/cca_msp_perm.test_axes.txt") writeLines(capture.output(anova(cca_stepforward_msp, by = "term", permutations = 999)),con="Figures/cca_msp_perm.test_env.variables.txt") r2_adj_cca_msp<-RsquareAdj(cca_stepforward_msp)$adj.r.squared #### msp CCA Plot #### # adjusted % variation explained by each axis (CCA1.explains.adj_msp<-round(cca_stepforward_msp$CCA$eig["CCA1"]/sum(cca_stepforward_msp$CCA$eig) * RsquareAdj(cca_stepforward_msp)$adj.r.squared,digits=4)*100) (CCA2.explains.adj_msp<-round(cca_stepforward_msp$CCA$eig["CCA2"]/sum(cca_stepforward_msp$CCA$eig) * RsquareAdj(cca_stepforward_msp)$adj.r.squared,digits=4)*100) # extract species and site scores (specify vegan to avoid conflict between packages) species.scores_msp <- data.frame(vegan::scores(cca_stepforward_msp, display = "species", choices = c(1, 2))) site.scores_msp <- data.frame(vegan::scores(cca_stepforward_msp, display = "sites", choices = c(1, 2))) site.scores_msp$landuse <- factor(substr(rownames(site.scores_msp), 2, 2), levels = c("F", "J", "R", "O")) site.scores_msp$landscape <- factor(substr(rownames(site.scores_msp), 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) location.centroids_msp <- site.scores_msp %>% group_by(site.scores_msp$landuse) %>% dplyr::select(CCA1, CCA2) %>% summarise_all(mean) ford_msp <- fortify(cca_stepforward_msp, axes = 1:2) take <- c("CCA1","CCA2") arrows_msp <- subset(ford_msp, Score=="biplot") autoplot(cca_stepforward_msp) mul_msp <- ggvegan:::arrowMul(arrows_msp[,take], subset(ford_msp, select = take, Score=="sites")) arrows_msp[,take] <- arrows_msp[,take] * mul arrows_msp$Label <- c("Plant \nRichness", "Trees/ha") # nicer names cca_plot_msp <- ggplot()+ geom_vline(aes(xintercept = 0), linetype = "dashed") + geom_hline(aes(yintercept = 0), linetype = "dashed") + scale_color_manual(values = system_col) + scale_fill_manual(values=system_col) + geom_point(data = species.scores_msp, mapping = aes(x = CCA1, y = CCA2), shape = '+', # Plot species as + symbol colour = "gray25" # Make the symbols gray ) + geom_segment(data = arrows_msp, mapping = aes(x=0, y=0, xend = CCA1, yend = CCA2), arrow = arrow(length = unit(0.01, "npc")) ) + geom_text(data=arrows_msp, mapping=aes(label = Label, x = c(-2.1, - 1.5), y = c(-0.7, 1.7)), size = 2)+ coord_fixed() + scale_x_continuous(expand = c(.1, .1)) + scale_y_continuous(expand = c(.1, .1)) + labs(x = paste( "CCA1: Explained variation:", CCA1.explains.adj_msp, "%"), y = paste("CCA2: Explained variation:", CCA2.explains.adj_msp, "%")) + stat_chull(data = site.scores_msp, mapping = aes(x = CCA1, y = CCA2, fill = landuse, colour = landuse), geom = "polygon", alpha = 0.25, size = 1) + geom_point(data=site.scores_msp, mapping=aes(x=CCA1, y=CCA2, color = landuse, shape = landscape), # circle Bukit Duabelas, triangle Harapan size = 2 ) + annotate(geom = "text", # With names x = location.centroids_msp$CCA1, y = location.centroids_msp$CCA2, label = location.centroids_msp$`site.scores_msp$landuse` ) + theme(legend.position = "none", axis.text = element_text(size = 8),) + cleanup ggsave(cca_plot_msp, filename = "Figures/cca_msp.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) #### msp CCA without most highly correlated variable #### # no difference in the results env_trimmed <- ordination.data2[, - c(2)] # drop relative humidity, which is involved to 2 correlation >|85|% # standardise the data range 0...1 env_trimmed_stand <- decostand(env_trimmed, method="range") ggpairs(data = env_trimmed_stand, upper = list(continuous = wrap(my_custom_cor, sizeRange = c(1,3))), lower = list(continuous = my_custom_smooth), axisLabels = "internal") cca_raw2_msp <- cca(spiders_spp ~ ., env_trimmed) # trimmed set of environmental variables summary(cca_raw2_msp) anova(cca_raw2_msp,step=1000) anova(cca_raw2_msp,by="axis",step=1000) anova(cca_raw2_msp, by = "term", permutations = 999) vif.cca(cca_raw2_msp) r2adjust_raw2_msp <- RsquareAdj(cca_raw2_msp)$adj.r.squared cca_stepforward2_msp <-ordiR2step(cca_0_msp, scope=formula(cca_raw2_msp), R2scope =r2adjust_raw2_msp, direction="forward", permutations=999 ) vif.cca(cca_stepforward2_msp) summary(cca_stepforward2_msp) anova.cca(cca_stepforward2_msp,step=1000,by="axis") anova(cca_stepforward2_msp, by = "term", permutations = 999) r2_adj_cca2_msp<-RsquareAdj(cca_stepforward2_msp)$adj.r.squared #### family-level CCA with all environmental variables #### # CCA accordning to numerical ecology: NO HELLINGER TRANSFORMATION # CCAs are more biased by rare species. We did not check that here. # set up full model with all environmental variables cca_raw_fam <- cca(spiders_fam ~ ., ordination.data2) vif.cca(cca_raw_fam) r2adjust_raw_fam <- RsquareAdj(cca_raw_fam)$adj.r.squared # set up null model with no environmental variables cca_0_fam <- cca(spiders_fam ~ 1, data=ordination.data2) # ordiR2step uses P value and the adjusted R^2 of your raw cca to select which variable to add to the model; ?ordiR2step explains quite well why its better than usual ordistep function or see: https://www.davidzeleny.net/anadat-r/doku.php/en:forward_sel_examples cca_stepforward_fam <- ordiR2step(cca_0_fam, scope = formula(cca_raw_fam), R2scope = r2adjust_raw_fam, direction = "forward", permutations = 999) vif.cca(cca_stepforward_fam) summary(cca_stepforward_fam) anova.cca(cca_stepforward_fam, step=1000, by="axis") anova(cca_stepforward_fam, by = "term", permutations = 999) write.csv(cca_stepforward_fam$CCA$eig, "Figures/cca_family_eigenvalues.csv") writeLines(capture.output(anova.cca(cca_stepforward_fam,step=1000,by="axis")),con="Figures/cca_family_perm.test_axes.txt") writeLines(capture.output(anova(cca_stepforward_fam, by = "term", permutations = 999)),con="Figures/cca_family_perm.test_env.variables.txt") r2_adj_cca_fam <- RsquareAdj(cca_stepforward_fam)$adj.r.squared #### family-level CCA Plot #### # adjusted % variation explained by each axis (CCA1.explains.adj_fam<-round(cca_stepforward_fam$CCA$eig["CCA1"]/sum(cca_stepforward_fam$CCA$eig) * RsquareAdj(cca_stepforward_fam)$adj.r.squared,digits=4)*100) (CCA2.explains.adj_fam<-round(cca_stepforward_fam$CCA$eig["CCA2"]/sum(cca_stepforward_fam$CCA$eig) * RsquareAdj(cca_stepforward_fam)$adj.r.squared,digits=4)*100) (CCA3.explains.adj_fam<-round(cca_stepforward_fam$CCA$eig["CCA3"]/sum(cca_stepforward_fam$CCA$eig) * RsquareAdj(cca_stepforward_fam)$adj.r.squared,digits=4)*100) # extract species and site scores (specify vegan to avoid conflict between packages) species.scores_fam <- data.frame(vegan::scores(cca_stepforward_fam, display = "species", choices = c(1, 2))) site.scores_fam <- data.frame(vegan::scores(cca_stepforward_fam, display = "sites", choices = c(1, 2))) # rotate to make symmetrical with msp figure species.scores_fam$CCA1 <- species.scores_fam$CCA1 * -1 species.scores_fam$CCA2 <- species.scores_fam$CCA2 * -1 site.scores_fam$CCA1 <- site.scores_fam$CCA1 * -1 site.scores_fam$CCA2 <- site.scores_fam$CCA2 * -1 site.scores_fam$landuse <- factor(substr(rownames(site.scores_fam), 2, 2), levels = c("F", "J", "R", "O")) site.scores_fam$landscape <- factor(substr(rownames(site.scores_fam), 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) location.centroids_fam <- site.scores_fam %>% group_by(site.scores_fam$landuse) %>% dplyr::select(CCA1, CCA2) %>% summarise_all(mean) ford_fam <- fortify(cca_stepforward_fam, axes = 1:2) take <- c("CCA1","CCA2") arrows_fam <- subset(ford_fam, Score=="biplot") # rotate the arrows too arrows_fam$CCA1 <- arrows_fam$CCA1 * -1 arrows_fam$CCA2 <- arrows_fam$CCA2 * -1 autoplot(cca_stepforward_fam) mul_fam <- ggvegan:::arrowMul(arrows_fam[,take], subset(ford_fam, select = take, Score=="sites")) arrows_fam[,take] <- arrows_fam[,take] * mul_fam arrows_fam$Label <- c("Canopy \nOpenness","Aboveground \nBiomass", "Trees/ha") # nicer names cca_plot_fam <- ggplot()+ geom_vline(aes(xintercept = 0), linetype = "dashed") + geom_hline(aes(yintercept = 0), linetype = "dashed") + scale_color_manual(values = system_col) + scale_fill_manual(values=system_col) + geom_point(data = species.scores_fam, mapping = aes(x = CCA1, y = CCA2), shape = '+', # Plot species as + symbol colour = "gray25" # Make the symbols gray ) + coord_fixed() + scale_x_continuous(expand = c(.1, .1)) + scale_y_continuous(expand = c(.1, .1)) + labs(x = paste( "CCA1: Explained variation:", CCA1.explains.adj_fam, "%"), y = paste("CCA2: Explained variation:", CCA2.explains.adj_fam, "%")) + stat_chull(data = site.scores_fam, mapping = aes(x = CCA1, y = CCA2, fill = landuse, colour = landuse), geom = "polygon", alpha = 0.25, size = 1) + geom_point(data=site.scores_fam, mapping=aes(x=CCA1, y=CCA2, color = landuse, shape = landscape), # circle Bukit Duabelas, triangle Harapan size = 2 ) + geom_segment(data = arrows_fam, mapping = aes(x=0, y=0, xend = CCA1, yend = CCA2), arrow = arrow(length = unit(0.01, "npc")) ) + annotate(geom = "text", # With names x = location.centroids_fam$CCA1 + c(-0.5, 0.1, 0, 0.05), y = location.centroids_fam$CCA2 + c(0.3, -0.1, 0, -0.05), label = location.centroids_fam$`site.scores_fam$landuse` ) + geom_text(data=arrows_fam, mapping=aes(label = Label, x = c(2.5, -2.6, -1.6), y = c(-0.7, -0.7, 1.7)), size = 2, vjust = 'inward', hjust = 'inward') + theme(legend.position = "none", axis.text = element_text(size = 8),) + cleanup ggsave(cca_plot_fam, filename = "Figures/cca_fam.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600) #### fam CCA without most highly correlated variable #### # no difference in the results cca_raw2_fam <- cca(spiders_fam ~ ., env_trimmed) # trimmed set of environmental variables summary(cca_raw2_fam) anova(cca_raw2_fam,step=1000) anova(cca_raw_fam,by="axis",step=1000) anova(cca_raw_fam, by = "term", permutations = 999) vif.cca(cca_raw_fam) r2adjust_raw2_fam<-RsquareAdj(cca_raw_fam)$adj.r.squared cca_stepforward2_fam <-ordiR2step(cca_0_fam, scope=formula(cca_raw2_fam), R2scope =r2adjust_raw2_fam, direction="forward", permutations=999 ) vif.cca(cca_stepforward2_fam) summary(cca_stepforward2_fam) anova.cca(cca_stepforward2_fam,step=1000,by="axis") anova(cca_stepforward2_fam, by = "term", permutations = 999) r2_adj_cca2_fam<-RsquareAdj(cca_stepforward2_fam)$adj.r.squared #### CCA combo plot figure 7 #### cca_plot_fig7 <- cca_plot_fam + cca_plot_msp + plot_annotation(tag_levels = 'A') ggsave(cca_plot_fig7, filename = "Figures/cca_combined.pdf", device = "pdf", width = 340, height = 160, units = "mm", dpi = 600)