# Data analysis script for „Rainforest conversion to rubber and oil palm reduces abundances and diversity of canopy spiders„ submitted to PeerJ May 2022
# spider diversity in Efforts Indonesian project Z02
# samples collected by from Jochen Drescher
# identification by Daniel Ramos and Mayanda Lia
# data initially analysed in R 3.6.2
# script finalised 19 May 2022 in R 4.1.2
# assumes a project structure with data files located within the same directory as the script in a folder named 'Data' and saves output to a similar folder named 'Figures'

# library -----------------------------------------------------------------
# devtools::install_github("mikabr/ggpirate")
# devtools::install_github("tamarahartke/RankAbund")
# devtools::install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
# BiocManager::install("phyloseq") # dependency of metagMisc which needs to be installed separately
# devtools::install_github("vmikk/metagMisc")
# remotes::install_github("gavinsimpson/ggvegan")

Packages <- c(
  "tidyverse", # v 1.3.1
  "fitdistrplus", # v1.1-8
  "outliers", # v0.15
  "multcomp", # v1.4-19
  "ggpirate", #v 0.1.2
  "RankAbund", # v0.2.0
  "vegan", # v2.6-2
  "reshape2", # v1.4.4
  "car", # v3.0-13
  "DHARMa", # v0.4.5
  "MuMIn", # v1.46.0
  "betapart", # v1.5.6
  "pairwiseAdonis", # v0.4
  "metagMisc", # v0.0.4
  "betareg", # v3.1-4
  "lmtest", # v0.9-40
  "tableone", # v0.13.2
  "GGally", # v2.1.2
  "ggvegan", # v2.1.2
  "ggpubr", # v0.4.0
  "emmeans", # v1.7.4-1
  "patchwork" # v1.1.1
  )

lapply(Packages, library, character.only= T)

# data ---------------------------------------------------

# community matrix with plot ID in the first column. Remaining columns contain the total number of individuals (adults and juveniles) from each taxon which were caught in canopy fogging traps situated beneath two target canopies per research plot, ie a total of 32, 1m² traps. There are two dataframes: one at the family level, one at the morphospecies level

#### family-level dataset ####
spiders_fam <- read.csv2("Data/EFForTS-Z02_Canopy.Spiders.2013_Abundance_by_family.csv")

# add row names
rownames(spiders_fam) <- spiders_fam$Row.Labels

# Extracting landuse systems & landscape plot names
LandUse <- factor(substr(spiders_fam$Row.Labels, 2, 2),    
                  levels = c("F", "J", "R", "O"),          
                  labels = c("Forest", "Jungle Rubber",    
                             "Rubber", "Oil Palm"))  

LandScape <- factor(substr(spiders_fam$Row.Labels, 1, 1),  
                             levels = c("B", "H"),                  
                             labels = c("Bukit Duabelas",           
                                        "Harapan"))                 

LSLU <- factor(substr(spiders_fam$Row.Labels, 1, 2),                
               levels = c("BF", "BJ", "BR", "BO",                   
                          "HF", "HJ", "HR", "HO"))                  

spiders_fam <- spiders_fam[ ,-c(1)] # drop plot ID column

#### morphospecies dataset ####
# this includes all morphospecies without the juveniles which were identified only to family level
spiders_spp <- read.csv("Data/EFForTS-Z02_Canopy.Spiders.2013_Abundance_by_morphospecies.csv")

# add row names
rownames(spiders_spp) <- spiders_spp$Row.Labels      
rownames(spiders_fam) == rownames(spiders_spp) # rows (plots) are in the same order; we do not need to create new landuse and landscape vectors

spiders_spp <- spiders_spp[,-c(1)] # drop plot ID column

# total (calculated) biomass of all spiders in mg/m^2, by plot
biomass.df <- read.csv("Data/Biomass.csv")
colnames(biomass.df)[2] <- 'TBM' # shorten the column name

# plots are in the same order as the other dataframes
biomass.df$LU <- factor(substr(biomass.df$Plot, 2, 2),
                        levels = c("F", "J", "R", "O"),
                        labels = c("Forest", "Jungle Rubber",
                                   "Rubber", "Oil Palm"))

biomass.df$LS <- factor(substr(biomass.df$Plot, 1, 1),
                        levels = c("B", "H"),
                        labels = c("Bukit Duabelas",
                                   "Harapan"))


# housekeeping -------------------------------------------

# set random seed, so we'll get the same results if we run everything again
set.seed(15)

# system abbreviations, colors
system_col = c("#15983DFF", "#0C5BB0FF", "#FEC10BFF", "#EE0011FF") 
system_col_double <- c(system_col, system_col)

system_names = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")   

LU_abb = c("F", "J", "R", "O")                                      

double_col <- rep(system_col, each = 2)

# specs for a nicer-looking ggplot
cleanup <- theme(panel.grid.major = element_blank(),
                 panel.grid.minor = element_blank(),
                 panel.background = element_blank(),
                 axis.text = element_text(colour = "black"),
                 #axis.line = element_line(color = "black"),
                 panel.border = element_rect(fill = NA),
                 plot.margin = unit(c(4,4,4,4), "pt"))


# Abundance analysis------------------------------------------------------
abund <- rowSums(spiders_fam)/32 # divide by number of 1m² traps to get individuals/m²        
med_abund <- tapply(abund, LandUse, median)
mean_abund <- tapply(abund, LandUse, mean)                     
sd_abund <- tapply(abund, LandUse, sd) 

# check distribution of the data
descdist(abund, discrete = F)
fit.norm <- fitdist(abund, "norm")                            
fit.lnorm <- fitdist(abund, "lnorm") 

plot(fit.norm)                                                
plot(fit.lnorm)

fit.norm$aic   # 208.4912                                     
fit.lnorm$aic  # 202.7879

## normal enough, but variance not homogeneous
shapiro.test(abund)  # p.value = 0,05184
qqnorm(abund)                                                      
qqline(abund) 
bartlett.test(abund ~ LandUse) # p-value = 0.01753
bartlett.test(abund ~ LandScape) # p-value = 0.6469
bartlett.test(abund ~ interaction(LandUse,LandScape)) # p-value = 0.002458

## log transformation improves the homogeneity of variance -> glm with log link function
qqnorm(log(abund))                                                 
qqline(log(abund))                                                 
shapiro.test(log(abund)) # p.value = 0.747                      
bartlett.test(log(abund) ~ LandUse) # p-value = 0.353
bartlett.test(log(abund) ~ LandScape) # p-value = 0.1719
bartlett.test(log(abund) ~ interaction(LandUse,LandScape)) # p-value = 0.05636

# we'll include the interaction in the initial model
interaction.plot(LandScape, LandUse, log(abund))                  
boxplot(log(abund) ~ LandUse * LandScape)     

#### step-wise simplification to find minimal adequate model ####
abund_glm <- glm(abund~LandUse * LandScape, family = gaussian(link = log)) 
summary(abund_glm) # overdispersed? residual deviance >> degrees of freedom 
# model checking with DHARMa: dispersion tests look fine, so will continue anyway
abu_sim <- simulateResiduals(abund_glm)
testResiduals(abu_sim)

abund_glm2 <- glm(abund~LandUse + LandScape, family = gaussian(link = log))    
anova(abund_glm2, test = "F")                                               
            
abund_glm3 <- glm(abund~LandUse, family = gaussian(link = log))           
anova(abund_glm3, test = "F")  

# does including landscape improve the model? no
anova(abund_glm2, abund_glm3, test = 'Chisq')                          

# save the results for writing the paper
writeLines(capture.output(anova(abund_glm3, test = "F")), con="Figures/abund_glm3.txt")

#### model checking ####
plot(abund_glm3) # plots suggest potential problems with obser. BJ3, BJ5, HF4 

# DHARMa
abu_sim <- simulateResiduals(abund_glm3)
testResiduals(abu_sim)
plot(abu_sim)
plotResiduals(abu_sim$scaledResiduals, LandUse)

#### Abundance without influential observation HF4 ####

# plot of the model suggests plot HF4 may skew our results, so we will compare previous model to one without HF4 (row 20).
abu_out <- abund[-20]
LS <- LandScape[-20]
LU <- LandUse[-20]

med_abund_out <-  tapply(abu_out, LU, median)
mean_abund_out <- tapply(abu_out, LU, mean)                     
sd_abund_out  <-  tapply(abu_out, LU, sd)

# does this change the distribution of the data?
descdist(abu_out, discrete = F)

fit.norm <- fitdist(abu_out, "norm")                            
fit.lnorm <- fitdist(abu_out, "lnorm") 

plot(fit.norm)                                                
plot(fit.lnorm)

fit.norm$aic   # 193.6862                                    
fit.lnorm$aic  # 191.7096

## variance problems
shapiro.test(abu_out) # p-value = 0.155
qqnorm(abu_out)                                                      
qqline(abu_out) 
bartlett.test(abu_out ~ LU) # p-value = 0.0568
bartlett.test(abu_out ~ LS) # p-value = 0.339
bartlett.test(abu_out ~ interaction(LU, LS)) # p-value = 0.02394

## log transformation improves the homogeneity of variance -> glm with log link function
shapiro.test(log(abu_out))                       # p-val = 0.3908     
bartlett.test(log(abu_out) ~ LU) # p-value = 0.2129
bartlett.test(log(abu_out) ~ LS) # p-value = 0.3199
bartlett.test(log(abu_out) ~ interaction(LU, LS))  # p-val = 0.06018    

interaction.plot(LS, LU, abu_out)
boxplot(abu_out~LU*LS)

# testing differences between land uses and landscapes without HF4 
abund_glm_out <- glm(abu_out ~ LU * LS, family = gaussian(link = log)) 
summary(abund_glm_out)
anova(abund_glm_out, test = "F")                                      

###### stepwise model simplification without H4 ####
abund_glm2_out <- glm(abu_out~LU + LS, family = gaussian(link = log)) 
summary(abund_glm2_out)
anova(abund_glm2_out, test = "F")

# save output of final model without HF4
writeLines(capture.output(summary(abund_glm2_out)), con="Figures/abund_glm2_without_HF4_summary.txt")
writeLines(capture.output(anova(abund_glm2_out, test = "F")), con="Figures/abund_glm2_without_HF4.txt")

###### checking model without HF4 ####
plot(abund_glm2_out) # plots highlight observations BJ3, BJ5, (HJ4, HR2)
# DHARMa
abu_sim <- simulateResiduals(abund_glm2_out)
testResiduals(abu_sim)
plot(abu_sim)
plotResiduals(abu_sim$scaledResiduals, LU)

## the potentially influential observations are extreme as HF4 was, so leave them in
# save summary table with and without HF4
abund_summary <- rbind(mean_abund, med_abund, sd_abund, mean_abund_out, med_abund_out, sd_abund_out)
write.csv(abund_summary, "Figures/abund_means_table.csv")

#### Abundance Multiple comparisons ####

# model without HF4
# no interaction between the factors, so set up the model with hypothesis for multiple comparisons
abund_model <- glht(model = abund_glm2_out, linfct = mcp(LU = "Tukey"))

# pairwise t.test with holm correction
abund_HSD <- summary(abund_model, adjusted(type = c(p.adjust.methods = c("holm"))))

write.csv(cbind(z_value = abund_HSD$test$tstat, p_value = abund_HSD$test$pvalues), "Figures/abund_HSD_withoutHF4.csv") # save test values for manuscript

# for comparison, mult comps on  model GLM2 (complete dataset): there is no difference in the pattern
# no interaction between the factors, so set up the model with hypothesis for multiple comparisons
abund_model_all <- glht(model = abund_glm3, linfct = mcp(LandUse = "Tukey"))

# pairwise t.test with holm correction
abund_HSD_all <- summary(abund_model_all, adjusted(type = c(p.adjust.methods = c("holm"))))

write.csv(cbind(z_value = abund_HSD$test$tstat, p_value = abund_HSD$test$pvalues), "Figures/abund_HSD_fulldataset.csv") # save test values for manuscript

# get the letters to add to the plot
abund_lu_cld <- cld(abund_HSD, decreasing = T) # want forest first, which has highest values
abu_cld <- abund_lu_cld$mcletters$Letters

# and do this for landscape to see the estimated contrast and its std error
abund_ls_model <- glht(model = abund_glm2_out, linfct = mcp(LS = "Tukey"))
# pairwise t.test with holm correction
abund_ls_HSD <- summary(abund_ls_model, adjusted(type = c(p.adjust.methods = c("holm"))))   

# Abundance plot ----------------------------------------------------------
# we use the plot without HF4

# make data ggplot2 friendly
abu.df <- as.data.frame(abu_out)
abu.df$LU <- LU
abu.df$LS <- LS

# pirate plot dissected: horizontal line = means, boxes = 95% confidence intervals, violins = density

# landscapes pooled
abu_pirate_pooled <- ggplot(abu.df, aes(x = LU, y = abu_out)) +
  cleanup +
  scale_color_manual(values = system_col) +
  geom_pirate(aes(colour = LU, fill = LU),
              violins = T,
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = expression("abundance, per "~m^2), x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) +
  annotate(
    geom = "text",
    x = c(1:4),
    y = max(abu.df$abu_out)*1.05,
    label = abu_cld)

ggsave(abu_pirate_pooled, filename = "Figures/abund_pooled.pdf",
       device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)

#### but for comparison, here is the plot with HF4
# make data ggplot2 friendly
abund.df <- as.data.frame(abund)
abund.df$LU <- LandUse
abund.df$LS <- LandScape

# pirate plot dissected: horizontal line = means, boxes = 95% confidence intervals, violins = density

# landscapes pooled
abu_pirate_withHF4 <- ggplot(abund.df, aes(x = LU, y = abund)) +
  cleanup +
  scale_color_manual(values = system_col) +
  geom_pirate(aes(colour = LU, fill = LU),
              violins = T,
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = expression("abundance, per "~m^2), x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black"))


# Rank Abundance ---------------------------------------------------
ranks_all <- spec.ranks(spiders_fam, LandUse) 
ra_plot_all <- ra.plot(ranks_all, system_col) 

ggsave(ra_plot_all, filename = "Figures/rank_abund_all.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)

# radfit fits the data from each plot to each of five different models. The model with the lowest AIC best explains the data observed in that plot.  
rad <- radfit(spiders_fam) # non-convergence warnings -- it is a relatively small data set
rad
plot(rad)

# compare the best model between land use systems
rad_dev <- sapply(rad, function(x) unlist(lapply(x$models, deviance)))
rad_dev <- t(rad_dev)
rad_dev_long <- melt(rad_dev)
rad_dev_long$LandUse <- factor(substr(rad_dev_long$Var1, 2, 2), # extract land use systems from Plot Names
                               levels = c("F", "J", "R", "O"), 
                               labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm"))
rad_dev_long$LandScape <- factor(substr(rad_dev_long$Var1, 1, 1), # extract land use systems from Plot Names
                                 levels = c("B", "H"), 
                                 labels = c("Bukit Duabelas", "Harapan"))

# differences in residual deviance between models by land use
rad_aov <- lm(value ~ LandUse*LandScape + Var2, data=rad_dev_long) # value = residual deviance, Var2 = each of the potential models
summary(rad_aov)
summary(rad_aov)$fstatistic
anova(rad_aov, test = 'F')

rad_aov2 <- lm(value ~ LandUse+LandScape + Var2, data=rad_dev_long) # value = residual deviance, Var2 = each of the potential models
anova(rad_aov2, test = 'F')

rad_aov3 <- lm(value ~ LandUse + Var2, data=rad_dev_long) # value = residual deviance, Var2 = each of the potential models
anova(rad_aov3, test = 'F')
anova(rad_aov2, rad_aov3, test = "Chisq") # models do not differ significantly, so use the simpler

# set up the model with hypothesis for multiple comparisons
rad_model <- glht(model = rad_aov3, linfct = mcp(LandUse = "Tukey"))
# pairwise t.test with holm correction
rad_HSD <- summary(rad_model, adjusted(type = c(p.adjust.methods = c("holm"))))
### RESULT: 
rad_HSD
# no differences between oil palm and rubber or Jungle rubber and forest (same models fit them equally well) 
# those two groups are significantly different from each other in the remaining pairwise comparisons

writeLines(capture.output(rad_HSD), con="Figures/rad_HSD.txt") # save test values for manuscript

# Richness ----------------------------------------------------------------
# number of species in each landscape

numb_spider <- specnumber(spiders_spp)
med_rich <- tapply(numb_spider, LandUse, median)
mean_rich <- tapply(numb_spider, LandUse, mean)
sd_rich <- tapply(numb_spider, LandUse, sd)
rich_summary <- rbind(mean_rich, med_rich, sd_rich)
write.csv(abund_summary, "Figures/rich_means_table.csv")

# data frame for plotting
rich.df <- as.data.frame(numb_spider)
colnames(rich.df) <- c("rich")
rich.df$LandUse <- LandUse
rich.df$LandScape <- LandScape
rich.df$LSLU <- LSLU

## check distribution of data
descdist(numb_spider, discrete = T) 
descdist(numb_spider, discrete = F) 

fit.nbin <- fitdist(numb_spider, "nbinom") 
fit.pois <- fitdist(numb_spider, "pois")
fit.norm <- fitdist(numb_spider, "norm")
fit.lnorm <- fitdist(numb_spider, "lnorm")
fit.gamma <- fitdist(numb_spider, "gamma")

plot(fit.nbin)
qqp(numb_spider, "nbinom", size = fit.nbin$estimate[[1]], mu = fit.nbin$estimate[[2]])
plot(fit.pois)
qqp(numb_spider, "pois", lambda=fit.pois$estimate)
plot(fit.norm)
plot(fit.lnorm)
plot(fit.gamma)

#lowest aic indicates best fit
fit.nbin$aic
fit.pois$aic
fit.norm$aic
fit.lnorm$aic
fit.gamma$aic

# maybe it's close enough to normal?
shapiro.test(numb_spider) # p = 0.1496
bartlett.test(numb_spider ~ interaction(LandUse, LandScape)) # p-value= 0.1555
bartlett.test(numb_spider ~ LandUse) # p-value = 0.1766
bartlett.test(numb_spider ~ LandScape) # p-value = 0.4637

interaction.plot(LandScape, LandUse, numb_spider)
boxplot(numb_spider ~ LandUse * LandScape)

#### testing for differences between land uses and landscapes ####
rich_lm <- lm(numb_spider~LandUse * LandScape) 
summary(rich_lm)
anova(rich_lm, test = "F")

rich_lm2 <- lm(numb_spider~LandUse + LandScape)
summary(rich_lm2)
anova(rich_lm2, test = "F")

rich_lm3 <- lm(numb_spider~LandUse)
summary(rich_lm3)
anova(rich_lm3, test = "F")

anova(rich_lm2, rich_lm3)

# final model: landuse only
writeLines(capture.output(summary(rich_lm3)), con="Figures/rich_lm_summary.txt") # save test values for manuscript
writeLines(capture.output(anova(rich_lm3, test = "F")), con="Figures/rich_lm.txt") # save test values for manuscript

#### model checking with DHARMa: everything looks fine ####
rich_sim <- simulateResiduals(rich_lm3)
testResiduals(rich_sim)
plot(rich_sim)
plotResiduals(rich_sim$scaledResiduals, rich.df$LandUse)

plot(rich_lm3)  # plots suggest potentially influential observations BJ3, BJ5, HF4

#### multiple comparisons ####
# no interaction between the factors, so set up the model with hypothesis for multiple comparisons
rich_model1 <- glht(model = rich_lm3, linfct = mcp(LandUse = "Tukey"))
# pairwise t.test with holm correction
rich_HSD <- summary(rich_model1, adjusted(type = c(p.adjust.methods = c("holm"))))

writeLines(capture.output(rich_HSD), con="Figures/rich_hsd.txt") # save test values for manuscript

# get the letters for the plot
rich_lu_cld <- cld(rich_HSD, decreasing = T) # want forest first, which has highest values
rich_cld <- rich_lu_cld$mcletters$Letters

# Richness plot ----------------------------------------------------------
# landscapes pooled
rich_pirate_pooled <- ggplot(rich.df, aes(x = LandUse, y = rich)) +
  cleanup +
  scale_color_manual(values = system_col) +
  geom_pirate(aes(colour = LandUse, fill = LandUse), violins = T,
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = "species richness", x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) 

ggsave(rich_pirate_pooled, filename = "Figures/richness_pooled.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)


# Biomass -----------------------------------------------------------------
#### data exploration ####
med_biom <-  tapply(biomass.df$TBM, LandUse, median)
mean_biom <- tapply(biomass.df$TBM, LandUse, mean)                     
sd_biom  <-  tapply(biomass.df$TBM, LandUse, sd) 
biomass_summary <- rbind(mean_biom, med_biom, sd_biom)
write.csv(biomass_summary, "Figures/biomass_means_table.csv")

descdist(biomass.df$TBM, discrete = F)

shapiro.test(biomass.df$TBM) # data is probably not normal enough
bartlett.test(biomass.df$TBM, biomass.df$LU) # and homogeneity of variance is not good enough
bartlett.test(biomass.df$TBM, biomass.df$LS)
bartlett.test(biomass.df$TBM, interaction(biomass.df$LU, biomass.df$LS))

shapiro.test(log(biomass.df$TBM)) # log-transformed data is normal enough
bartlett.test(log(biomass.df$TBM), biomass.df$LU) # and homogeneity of variance is fine --> gaussian glm log link
bartlett.test(log(biomass.df$TBM), biomass.df$LS)
bartlett.test(log(biomass.df$TBM), interaction(biomass.df$LU, biomass.df$LS))

#### glm ####

# gaussian error distribution with log link function
biomass.glm <- glm(TBM ~ LU * LS, data = biomass.df, family = gaussian(link = 'log'))
summary(biomass.glm) # residual deviance >> df but wait and see what DHARMa says
anova(biomass.glm, test = 'F')

biomass.glm2 <- glm(TBM ~ LU + LS, data = biomass.df, family = gaussian(link = 'log'))
anova(biomass.glm2, test = 'F')

biomass.glm3 <- glm(TBM ~ LU, data = biomass.df, family = gaussian(link = 'log'))
anova(biomass.glm3, test = 'F')

# no significant difference between models with and without landscape, so we use the simpler model
anova(biomass.glm2, biomass.glm3, test = 'Chi')
AICc(biomass.glm2, biomass.glm3)

writeLines(capture.output(anova(biomass.glm3)), con="Figures/biomass_glm.txt") 
writeLines(capture.output(summary(biomass.glm3)), con="Figures/biomass_glm_summary.txt") 

# model checking with DHARMa
bio_sim <- simulateResiduals(biomass.glm3)
testResiduals(bio_sim) # no complaints about overdispersion, even though residual dis >> df!
plot(bio_sim) # warning here, but everything else looks fine
plotResiduals(bio_sim$scaledResiduals, biomass.df$LU) 

#### multiple comparisons ####
biomass.mc <- glht(model = biomass.glm3, linfct = mcp(LU = "Tukey"))
# pairwise t.test with holm correction
biomass.HSD <- summary(biomass.mc, adjusted(type = c(p.adjust.methods = c("holm"))))
writeLines(capture.output(biomass.HSD), con="Figures/biomass_Tukey.txt")

# save the letters for plotting
biomass.cld <- cld(biomass.HSD, decreasing = T) # want forest first
biomass.annotation <- biomass.cld$mcletters$Letters


#### plotting ####

biomass.pirate <- ggplot(biomass.df, aes(x = LU, y = TBM)) +
  cleanup +
  scale_color_manual(values = system_col) +
  geom_pirate(aes(colour = LU, fill = LU),
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = expression("biomass, mg per "~m^2), x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) +
  annotate(
    geom = "text",
    x = c(1:4),
    y = max(biomass.df$TBM)*1.05,
    label = biomass.annotation)

ggsave(biomass.pirate, filename = "Figures/biomass_pirate.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)


# Alpha Diversity ---------------------------------------------------
# inverse Simpson Index for alpha diversity
div_spiders <- vegan::diversity(spiders_spp, "inv") # calculate the index
med_alpha <- tapply(vegan::diversity(spiders_spp, "inv"), LandUse, median)
mean_alpha <- tapply(vegan::diversity(spiders_spp, "inv"), LandUse, mean)
sd_alpha <- tapply(vegan::diversity(spiders_spp, "inv"), LandUse, sd)

adiv_summary <- rbind(mean_alpha, med_alpha, sd_alpha)
write.csv(abund_summary, "Figures/inv_simp_means_table.csv")

# check distribution of data
descdist(div_spiders, discrete = F)

fit.norm <-  fitdist(div_spiders, "norm")
fit.lnorm <- fitdist(div_spiders, "lnorm")
fit.gamma <- fitdist(div_spiders, "gamma")

plot(fit.norm)
plot(fit.lnorm)
plot(fit.gamma)

#lowest aic indicates best fit
fit.norm$aic
fit.lnorm$aic # lowest AIC
fit.gamma$aic

# close enough to normal? yes
shapiro.test(div_spiders) # p-value = 0.05555
bartlett.test(div_spiders ~ LandUse) # p-value = 0.1505
bartlett.test(div_spiders ~ LandScape) # p-value = 0.8129
bartlett.test(div_spiders ~ interaction(LandUse, LandScape)) # p-value = 0.2712

interaction.plot(LandScape, LandUse, div_spiders)
boxplot(div_spiders ~ LandUse * LandScape)

#### testing for differences between land uses and landscapes ####                   
alpha_lm <- lm(div_spiders ~ LandUse * LandScape)
summary(alpha_lm)
anova(alpha_lm, test = "F")

# step-wise simplification
alpha_lm2 <- lm(div_spiders~LandUse + LandScape)
summary(alpha_lm2)
anova(alpha_lm2, test = "F")

alpha_lm3 <- lm(div_spiders~LandUse)
summary(alpha_lm3)
anova(alpha_lm3, test = "F")

anova(alpha_lm2, alpha_lm3, test = "Chisq") 
AICc(alpha_lm2, alpha_lm3)

#### model checking with DHARMa: everything looks fine ####
adiv_sim <- simulateResiduals(alpha_lm2)
testResiduals(adiv_sim)
plot(adiv_sim)
plotResiduals(adiv_sim$scaledResiduals, LandUse)

plot(alpha_lm2)  # plots suggest potentially influential observations HF3, HJ2, BJ4

# explanatory power of both variables is low, but they do increase the explanatory power of the model so we'll save the additive model for writing the text
writeLines(capture.output(summary(alpha_lm2)),con="Figures/inv_sim_lm2_summary.txt")
writeLines(capture.output(anova(alpha_lm2, test = "F")),con="Figures/inv_sim_lm2_anova.txt")

# Alpha diversity plot ----------------------------------------------------
## not included in the paper, but here it is anyway :) 
alpha.df <- as.data.frame(div_spiders)
alpha.df$LandUse <- LandUse
alpha.df$LandScape <- LandScape

# LandScapes pooled
alpha_pirate <- ggplot(alpha.df, aes(x = LandUse, y = div_spiders)) +
  cleanup +
  scale_color_manual(values = system_col) +
  geom_pirate(aes(colour = LandUse, fill = LandUse), violins = T,
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) +
  labs(y = "Inverse Simpson's Index", x = NULL) +
  scale_x_discrete(labels = LU_abb)

ggsave(alpha_pirate, filename = "Figures/alpha_pooled.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)

# partitioning beta diversity ---------------------------------------------

# function to process dist2list dataframes, keeping only rows comparing the same LU within the same LS and 
# adding names for plotting. Requires post-dist2list dataframe (converted distance matrix), returns trimmed df
# with factor names.

trim_dist <- function(dis_list)
{
  dis_list$lu1 <-
    factor(substr(dis_list$col, 2, 2)) # extract land use systems from Plot Names
  dis_list$lu2 <- factor(substr(dis_list$row, 2, 2))
  
  df <-
    dis_list[dis_list$lu1 == dis_list$lu2, ] # keep only rows which compare the same land use system
  df$LU <- factor(
    df$lu1,
    levels = c("F", "J", "R", "O"),
    labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")
  ) # correct order for plotting
  
  df$ls1 <-
    factor(substr(df$col, 1, 1), labels = c("Bukit Duabelas", "Harapan")) # extract landscapes from Plot Names
  df$ls2 <-
    factor(substr(df$row, 1, 1), labels = c("Bukit Duabelas", "Harapan"))
  df$LS <-
    factor(paste(substr(df$col, 1, 1), substr(df$row, 1, 1), sep = ""))
  df <- df[df$ls1==df$ls2,]
  return(df)
}

# convert community matrix to presence/absence 
comm_pa<-ifelse(spiders_spp > 0, 1, 0)

# calculate index and partitions. beta.pair returns three matrices: [1] turnover, [2] nestedness, [3] overall beta diversity
dist<-beta.pair(comm_pa, 
                # index.family="jaccard") 
                index.family="sorensen")

#### step 1 within group variability ####
# compare multivariate dispersions from beta diversity distance matrix for the aspect of interest (turnover, nestedness, overall beta diversity) grouped by the factor. How variable are replicates within each factor level (distance to group centroid)?

# start with total dissimilarity measured as Jaccard/Sorenesen pair-wise dissimilarity
# both factors together (AxB)
bd_LSLU<-betadisper(dist[[3]], LSLU) # double [] are important here 

plot(bd_LSLU) # plots the first two axes of the principal coordinates analysis (PCoA)

boxplot(bd_LSLU)

bSor_LSLU <- anova(bd_LSLU) # test for differences using the spatial median as the group centroid. Are there any significant differences in within-group variability? no

betadisper_df <- tibble(component = "total dissimilarity", factor = "LS x LU", f_value = bSor_LSLU$'F value'[1], p_value = bSor_LSLU$'Pr(>F)'[1]) # save test values for later

# for LS only
bd_LS<-betadisper(dist[[3]],LandScape)
plot(bd_LS)
boxplot(bd_LS)
bSor_LS <- anova(bd_LS) # no sig differences
betadisper_df <- add_row(betadisper_df, component = "total dissimilarity", factor = "LS", f_value = bSor_LS$'F value'[1], p_value = bSor_LS$'Pr(>F)'[1])

# for LU only
bd_LU<-betadisper(dist[[3]],LandUse)
plot(bd_LU)
boxplot(bd_LU)
bSor_LU <- anova(bd_LU) # no sig differences
betadisper_df <- add_row(betadisper_df, component = "total dissimilarity", factor = "LU", f_value = bSor_LU$'F value'[1], p_value = bSor_LU$'Pr(>F)'[1])

# next spatial turnover, measured as the turnover fraction of Jaccard pair-wise dissimility/Simpson pair-wise dissimilarity
# both factors together
to_bd_LSLU<-betadisper(dist[[1]],LSLU) 
plot(to_bd_LSLU)
boxplot(to_bd_LSLU)
turn_LSLU <- anova(to_bd_LSLU) # no sig diffs
betadisper_df <- add_row(betadisper_df, component = "turnover", factor = "LS x LU", f_value = turn_LSLU$'F value'[1], p_value = turn_LSLU$'Pr(>F)'[1])

# for LS only
to_bd_LS<-betadisper(dist[[1]],LandScape)
plot(to_bd_LS)
boxplot(to_bd_LS)
turn_LS <- anova(to_bd_LS) # no sig differences
betadisper_df <- add_row(betadisper_df, component = "turnover", factor = "LS", f_value = turn_LS$'F value'[1], p_value = turn_LS$'Pr(>F)'[1])

# for LU only
to_bd_LU<-betadisper(dist[[1]],LandUse)
plot(to_bd_LU)
boxplot(to_bd_LU)
turn_LU <- anova(to_bd_LU) # sig differences between variability of replicates within land uses
betadisper_df <- add_row(betadisper_df, component = "turnover", factor = "LU", f_value = turn_LU$'F value'[1], p_value = turn_LU$'Pr(>F)'[1])

# and finally nestedness-resultant dissimilarity, measured as the nestedness fraction of Jaccard/Sorensen pair-wise dissimilarity
# both factors together
nest_bd_LSLU<-betadisper(dist[[2]],LSLU) 
plot(nest_bd_LSLU)
boxplot(nest_bd_LSLU)
nest_LSLU <- anova(nest_bd_LSLU) # no sig diffs
betadisper_df <- add_row(betadisper_df, component = "nestedness", factor = "LS x LU", f_value = nest_LSLU$'F value'[1], p_value = nest_LSLU$'Pr(>F)'[1])

# for LS only
nest_bd_LS<-betadisper(dist[[2]],LandScape)
plot(nest_bd_LS)
boxplot(nest_bd_LS)
nest_LS <- anova(nest_bd_LS) # sig differences between variability of replicates within landscapes
betadisper_df <- add_row(betadisper_df, component = "nestedness", factor = "LS", f_value = nest_LS$'F value'[1], p_value = nest_LS$'Pr(>F)'[1])

# for LU only
nest_bd_LU<-betadisper(dist[[2]],LandUse)
plot(nest_bd_LU)
boxplot(nest_bd_LU)
nest_LU <- anova(nest_bd_LU) # no differences between variability of replicates within landuses
betadisper_df <- add_row(betadisper_df, component = "nestedness", factor = "LU", f_value = nest_LU$'F value'[1], p_value = nest_LU$'Pr(>F)'[1])

write.csv2(format(as.data.frame(betadisper_df), digits = 3), file = "Figures/beta_part_Sorensen_disp.csv")

#### Step 2: adonis ####
# compare differences in community composition, ie differences in the locations of the centroids in multi-dimensional space. 
# done separately for each of the 3 beta diversity components. test interactions, simplify model.

# overall beta diversity
ado_overall <- adonis2(dist[[3]] ~ LandUse * LandScape) # interaction p = 0.007

pwa_overall_LSLU <- pairwise.adonis(dist[[3]], LSLU, p.adjust.m = "fdr") # most comparisons significant
# pwa_overall_LS <- pairwise.adonis(dist[[3]], LandScape, p.adjust.m = "fdr")
# pwa_overall_LU <- pairwise.adonis(dist[[3]], LandUse, p.adjust.m = "fdr")

# turnover 
ado_to <- adonis2(dist[[1]]~ LandUse * LandScape) # interaction p = 0.002

pwa_to_LSLU <- pairwise.adonis(dist[[1]], LSLU, p.adjust.m = "fdr") # most comparisons significant
# pwa_to_LS <- pairwise.adonis(dist[[1]], LandScape, p.adjust.m = "fdr")
# pwa_to_LU <- pairwise.adonis(dist[[1]], LandUse, p.adjust.m = "fdr")

# nestedness 
ado_n <- adonis2(dist[[2]] ~ LandUse * LandScape) # interaction p = 0.753
ado_n2 <- adonis2(dist[[2]] ~ LandUse + LandScape) # landscape p = 0.999
ado_n3 <- adonis2(dist[[2]] ~ LandUse)

# pwa_n_LSLU <- pairwise.adonis(dist[[2]], LSLU, p.adjust.m = "fdr")
# pwa_n_LS <- pairwise.adonis(dist[[2]], LandScape, p.adjust.m = "fdr")
pwa_n_LU <- pairwise.adonis(dist[[2]], LandUse, p.adjust.m = "fdr") # forest and jungle rubber significantly different from oil palm and rubber

# based on these results, we will do the nmds later with interaction of landuse and landscape for overall betadiversity and turnover, just landuse for nestedness

#### Step 3: Differences by groups ####

# is the mean beta diversity, turnover, nestedness different between each landuse and landscape category?

# turn the distance matrices into dataframes values for within LS and LU comparisons
dis_o <- dist2list(dist[[3]]) # <3 metagMisc <3
within_o <- trim_dist(dis_o) # my processing function above

dis_t <- dist2list(dist[[1]])
within_t <- trim_dist(dis_t)

dis_n <- dist2list(dist[[2]])
within_n <- trim_dist(dis_n)

# (g)lm approach: overall
descdist(within_o$value, discrete = F)

shapiro.test (within_o$value)
bartlett.test(within_o$value ~ within_o$lu1) 
bartlett.test(within_o$value ~ within_o$LS) # meets requirements of lm
bartlett.test(within_o$value ~ interaction(within_o$lu1, within_o$LS))

fit_o <- lm(value~LU*LS, data = within_o)
fit_o2 <- step(fit_o)
summary(fit_o2)
anova(fit_o2) # no significant differences by landscape or landuse

# (g)lm approach: turnover
descdist(within_t$value, discrete = F)

shapiro.test (within_t$value)
bartlett.test(within_t$value ~ within_t$lu1) 
bartlett.test(within_t$value ~ within_t$LS) # meets requirements of lm
bartlett.test(within_t$value ~ interaction(within_t$lu1, within_t$LS))

fit_t <- lm(value ~ LU * LS, data = within_t)
fit_t2 <- step(fit_t)
summary(fit_t2)
anova(fit_t2) # significant interaction between landscape and land use

# (g)lm approach: turnover
descdist(within_n$value, discrete = F)
fit.norm <-  fitdist(within_n$value, "norm")
fit.lnorm <- fitdist(within_n$value + 0.00001, "lnorm") # there are two 0s in the data, which it doesn't like
fit.gamma <- fitdist(within_n$value + 0.00001, "gamma") 
fit.beta <-  fitdist(within_n$value + 0.00001, "beta") 

fit.norm$aic
fit.lnorm$aic
fit.gamma$aic
fit.beta$aic # much better

shapiro.test (within_n$value)
bartlett.test(within_n$value ~ within_n$lu1) 
bartlett.test(within_n$value ~ within_n$LS) # does not meet requirements of lm
bartlett.test(within_n$value ~ interaction(within_n$lu1, within_n$LS))

shapiro.test (log(within_n$value+1))
bartlett.test(log(within_n$value+1) ~ within_n$lu1) 
bartlett.test(log(within_n$value+1) ~ within_n$LS) # a bit dodgy still
bartlett.test(log(within_n$value+1) ~ interaction(within_n$lu1, within_n$LS))

# then let's pad the data to get rid of the 0s and use beta error distribution
within_n$padded <- within_n$value+0.00001
fit_n <- betareg(padded ~ LU * LS,
                 data = within_n)
summary(fit_n)
fit_n2 <- betareg(padded ~ LU + LS,
                  data = within_n)
summary(fit_n2)
lrtest(fit_n, fit_n2) 
AICc(fit_n, fit_n2) # model with interaction is better than the additive model

# kruskal-wallis tests -- same results 
within_o$LSLU <- as.factor(paste(within_o$LU, within_o$LS, sep = "_"))
within_t$LSLU <- as.factor(paste(within_t$LU, within_t$LS, sep = "_"))
within_n$LSLU <- as.factor(paste(within_n$LU, within_n$LS, sep = "_"))

kruskal.test(value~LSLU, data = within_o) #nsd
kruskal.test(value~LS,   data = within_o) #nsd
kruskal.test(value~LU,   data = within_o) #nsd

kruskal.test(value~LSLU, data = within_t) # p-value = 0.0323
# kruskal.test(value~LS,   data = within_t)
# kruskal.test(value~LU,   data = within_t)

kruskal.test(value~LSLU, data = within_n) # p-value = 0.04865
# kruskal.test(value~LS,   data = within_n)
# kruskal.test(value~LU,   data = within_n)

#### Step 4: visualisation ####
# we show nmds (below) in the paper, but this is a common way of visualising it, so we just made these figures for our own information.

beta_overall <- ggplot(within_o, aes(x = LU, y = value)) +
  cleanup +
  scale_color_manual(values = system_col) +
  geom_pirate(aes(colour = LU, fill = LU),
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, 
                                size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) +
  labs(y = "overall Beta diversity (Sorensen)", x = NULL)

beta_turnover <- ggplot(within_t, aes(x = LU, y = value)) + #interaction landuse and landscape
  cleanup +
  scale_color_manual(values = system_col_double) +
  geom_pirate(aes(colour = ls1),
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col_double, 
                                size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) +
  labs(y = "Beta diversity, turnover (pairwise Simpson)", x = NULL)

beta_nestedness <- ggplot(within_n, aes(x = LU, y = value)) + #interaction landuse and landscape
  cleanup +
  scale_color_manual(values = system_col_double) +
  geom_pirate(aes(colour = ls1),
              bars = F, points_params = list(shape = 16, size = 1),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col_double, 
                                size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1)) +
  labs(y = "Beta diversity, nestedness", x = NULL)

# ggsave(beta_overall, filename = "./figures/Beta_diversity/beta_overall.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)
# ggsave(beta_turnover, filename = "./figures/Beta_diversity/beta_turnover.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)
# ggsave(beta_nestedness, filename = "./figures/Beta_diversity/beta_nestedness.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)

#### Step 5: create means table ####
# combine all aspects into one data frame
dis_all <- as.data.frame(cbind(overall = within_o$value, 
                               turnover = within_t$value, 
                               nestness = within_n$value,
                               within_o[6], LS = within_o[,7], within_o[10])
)

summary_LULS <- CreateTableOne(c("overall", "turnover", "nestness"), c("LU", "LS"), dis_all, test = F)

summary_LU <- CreateTableOne(c("overall", "turnover", "nestness"), c("LU"), dis_all, test = F)

# shows on the screen, but also saves as csv file
write.csv2(print(summary_LU), file = "Figures/summary_betapart_LU.csv")
write.csv2(print(summary_LULS), file = "Figures/summary_betapart_LULS.csv")

# nmds for each beta diversity partition --------

# based on adonis results above, include interaction of LU and LS for overall and turnover, just LU in nestedness

# Helper-function from Ralf Schaefer to check for stress and dimensions for NMDS
# x must be a data-object which can be coerced into a matrix; 
# max is the maximum number of dimensions
# dist_meas is the distance measure
scree_values <- function(x, max, dist_meas, binary=F) # dist_meas can use any of the options in vegdist. 
{	
  xx <- as.matrix(x)
  scree.points = NULL
  scree.temp = 1
  for(i in 1:max)
  {
    sol.mds = NULL
    sol.mds  <- replicate(10, metaMDS(xx, k = i, trymax = 30, distance = dist_meas, binary)$stress, 
                          simplify = TRUE)
    scree.points <- append(scree.points, sol.mds)
  }
  return(scree.points)
}

# Calculate relationship between dimensions and stress
# turnover = 5 dimensions
scree.res <- scree_values(as.matrix(dist[[1]]), max = 10, dist_meas = "bray", binary=F)
# vectorise for plotting replicated runs of NMDS
plot_vec <- as.vector(sapply(1:(length(scree.res)/10), function(x) rep(x,10), simplify = "vector")) 
# Plot dimensions vs. stress
# Red line indicates threshold for "good" Stress value, in this case 5. 
# Above the line is too few dimensions, below is more than necessary. 
# We have chosen our stress value to be close to 0.1 to reduce the number of minimally helpful dimensions, 
# which we cannot visualise anyway. The closer the stress gets to 0.05 the better, but it is a case of diminishing returns.
plot(plot_vec, scree.res, ylab = "Stress", xlab = "Dimensions", main = "Stress vs. Dimensions")
lines(1:(length(scree.res)/10), as.vector((by(scree.res, plot_vec, mean))))
abline(h = 0.10, col = "red", lty = "dashed")

# nestedness = 3 dimensions
scree.res <- scree_values(as.matrix(dist[[2]]), max = 10, dist_meas = "bray", binary=F)
plot_vec <- as.vector(sapply(1:(length(scree.res)/10), function(x) rep(x,10), simplify = "vector")) 
plot(plot_vec, scree.res, ylab = "Stress", xlab = "Dimensions", main = "Stress vs. Dimensions")
lines(1:(length(scree.res)/10), as.vector((by(scree.res, plot_vec, mean))))
abline(h = 0.10, col = "red", lty = "dashed")

# overall = 4 dimensions
scree.res <- scree_values(as.matrix(dist[[3]]), max = 10, dist_meas = "bray", binary=F)
plot_vec <- as.vector(sapply(1:(length(scree.res)/10), function(x) rep(x,10), simplify = "vector")) 
plot(plot_vec, scree.res, ylab = "Stress", xlab = "Dimensions", main = "Stress vs. Dimensions")
lines(1:(length(scree.res)/10), as.vector((by(scree.res, plot_vec, mean))))
abline(h = 0.10, col = "red", lty = "dashed")

#### run ndms ####

NMDS_turnover <- metaMDS(dist[[1]], k = 5, trymax = 50)
stressplot(NMDS_turnover, main = paste("k = ", NMDS_turnover$ndim, ", Stress =", round(NMDS_turnover$stress, 3)))

NMDS_nestedness <- metaMDS(dist[[2]], k = 3)
stressplot(NMDS_nestedness, main = paste("k = ", NMDS_nestedness$ndim, ", Stress =", round(NMDS_nestedness$stress, 3)))

NMDS_overall <- metaMDS(dist[[3]], k = 4)
stressplot(NMDS_overall, main = paste("k = ", NMDS_overall$ndim, ", Stress =", round(NMDS_overall$stress, 3)))


#### plot nmds and manova ####
# based on adonis results above, include interaction of LU and LS for overall and turnover, just LU in nestedness

##### overall: interaction landuse and landscape #####
nmds_scores_o <- cbind(vegan::scores(NMDS_overall, display = 'sites'))
comm.lda <- lda(nmds_scores_o, interaction(LandUse, LandScape))

# multivariate anova; does the variable significantly predict variability in nmds scores?
comm_man_o <- manova(nmds_scores_o ~ LandUse * LandScape) 
sum_man_o <- summary(comm_man_o, test = 'Wilks')
writeLines(capture.output(sum_man_o), con="Figures/betadiv_part_overall_manova.txt")

# pairwise comparisons
o_man_emmeans <- emmeans(comm_man_o, ~ LandUse * LandScape)
pw.o <- test(contrast(o_man_emmeans, "pairwise", adjust = "fdr", side = "="))
writeLines(capture.output(pw.o), con="Figures/betadiv_part_overall_manova_pwc.txt")

# centroids for annotation
nmds_scores_o <- as.data.frame(nmds_scores_o)
nmds_scores_o$LSLU <- LSLU
site.centroid_o <- as.data.frame(nmds_scores_o) %>% 
  group_by(System = LSLU) %>%
  summarise(NMDS1.center = mean(NMDS1), NMDS2.center = mean(NMDS2))

# and finally the plot!
ordi_overall <- ggplot(data = nmds_scores_o,
                          mapping = aes(x = NMDS1,
                                        y = NMDS2)) +
  cleanup +
  scale_color_manual(values = system_col_double) +
  scale_fill_manual(values = alpha(c(system_col_double), (rep(c(0.65, 0.25), each = 4)))) +
  stat_ellipse( # 95% confidence interval, centroids are means of NMDS1 and NMDS2 for each LandUse
    geom = "polygon",
    level = 0.75,
    aes(fill = LSLU, color = LSLU)
  ) +
  geom_point(
    size = 1,
    shape = c(21, 24)[LandScape], # circle Bukit Duabelas, triangle Harapan
    stroke = 0.5,
    fill = system_col[LandUse],
    show.legend = F
  ) +
  annotate(
    geom = "text",
    x = c(
      site.centroid_o$NMDS1.center[1] - 0.03, #BF
      site.centroid_o$NMDS1.center[2] + 0.1, #BJ
      site.centroid_o$NMDS1.center[3] + 0.06, #BR
      site.centroid_o$NMDS1.center[4], #BO
      site.centroid_o$NMDS1.center[5], #HF
      site.centroid_o$NMDS1.center[6] , #HJ
      site.centroid_o$NMDS1.center[7], #HR
      site.centroid_o$NMDS1.center[8] + 0.03#HO
    ),
    y = c(
      site.centroid_o$NMDS2.center[1] + 0.1, #BF
      site.centroid_o$NMDS2.center[2],  #BJ
      site.centroid_o$NMDS2.center[3]  + 0.06, #BR
      site.centroid_o$NMDS2.center[4] , #BO
      site.centroid_o$NMDS2.center[5] , #HF
      site.centroid_o$NMDS2.center[6] , #HJ
      site.centroid_o$NMDS2.center[7] + 0.05, #HR
      site.centroid_o$NMDS2.center[8] + 0.1  #HO
    ),
    label = levels(LSLU),
    fontface = "bold",
    size = 3
  ) +
  # annotate(geom = "text",
  #          x = min(nmds_cent_o$NMDS1) - 0.1,
  #          y = max(nmds_cent_o$NMDS2) + 0.1,
  #          label = paste("Landuse: Wilks =", round(comm_man_o_LSLU$stats["LandUse", "Wilks"], 3),
  #                        "F =", round(comm_man_o_LSLU$stats["LandUse", "approx F"], 3),
  #                        "p < 0.001",
  #                        "\nLandscape: Wilks =", round(comm_man_o_LSLU$stats["LandScape", "Wilks"], 3),
  #                       "F =", round(comm_man_o_LSLU$stats["LandScape", "approx F"], 3),
  #                        "p < 0.001",
  #                        "\nInteraction: Wilks =", round(comm_man_o_LSLU$stats["LandUse:LandScape", "Wilks"], 3),
  #                        "F =", round(comm_man_o_LSLU$stats["LandUse:LandScape", "approx F"], 3),
#                        "p =", round(comm_man_o_LSLU$stats["LandUse:LandScape", "Pr(>F)"], 4)
#          ),
#          fontface = "bold",
#          size = 2,
#          hjust = 0)+
theme(legend.position = "none")

ggsave(ordi_overall, 
       filename = "Figures/nmds_betapart_overall_naked.pdf", 
       device = "pdf", 
       width = 164, height = 164, units = "mm", dpi = 600)

##### turnover: interaction landuse and landscape #####
nmds_scores_t <- cbind(vegan::scores(NMDS_turnover, display = 'sites'))
comm.lda<-lda(nmds_scores_t, interaction(LandUse, LandScape))

comm_man_t <- manova(nmds_scores_t ~ LandUse * LandScape) 
sum_man_t <- summary(comm_man_t, test = 'Wilks')
writeLines(capture.output(sum_man_t), con="Figures/betadiv_part_turnover_manova.txt")

# pairwise comparisons
t_man_emmeans <- emmeans(comm_man_t, ~ LandUse * LandScape)
pw.t <- test(contrast(t_man_emmeans, "pairwise", adjust = "fdr", side = "="))
writeLines(capture.output(pw.t), con="Figures/betadiv_part_turnover_manova_pwc.txt")

# centroids for annotation
nmds_scores_t <- as.data.frame(nmds_scores_t)
nmds_scores_t$LSLU <- LSLU
site.centroid_t <- nmds_scores_t %>%
  group_by(System = LSLU) %>%
  summarise(NMDS1.center = mean(NMDS1), NMDS2.center = mean(NMDS2))

# and finally the plot!
ordi_turnover <- ggplot(data = nmds_scores_t,
                           mapping = aes(x = NMDS1,
                                         y = NMDS2)) +
  cleanup +
  scale_color_manual(values = system_col_double) +
  scale_fill_manual(values = alpha(c(system_col_double), (rep(c(0.65, 0.25), each = 4)))) +
  stat_ellipse( # 95% confidence interval, centroids are means of NMDS1 and NMDS2 for each LandUse
    geom = "polygon",
    level = 0.75,
    aes(fill = LSLU, color = LSLU)
  ) +
  geom_point(
    size = 1,
    shape = c(21, 24)[LandScape], # circle Bukit Duabelas, triangle Harapan
    stroke = 0.5,
    fill = system_col[LandUse],
    show.legend = 
  ) +
  annotate(
    geom = "text",
    x = c(
      site.centroid_t$NMDS1.center[1] - 0.02, #BF
      site.centroid_t$NMDS1.center[2] - 0.05, #BJ
      site.centroid_t$NMDS1.center[3] - 0.05, #BR
      site.centroid_t$NMDS1.center[4], #BO
      site.centroid_t$NMDS1.center[5] + 0.05, #HF
      site.centroid_t$NMDS1.center[6] + 0.03, #HJ
      site.centroid_t$NMDS1.center[7], #HR
      site.centroid_t$NMDS1.center[8] +0.03 #HO
    ),
    y = c(
      site.centroid_t$NMDS2.center[1] + 0.1, #BF
      site.centroid_t$NMDS2.center[2] - 0.05,  #BJ
      site.centroid_t$NMDS2.center[3], #BR
      site.centroid_t$NMDS2.center[4] , #BO
      site.centroid_t$NMDS2.center[5] , #HF
      site.centroid_t$NMDS2.center[6], #HJ
      site.centroid_t$NMDS2.center[7] , #HR
      site.centroid_t$NMDS2.center[8] - 0.04  #HO
    ),
    label = levels(LSLU),
    fontface = "bold",
    size = 3
  ) +
  # annotate(geom = "text",
  #          x = max(nmds_cent_t$NMDS1) - 0.3,
  #          y = max(nmds_cent_t$NMDS2) + 0.05,
  #          label = paste("Landuse: Wilks =", round(comm_man_t_LSLU$stats["LandUse", "Wilks"], 3),
  #                        "F =", round(comm_man_t_LSLU$stats["LandUse", "approx F"], 3),
  #                        "p < 0.001",
  #                        "\nLandscape: Wilks =", round(comm_man_t_LSLU$stats["LandScape", "Wilks"], 3),
  #                        "F =", round(comm_man_t_LSLU$stats["LandScape", "approx F"], 3),
  #                        "p =", round(comm_man_t_LSLU$stats["LandScape", "Pr(>F)"], 4),
  #                        "\nInteraction: Wilks =", round(comm_man_t_LSLU$stats["LandUse:LandScape", "Wilks"], 3),
  #                        "F =", round(comm_man_t_LSLU$stats["LandUse:LandScape", "approx F"], 3),
#                        "p =", round(comm_man_t_LSLU$stats["LandUse:LandScape", "Pr(>F)"], 4)
#          ),
#          fontface = "bold",
#          size = 1.5,
#          hjust = 0) +
theme(legend.position = "none")

ggsave(ordi_turnover, 
       filename = "Figures/nmds_betapart_turnover_naked.pdf",
       device = "pdf", 
       width = 82, height = 82, units = "mm", dpi = 600)

##### nestedness: just Landuse #####
nmds_scores_n <- cbind(vegan::scores(NMDS_nestedness, display = 'sites'))
comm.lda<-lda(nmds_scores_n, interaction(LandUse, LandScape))

# like adonis, interaction and landscape not significant in manova
comm_man_n <- summary(manova(nmds_scores_n ~ LandUse * LandScape), test = "Wilks") 
comm_man_n2 <- manova(nmds_scores_n ~ LandUse) 
sum_man_n <- summary(comm_man_n2, test = 'Wilks')
writeLines(capture.output(sum_man_n), con="Figures/betadiv_part_nestedness_manova.txt")

# pairwise comparisons
n_man_emmeans <- emmeans(comm_man_n2, ~ LandUse)
pw.n <- test(contrast(n_man_emmeans, "pairwise", adjust = "fdr", side = "="))
writeLines(capture.output(pw.n), con="Figures/betadiv_part_nestedness_manova_pwc.txt")

# centroids for annotation
nmds_scores_n <- as.data.frame(nmds_scores_n)
nmds_scores_n$LandUse <- LandUse
site.centroid_n <- nmds_scores_n %>%
  group_by(System = LandUse) %>%
  summarise(NMDS1.center = mean(NMDS1), NMDS2.center = mean(NMDS2))

# and finally the plot!
ordi_nestedness <- ggplot(data = nmds_scores_n,
                          mapping = aes(x = NMDS1,
                                        y = NMDS2)) +
  cleanup +
  scale_color_manual(values = system_col) +
  scale_fill_manual(values = system_col) +
  stat_ellipse( # 95% confidence interval, centroids are means of NMDS1 and NMDS2 for each LandUse
    data = nmds_scores_n,
    geom = "polygon",
    level = 0.75,
    alpha = 0.75,
    aes(fill = LandUse, color = LandUse)
  ) +
  geom_point(
    data = nmds_scores_n,
    size = 1,
    shape = c(21, 24)[LandScape], # circle Bukit Duabelas, triangle Harapan
    stroke = 0.5,
    fill = system_col[LandUse],
    show.legend = 
  ) +
  # annotate(geom = "text",
  #          x = max(nmds_scores_n$NMDS1),
  #          y = max(nmds_scores_n$NMDS2),
  #          label = paste("Wilks = ", round(comm_man_n$stats["LandUse", "Wilks"], 3),
  #                        "\nF = ", round(comm_man_n$stats["LandUse", "approx F"], 3),
  #                        "\np = <0.001"),
  #          fontface = "bold",
  #          size = 2)+
  theme(legend.position = "none")

ggsave(ordi_nestedness, 
       filename = "Figures/nmds_part_nestedness_naked.pdf", 
       device = "pdf", 
       width = 82, height = 82, units = "mm", dpi = 600)

#### nmds turnover + nestednesss combo plot ####

nmds_tn_plot <- ordi_turnover + ordi_nestedness + 
  plot_annotation(tag_levels = 'A')

ggsave(nmds_tn_plot, 
       filename = "Figures/nmds_tn_combined.pdf", 
       device = "pdf", 
       width = 340, height = 160, units = "mm", dpi = 600)

# overall - turnover - nestedness graphic ---------------------------------
# not in the paper but an overview for ourselves

div_parts <- data.frame(LU = within_o$LU, LS = within_o$ls1, 
                        overall = within_o$value, 
                        turnover = within_t$value,
                        nestedness = within_n$value)

div_parts_m <- melt(div_parts)

parts_plot <- ggplot(data = div_parts_m, aes(x = value)) +
  cleanup +
  scale_color_manual(values = system_col) + 
  facet_grid(variable~.) +
  labs(x = "Beta diversity") +
  theme(legend.position = "none")

parts_LS <- parts_plot +  # Bukit solid Harapan dashed lines
  geom_freqpoly(aes(colour = LU, linetype = LS), binwidth = 0.05)

parts_pooled <- parts_plot + # landscapes pooled
  geom_freqpoly(aes(colour = LU), binwidth = 0.05) 

ggsave(parts_LS, 
       filename = "Figures/parts_LS_Bukit_solid.pdf", 
       device = "pdf", 
       width = 82, height = 82, units = "mm", dpi = 600)

ggsave(parts_pooled, 
       filename = "Figures/parts_LUonly.pdf", 
       device = "pdf", 
       width = 82, height = 82, units = "mm", dpi = 600)

# CCA ----------------------------------------------------------
# requires community matrix and environmental data ("ordination.data" below)

# community matrix is spiders_spp or spiders_fam

#### environmental data ####
ordination.data <- read.csv(file ="Data/EnvironmentalVariables.csv") 
summary(ordination.data)
rownames(ordination.data) <- ordination.data$PlotID

# extract land use systems and landscapes from Plot Names
ordination.data$LU <- factor(substr(ordination.data$PlotID, 2, 2), 
                             levels = c("F", "J", "R", "O"), 
                             labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm"))
ordination.data$LS <- factor(substr(ordination.data$PlotID, 1, 1), 
                             levels = c("B", "H"), 
                             labels = c("Bukit Duabelas", "Harapan"))

ordination.data2 <- ordination.data[,-c(1,9:10)] # data frame without metadata
summary(ordination.data2) 

##### Test for correlations among env variables #####

# data standardised to range 0 ... 1
Environment_comparison2 <-  decostand(ordination.data2, method="range")

# Function to test for (Pearson) correlation of environmental variables.

my_custom_cor <- function(data, mapping, color = I("grey50"), sizeRange = c(1, 5), ...) {
  
  # extract the x and y data from the standardised environmental data
  x <- GGally::eval_data_col(data, mapping$x)
  y <- GGally::eval_data_col(data, mapping$y)
  
  ct <- cor.test(x,y, method = "pearson")
  sig <- symnum(
    ct$p.value, corr = FALSE, na = FALSE,
    cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
    symbols = c("***", "**", "*", ".", " ")
  )
  
  r <- unname(ct$estimate)
  rt <- format(r, digits=2)[1]
  
  # since we can't print it to get the strsize, just use the max size range
  cex <- max(sizeRange)
  
  # helper function to calculate a useable size
  percent_of_range <- function(percent, range) {
    percent * diff(range) + min(range, na.rm = TRUE)
  }
  
  # plot the cor value
  ggally_text(
    label = as.character(rt), 
    mapping = aes(),
    xP = 0.5, yP = 0.5, 
    size = I(percent_of_range(cex * abs(r), sizeRange)), cex=2,
    color = color,
    ...
  ) + 
    # add the sig stars
    geom_text(
      aes_string(
        x = 0.8,
        y = 0.8
      ),
      label = sig, 
      size = I(cex),
      color = color,
      ...
    ) + 
    # remove all the background stuff and wrap it with a dashed line
    theme_classic() + 
    theme(
      panel.background = element_rect(
        color = color, 
        linetype = "longdash"
      ), 
      axis.line = element_blank(), 
      axis.ticks = element_blank(), 
      axis.text.y = element_blank(), 
      axis.text.x = element_blank()
    )
}

my_custom_smooth <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point(color = I("blue")) + 
    geom_smooth(method = "lm", color = I("black"), ...)
}

# show correlations between the environmental variables
ggpairs(data = Environment_comparison2,
        upper = list(continuous = wrap(my_custom_cor, sizeRange = c(1,3))), 
        lower = list(continuous = my_custom_smooth), 
        axisLabels = "internal")

# significant correlations between most of the variables. all are <|95|%, but several are >|80|%.

#### msp CCA with all environmental variables ####

# CCA accordning to numerical ecology: NO HELLINGER TRANSFORMATION
# CCAs are more biased by rare species. We did not check that here.

# set up full model with all environmental variables
cca_raw_msp <- cca(spiders_spp ~ ., ordination.data2)
vif.cca(cca_raw_msp)
r2adjust_raw_msp <- RsquareAdj(cca_raw_msp)$adj.r.squared

# set up null model with no environmental variables
cca_0_msp<-cca(spiders_spp ~ 1, data=ordination.data2)

# ordiR2step uses P value and the adjusted R^2 of your raw cca to select which variable to add to the model; ?ordiR2step explains quite well why its better than usual ordistep function	or see: https://www.davidzeleny.net/anadat-r/doku.php/en:forward_sel_examples 
cca_stepforward_msp <- ordiR2step(cca_0_msp, 
                            scope = formula(cca_raw_msp), 
                            R2scope = r2adjust_raw_msp, 
                            direction = "forward", 
                            permutations = 999) 

vif.cca(cca_stepforward_msp)

summary(cca_stepforward_msp)
anova.cca(cca_stepforward_msp, step = 1000, by = "axis")
anova(cca_stepforward_msp, by = "term", permutations = 999)

write.csv(cca_stepforward_msp$CCA$eig, "Figures/cca_msp_eigenvalues.csv")
writeLines(capture.output(anova.cca(cca_stepforward_msp,step=1000,by="axis")),con="Figures/cca_msp_perm.test_axes.txt")
writeLines(capture.output(anova(cca_stepforward_msp, by = "term", permutations = 999)),con="Figures/cca_msp_perm.test_env.variables.txt")

r2_adj_cca_msp<-RsquareAdj(cca_stepforward_msp)$adj.r.squared


#### msp CCA Plot ####
# adjusted % variation explained by each axis
(CCA1.explains.adj_msp<-round(cca_stepforward_msp$CCA$eig["CCA1"]/sum(cca_stepforward_msp$CCA$eig) * RsquareAdj(cca_stepforward_msp)$adj.r.squared,digits=4)*100)

(CCA2.explains.adj_msp<-round(cca_stepforward_msp$CCA$eig["CCA2"]/sum(cca_stepforward_msp$CCA$eig) * RsquareAdj(cca_stepforward_msp)$adj.r.squared,digits=4)*100)

# extract species and site scores (specify vegan to avoid conflict between packages)
species.scores_msp <- data.frame(vegan::scores(cca_stepforward_msp, display = "species", choices = c(1, 2)))
site.scores_msp <- data.frame(vegan::scores(cca_stepforward_msp, display = "sites", choices = c(1, 2)))

site.scores_msp$landuse <- factor(substr(rownames(site.scores_msp), 2, 2), 
                             levels = c("F", "J", "R", "O"))
site.scores_msp$landscape <- factor(substr(rownames(site.scores_msp), 1, 1), 
                         levels = c("B", "H"), 
                         labels = c("Bukit Duabelas", "Harapan"))

location.centroids_msp <- site.scores_msp %>% 
  group_by(site.scores_msp$landuse) %>% 
  dplyr::select(CCA1, CCA2) %>%  
  summarise_all(mean) 

ford_msp <- fortify(cca_stepforward_msp, axes = 1:2)

take <- c("CCA1","CCA2")
arrows_msp <- subset(ford_msp, Score=="biplot")
autoplot(cca_stepforward_msp)

mul_msp <- ggvegan:::arrowMul(arrows_msp[,take], 
                        subset(ford_msp, select = take, Score=="sites"))
arrows_msp[,take] <- arrows_msp[,take] * mul
arrows_msp$Label <- c("Plant \nRichness", "Trees/ha") # nicer names

cca_plot_msp <- ggplot()+
  geom_vline(aes(xintercept = 0), linetype = "dashed") +
  geom_hline(aes(yintercept = 0), linetype = "dashed")  + 
  scale_color_manual(values = system_col) + 
  scale_fill_manual(values=system_col) +
  geom_point(data = species.scores_msp,
             mapping = aes(x = CCA1, y = CCA2),
             shape = '+', # Plot species as + symbol
             colour = "gray25" # Make the symbols gray 
  ) +
  geom_segment(data = arrows_msp,
               mapping = aes(x=0, y=0, xend = CCA1, yend = CCA2),
               arrow = arrow(length = unit(0.01, "npc"))
  ) +
  geom_text(data=arrows_msp,
            mapping=aes(label = Label,
                        x = c(-2.1, - 1.5),
                        y = c(-0.7, 1.7)),
            size = 2)+ 
  coord_fixed() +
  scale_x_continuous(expand = c(.1, .1)) +
  scale_y_continuous(expand = c(.1, .1)) +
  labs(x = paste( "CCA1: Explained variation:", CCA1.explains.adj_msp, "%"),
       y = paste("CCA2: Explained variation:", CCA2.explains.adj_msp, "%")) +
  stat_chull(data = site.scores_msp, mapping = aes(x = CCA1, y = CCA2,
                                               fill = landuse, colour = landuse),
             geom = "polygon", alpha = 0.25, size = 1)  +
  geom_point(data=site.scores_msp,
             mapping=aes(x=CCA1, y=CCA2,
                         color = landuse, shape = landscape), # circle Bukit Duabelas, triangle Harapan
             size = 2
  ) +  
  annotate(geom = "text",  # With names
                x = location.centroids_msp$CCA1, y = location.centroids_msp$CCA2,
                label = location.centroids_msp$`site.scores_msp$landuse`
                ) +
  theme(legend.position = "none",
        axis.text = element_text(size = 8),) +
  cleanup

ggsave(cca_plot_msp, 
       filename = "Figures/cca_msp.pdf", 
       device = "pdf", 
       width = 82, height = 82, units = "mm", dpi = 600)

#### msp CCA without most highly correlated variable ####
# no difference in the results
env_trimmed <- ordination.data2[, - c(2)] # drop relative humidity, which is involved to 2 correlation >|85|%

# standardise the data range 0...1
env_trimmed_stand <- decostand(env_trimmed, method="range")

ggpairs(data = env_trimmed_stand,
        upper = list(continuous = wrap(my_custom_cor, sizeRange = c(1,3))), 
        lower = list(continuous = my_custom_smooth), 
        axisLabels = "internal")

cca_raw2_msp <- cca(spiders_spp ~ ., env_trimmed) # trimmed set of environmental variables
summary(cca_raw2_msp)
anova(cca_raw2_msp,step=1000)
anova(cca_raw2_msp,by="axis",step=1000) 
anova(cca_raw2_msp, by = "term", permutations = 999)
vif.cca(cca_raw2_msp)
r2adjust_raw2_msp <- RsquareAdj(cca_raw2_msp)$adj.r.squared

cca_stepforward2_msp <-ordiR2step(cca_0_msp, 
                              scope=formula(cca_raw2_msp), 
                              R2scope =r2adjust_raw2_msp, 
                              direction="forward", 
                              permutations=999 ) 
vif.cca(cca_stepforward2_msp)
summary(cca_stepforward2_msp)
anova.cca(cca_stepforward2_msp,step=1000,by="axis")
anova(cca_stepforward2_msp, by = "term", permutations = 999)
r2_adj_cca2_msp<-RsquareAdj(cca_stepforward2_msp)$adj.r.squared

#### family-level CCA with all environmental variables ####

# CCA accordning to numerical ecology: NO HELLINGER TRANSFORMATION
# CCAs are more biased by rare species. We did not check that here.

# set up full model with all environmental variables
cca_raw_fam <- cca(spiders_fam ~ ., ordination.data2)
vif.cca(cca_raw_fam)
r2adjust_raw_fam <- RsquareAdj(cca_raw_fam)$adj.r.squared

# set up null model with no environmental variables
cca_0_fam <- cca(spiders_fam ~ 1, data=ordination.data2)

# ordiR2step uses P value and the adjusted R^2 of your raw cca to select which variable to add to the model; ?ordiR2step explains quite well why its better than usual ordistep function	or see: https://www.davidzeleny.net/anadat-r/doku.php/en:forward_sel_examples 
cca_stepforward_fam <- ordiR2step(cca_0_fam, 
                            scope = formula(cca_raw_fam), 
                            R2scope = r2adjust_raw_fam, 
                            direction = "forward", 
                            permutations = 999) 

vif.cca(cca_stepforward_fam)

summary(cca_stepforward_fam)
anova.cca(cca_stepforward_fam, step=1000, by="axis")
anova(cca_stepforward_fam, by = "term", permutations = 999)

write.csv(cca_stepforward_fam$CCA$eig, "Figures/cca_family_eigenvalues.csv")
writeLines(capture.output(anova.cca(cca_stepforward_fam,step=1000,by="axis")),con="Figures/cca_family_perm.test_axes.txt")
writeLines(capture.output(anova(cca_stepforward_fam, by = "term", permutations = 999)),con="Figures/cca_family_perm.test_env.variables.txt")

r2_adj_cca_fam <- RsquareAdj(cca_stepforward_fam)$adj.r.squared


#### family-level CCA Plot ####
# adjusted % variation explained by each axis
(CCA1.explains.adj_fam<-round(cca_stepforward_fam$CCA$eig["CCA1"]/sum(cca_stepforward_fam$CCA$eig) * RsquareAdj(cca_stepforward_fam)$adj.r.squared,digits=4)*100)

(CCA2.explains.adj_fam<-round(cca_stepforward_fam$CCA$eig["CCA2"]/sum(cca_stepforward_fam$CCA$eig) * RsquareAdj(cca_stepforward_fam)$adj.r.squared,digits=4)*100)

(CCA3.explains.adj_fam<-round(cca_stepforward_fam$CCA$eig["CCA3"]/sum(cca_stepforward_fam$CCA$eig) * RsquareAdj(cca_stepforward_fam)$adj.r.squared,digits=4)*100)

# extract species and site scores (specify vegan to avoid conflict between packages)
species.scores_fam <- data.frame(vegan::scores(cca_stepforward_fam, display = "species", choices = c(1, 2)))
site.scores_fam <- data.frame(vegan::scores(cca_stepforward_fam, display = "sites", choices = c(1, 2)))

# rotate to make symmetrical with msp figure
species.scores_fam$CCA1 <- species.scores_fam$CCA1 * -1
species.scores_fam$CCA2 <- species.scores_fam$CCA2 * -1
site.scores_fam$CCA1 <- site.scores_fam$CCA1 * -1
site.scores_fam$CCA2 <- site.scores_fam$CCA2 * -1

site.scores_fam$landuse <- factor(substr(rownames(site.scores_fam), 2, 2), 
                              levels = c("F", "J", "R", "O"))
site.scores_fam$landscape <- factor(substr(rownames(site.scores_fam), 1, 1), 
                                levels = c("B", "H"), 
                                labels = c("Bukit Duabelas", "Harapan"))

location.centroids_fam <- site.scores_fam %>% 
  group_by(site.scores_fam$landuse) %>% 
  dplyr::select(CCA1, CCA2) %>%  
  summarise_all(mean) 

ford_fam <- fortify(cca_stepforward_fam, axes = 1:2)

take <- c("CCA1","CCA2")
arrows_fam <- subset(ford_fam, Score=="biplot")
# rotate the arrows too
arrows_fam$CCA1 <- arrows_fam$CCA1 * -1
arrows_fam$CCA2 <- arrows_fam$CCA2 * -1

autoplot(cca_stepforward_fam)

mul_fam <- ggvegan:::arrowMul(arrows_fam[,take], 
                          subset(ford_fam, select = take, Score=="sites"))
arrows_fam[,take] <- arrows_fam[,take] * mul_fam
arrows_fam$Label <- c("Canopy \nOpenness","Aboveground \nBiomass", "Trees/ha") # nicer names

cca_plot_fam <- ggplot()+
  geom_vline(aes(xintercept = 0), linetype = "dashed") +
  geom_hline(aes(yintercept = 0), linetype = "dashed")  + 
  scale_color_manual(values = system_col) + 
  scale_fill_manual(values=system_col) +
  geom_point(data = species.scores_fam,
             mapping = aes(x = CCA1, y = CCA2),
             shape = '+', # Plot species as + symbol
             colour = "gray25" # Make the symbols gray 
  ) +
  coord_fixed() +
  scale_x_continuous(expand = c(.1, .1)) +
  scale_y_continuous(expand = c(.1, .1)) +
  labs(x = paste( "CCA1: Explained variation:", CCA1.explains.adj_fam, "%"),
       y = paste("CCA2: Explained variation:", CCA2.explains.adj_fam, "%")) +
  stat_chull(data = site.scores_fam, mapping = aes(x = CCA1, y = CCA2,
                                               fill = landuse, colour = landuse),
             geom = "polygon", alpha = 0.25, size = 1)  +
  geom_point(data=site.scores_fam,
             mapping=aes(x=CCA1, y=CCA2,
                         color = landuse, shape = landscape), # circle Bukit Duabelas, triangle Harapan
             size = 2
  ) +  
  geom_segment(data = arrows_fam,
               mapping = aes(x=0, y=0, xend = CCA1, yend = CCA2),
               arrow = arrow(length = unit(0.01, "npc"))
  ) +
  annotate(geom = "text",  # With names
           x = location.centroids_fam$CCA1 + c(-0.5, 0.1, 0, 0.05), 
           y = location.centroids_fam$CCA2 + c(0.3, -0.1, 0, -0.05),
           label = location.centroids_fam$`site.scores_fam$landuse`
           ) +
  geom_text(data=arrows_fam,
            mapping=aes(label = Label,
                        x = c(2.5, -2.6, -1.6),
                        y = c(-0.7, -0.7, 1.7)),
            size = 2,
            vjust = 'inward', hjust = 'inward') + 
  theme(legend.position = "none",
        axis.text = element_text(size = 8),) +
  cleanup

ggsave(cca_plot_fam, 
       filename = "Figures/cca_fam.pdf", 
       device = "pdf", 
       width = 82, height = 82, units = "mm", dpi = 600)

#### fam CCA without most highly correlated variable ####
# no difference in the results

cca_raw2_fam <- cca(spiders_fam ~ ., env_trimmed) # trimmed set of environmental variables
summary(cca_raw2_fam)
anova(cca_raw2_fam,step=1000)
anova(cca_raw_fam,by="axis",step=1000) 
anova(cca_raw_fam, by = "term", permutations = 999)
vif.cca(cca_raw_fam)
r2adjust_raw2_fam<-RsquareAdj(cca_raw_fam)$adj.r.squared

cca_stepforward2_fam <-ordiR2step(cca_0_fam, 
                              scope=formula(cca_raw2_fam), 
                              R2scope =r2adjust_raw2_fam, 
                              direction="forward", 
                              permutations=999 ) 
vif.cca(cca_stepforward2_fam)
summary(cca_stepforward2_fam)
anova.cca(cca_stepforward2_fam,step=1000,by="axis")
anova(cca_stepforward2_fam, by = "term", permutations = 999)
r2_adj_cca2_fam<-RsquareAdj(cca_stepforward2_fam)$adj.r.squared

#### CCA combo plot figure 7 ####

cca_plot_fig7 <- cca_plot_fam + cca_plot_msp + 
  plot_annotation(tag_levels = 'A')

ggsave(cca_plot_fig7, 
       filename = "Figures/cca_combined.pdf", 
       device = "pdf", 
       width = 340, height = 160, units = "mm", dpi = 600)