library(corrplot)
library(ggboral)
library(R2jags)
library(rjags)
library(mvabund)
library(boral)
library(pscl)
library(devtools)
library(classInt)
library(car)
library(limma)
library(ggpubr)
library(plyr)
library(stringi)
library(dplyr)
library(rgdal)
library(rgeos)
library(maptools)
library(BIEN)
library(ROCR)
library(performance)
library(psych)
library(ggplot2)
library(ggpirate)
library(devtools)
library(colorRamps)
library(bindrcpp)
library(glue)
library(pkgconfig)
library(pkgbuild)
library(tibble)
library(grid)
library(gridExtra)
library(gamlss)
library(vegan)
library(ggplot2)
library(devtools)
library(GGally)
library(fitdistrplus)
library(multcomp)
library(ggpirate)
library(RankAbund)
library(reshape2)
library(ggboral)

###species accumulation curves ----------------------
# using default method "exact"
sac <- specaccum(database2)
plot(sac)
fortify(sac)
# using classical method "random"
sac_ran <- specaccum(database2, method = "random")
plot(sac_ran)


# by LandUse, all on one plot
sac.Forest <- specaccum(database2[LandUse=="Forest",], method = "random",permutations=999)
sac.JR <-     specaccum(database2[LandUse=="Jungle Rubber",], method = "random",permutations=999)
sac.Rubber <- specaccum(database2[LandUse=="Rubber",], method = "random",permutations=999)
sac.OP <-     specaccum(database2[LandUse=="Oil Palm",], method = "random",permutations=999)


sac.Forest <- specaccum(database2[LandUse=="Forest",], method = "rarefaction",permutations=999)
sac.JR <-     specaccum(database2[LandUse=="Jungle Rubber",], method = "rarefaction",permutations=999)
sac.Rubber <- specaccum(database2[LandUse=="Rubber",], method = "rarefaction",permutations=999)
sac.OP <-     specaccum(database2[LandUse=="Oil Palm",], method = "rarefaction",permutations=999)



plot(sac.Forest, col = "#15983DFF",ylab="Species",xlab="Sites")
plot(sac.JR, add = T, col = "#0C5BB0FF")
plot(sac.Rubber, add = T, col = "#FEC10BFF")
plot(sac.OP, add = T, col = "#EE0011FF")
dev.off()


##################Set colours and adjust plot appearance######

system_col = c("#15983DFF", "#0C5BB0FF", "#FEC10BFF", "#EE0011FF") # green, blue, yellow, red, from the pirate plots basel palette
system_col_double <- c(system_col, system_col)
double_col <- rep(system_col, each = 2)
system_names = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")
LU_abb = c("F", "J", "R", "O")



# make a nicer looking ggplot, get rid of background grid, etc.
cleanup <- theme(panel.grid.major = element_blank(),
                 panel.grid.minor = element_blank(),
                 panel.background = element_blank(),
                 axis.text = element_text(colour = "black"),
                 #axis.line = element_line(color = "black"),
                 panel.border = element_rect(fill = NA),
                 plot.margin = unit(c(4,4,4,4), "pt")
)
####Rank Abundance curves--------


#####Forest###
Forest<-read.csv2("Database_Salticids_AJ_finished.csv")
Forest<-Forest[,c("Land.use.system","Morpho","Abundance")]#land use level
Forest<-reshape2::dcast(Forest,Land.use.system~Morpho,value.var="Abundance",sum)
Forest<-tibble::column_to_rownames(Forest,"Land.use.system")
Forest<-Forest[1,]#######Select only Forest
Forest_rad<-radfit(Forest)
Forest_rad #Mandelbrot-Model
forest_plot<-radlattice(Forest_rad)
forest_plot<-forest_plot$panel
forest_plot
plot()
####Jungle Rubber####
JungleRubber<-read.csv2("Database_Salticids_AJ_finished.csv")
JungleRubber<-JungleRubber[,c("Land.use.system","Morpho","Abundance")]#land use level
JungleRubber<-reshape2::dcast(JungleRubber,Land.use.system~Morpho,value.var="Abundance",sum)
JungleRubber<-tibble::column_to_rownames(JungleRubber,"Land.use.system")
JungleRubber<-JungleRubber[2,]#######Select only Jungle Rubber
JungleRubber_rad<-radfit(JungleRubber)
JungleRubber_rad # Mandelbrot-Model
jungle_plot<-radlattice(JungleRubber_rad)
jungle_plot


#######Oilpalm
Oilpalm<-read.csv2("Database_Salticids_AJ_finished.csv")
Oilpalm<-Oilpalm[,c("Land.use.system","Morpho","Abundance")]#land use level
Oilpalm<-reshape2::dcast(Oilpalm,Land.use.system~Morpho,value.var="Abundance",sum)
Oilpalm<-tibble::column_to_rownames(Oilpalm,"Land.use.system")
Oilpalm<-Oilpalm[3,]#######Select only Oilpalm
Oilpalm_rad<-radfit(Oilpalm) #Preemption
Oilpalm_rad
oil_plot<-radlattice(Oilpalm_rad)
oil_plot
#####Rubber####
Rubber<-read.csv2("Database_Salticids_AJ_finished.csv")
Rubber<-Rubber[,c("Land.use.system","Morpho","Abundance")]#land use level
Rubber<-reshape2::dcast(Rubber,Land.use.system~Morpho,value.var="Abundance",sum)
Rubber<-tibble::column_to_rownames(Rubber,"Land.use.system")
Rubber<-Rubber[4,]#######Select only Rubber
Rubber_rad<-radfit(Rubber)
Rubber_rad ## Zipf model
rubber_plot<-radlattice(Rubber_rad)
rubber_plot



database<-read.csv2("Database_Salticids_AJ_finished.csv") 

database<-reshape2::dcast(database, Core.Plot~Morpho, value.var = "Abundance",sum)
head(database)




colnames(database)[1] <- "Plot_name"


# extracting land use systems and landscapes from plot names
database$LU <- factor(substr(database$Plot_name, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
LandUse <- database$LU # we also want this as a vector for later

database$LS <- factor(substr(database$Plot_name, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
LandScape <- database$LS # we also want this as a vector for later
LSLU <- factor(substr(database$Plot_name, 1, 2), levels = c("BF", "BJ", "BR", "BO", "HF", "HJ", "HR", "HO")) # factor combining land use and landscape
LULS <- factor(stri_reverse(LSLU))
rownames(database) <- database$Plot_name
#database<-tibble::column_to_rownames(database,"Plot_name")


database2 <- database[,-c(1,72:73)] # 

ranks_all <- spec.ranks(database2,LandUse)

ra_plot_all <- ra.plot(ranks_all,system_col)

ra_plot_all

ggsave(ra_plot_all, filename = "rank_abund_all.pdf", device = "pdf", width = 100, height = 100, units = "mm", dpi = 600)
# 
ranks_summary <- rbind(cbind(as.data.frame(ranks_all$Forest), system = "Forest"), 
                       cbind(as.data.frame(ranks_all$`Jungle Rubber`), system = "Jungle Rubber"),
                       cbind(as.data.frame(ranks_all$Rubber), system = "Rubber"),
                       cbind(as.data.frame(ranks_all$`Oil Palm`), system = "Oil Palm")
)

write.csv(ranks_summary, "rank_abundance.csv")

# radfit abundance
## radfit fits the data from each plot to each of five different models. The model with the lowest AIC, best explains the data observed in that plot.  
rad <- radfit(database2)
rad
plot(rad)
LSLU

# make a dataframe to compare the best model between land use systems
rad_dev <- sapply(rad, function(x) unlist(lapply(x$models, deviance)))
rad_dev <- t(rad_dev)
rad_dev_long <- melt(rad_dev)
rad_dev_long$LandUse <- factor(substr(rad_dev_long$Var1, 2,2), # extract land use systems from Plot Names
                               levels = c("F", "J", "R", "O"), 
                               labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm"))



# differences in residual deviance between models by land use
rad_aov <- lm(value ~ LandUse*Var2, data=rad_dev_long) # value = residual deviance, Var2 = each of the potential models
summary(rad_aov)
summary(rad_aov)$fstatistic
anova(rad_aov,test="F")

# set up the model with hypothesis for multiple comparisons
rad_model <- glht(model = rad_aov, linfct = mcp(LandUse = "Tukey"))
# pairwise t.test with holm correction
rad_HSD <- summary(rad_model, adjusted(type = c(p.adjust.methods = c("holm"))))
summary(rad_HSD)
### RESULT: 
# Significant differences in Rubber-Forest, Rubber-Jungle Rubber, Oil palm-jungle rubber 
write.csv(cbind(t_value = rad_HSD$test$tstat, p_value = rad_HSD$test$pvalues), "rad_HSD.csv") # save test values for manuscript
racurve(database2,main="Rank Abundance Curves",nlab=0,ylog=FALSE,frequency=FALSE)





##################Pearson Correlations of environmental variables----------
library("ggboral")
library("boral")
library("rjags")
library("boral")
library("corpcor")
library("mctest")
library("olsrr")
library("ppcor")

Environment_comparison<- read.csv2(file ="20190829_Environmental Variables_AJ.csv")
Environment_comparison$LU <- factor(substr(Environment_comparison$Core.Plot, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
Environment_comparison$LS <- factor(substr(Environment_comparison$Core.Plot, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
rownames(Environment_comparison)<-Environment_comparison$Core.Plot
Environment_comparison2<-Environment_comparison[,-c(1:2,9:10)] #delete metadata

Environment_comparison2<-decostand(Environment_comparison2,method="range") #Transformation to compare variables
my_custom_cor <- function(data, mapping, color = I("grey50"), sizeRange = c(1, 5), ...) {
  
  # get the x and y data to use the other code
  x <- GGally::eval_data_col(data, mapping$x)
  y <- GGally::eval_data_col(data, mapping$y)
  
  ct <- cor.test(x,y, method = "pearson")
  sig <- symnum(
    ct$p.value, corr = FALSE, na = FALSE,
    cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
    symbols = c("***", "**", "*", ".", " ")
  )
  
  r <- unname(ct$estimate)
  rt <- format(r, digits=2)[1]
  
  # since we can't print it to get the strsize, just use the max size range
  cex <- max(sizeRange)
  
  # helper function to calculate a useable size
  percent_of_range <- function(percent, range) {
    percent * diff(range) + min(range, na.rm = TRUE)
  }
  
  # plot the cor value
  ggally_text(
    label = as.character(rt), 
    mapping = aes(),
    xP = 0.5, yP = 0.5, 
    size =6,
    color = color,
    ...
  ) + 
    # add the sig stars
    geom_text(
      aes_string(
        x = 0.8,
        y = 0.8
      ),
      label = sig, 
      size = I(cex),
      color = color,
      ...
    ) + 
    # remove all the background stuff and wrap it with a dashed line
    theme_classic() + 
    theme(
      panel.background = element_rect(
        color = color, 
        linetype = "longdash"
      ), 
      axis.line = element_blank(), 
      axis.ticks = element_blank(), 
      axis.text.y = element_blank(), 
      axis.text.x = element_blank()
    )
}


my_custom_smooth <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point(color = I("blue")) + 
    geom_smooth(method = "lm", color = I("black"), ...)
}

ggpairs(data = Environment_comparison2,
        upper = list(continuous = wrap(my_custom_cor, sizeRange = c(1,3))), 
        lower = list(continuous = my_custom_smooth), 
        axisLabels = "external")



Environment_comparison2<-Environment_comparison2[,-2] #Remove Humidity due to its correlation to temperature 
ordination.data2<-Environment_comparison2
###BUILD CCA####

database<-read.csv2("Database_Salticids_AJ_finished.csv") #reading in species data
database<-reshape2::dcast(database, Core.Plot~Morpho, value.var = "Abundance",sum)
colnames(database)[1] <- "Plot_name"
database$LU <- factor(substr(database$Plot_name, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
LandUse <- database$LU # we also want this as a vector for later

database$LS <- factor(substr(database$Plot_name, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
LandScape <- database$LS # we also want this as a vector for later
LSLU <- factor(substr(database$Plot_name, 1, 2), levels = c("BF", "BJ", "BR", "BO", "HF", "HJ", "HR", "HO")) # factor combining land use and landscape
LULS <- factor(stri_reverse(LSLU))
rownames(database) <- database$Plot_name # change the row names to be the names of the plots

database<-database[,-c(72:73)]
database2 <- database[,-c(1,72:73)] #remove metadata




ordination.data <- read.csv2(file ="20190829_Environmental Variables_AJ.csv")
summary(ordination.data)
ordination.data$System <- factor(ordination.data$System, levels = c("F", "J", "R", "O"))

ordination.data$LU <- factor(substr(ordination.data$Core.Plot, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
ordination.data$LS <- factor(substr(ordination.data$Core.Plot, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
rownames(ordination.data)<-ordination.data$Core.Plot
ordination.data2<-ordination.data[,-c(1:2,9,10)] #remove Metadata

ordination.data2<-ordination.data2[,-2] #remove humidity

ordination.data2<-decostand(ordination.data2,method="range")


set.seed(15)

cca_raw<-cca(database2~.,ordination.data2) 

anova(cca_raw)

vif.cca(cca_raw)
r2adjust_raw<-RsquareAdj(cca_raw)$adj.r.squared

anova(cca_raw,step=1000)
anova(cca_raw,by="axis",step=1000) #CCA1:"***", CCA2:"."







cca_0<-cca(database2~1,data=ordination.data2) # just species data

#ordiR2step to build cca which uses the double criterion: p-value and the adjusted R²
cca_stepforward<-ordiR2step(cca_0,scope=formula(cca_raw),R2scope =r2adjust_raw,direction="forward",permutations=999 ) 

vif.cca(cca_stepforward)

anova_CCA<-cca_stepforward$anova
anova_CCA #See results
cat("anova_CCA\n", file = "anova_CCA", append = TRUE)
capture.output(anova_CCA, file = "anova_CCA", append = TRUE)
summary(cca_stepforward)
cca_stepforward
sink("cca.txt")
print(summary(cca_stepforward))



library(tidyverse)

###Plotting CCA results#####
species.scores <- data.frame(scores(cca_stepforward, display = "species", choices = c(1, 2)))
site.scores <- data.frame(scores(cca_stepforward, display = "sites", choices = c(1, 2)))
site.scores$system<-System
location.centroids <- site.scores %>% group_by(ordination.data$System) %>% dplyr::select(CCA1, CCA2) %>%  summarise_all(mean) 

ford<-fortify(cca_stepforward,axes=1:2)

take<-c("CCA1","CCA2")
arrows<-subset(ford,Score=="biplot")
arrows<-fortify(arrows)
autoplot(cca_stepforward)

mycols<-c("green","blue","yellow","red")

mul<-ggvegan:::arrowMul(arrows[,take],subset(ford,select=take,Score=="sites"))
arrows[,take]<-arrows[,take]*mul
arrows$Label <- c("LUI","AGB", "Canopy Openness") # nicer names
arrow_Canopy<-arrows[-c(1:2),]

arrow_AGB<-arrows[-c(1,3),]

arrow_LUI<-arrows[-c(2:3),]
System<-ordination.data$System
library(ggpubr)



cca_plot<-ggplot()+
  scale_color_manual(values = system_col,aesthetics= c("colour","fill"))+
  geom_vline(aes(xintercept = 0), linetype = "dashed") +
  geom_hline(aes(yintercept = 0), linetype = "dashed")+
  geom_segment(data=arrow_Canopy,
               mapping=aes(x=0,y=0,xend=CCA1*0.6,yend=CCA2*0.8),
               arrow=arrow(length=unit(0.01,"npc"))
               
  )+
  geom_segment(data=arrow_AGB,
               mapping=aes(x=0,y=0,xend=CCA1*0.5,yend=CCA2*0.6),
               arrow=arrow(length=unit(0.01,"npc")))+
  
  geom_segment(data=arrow_LUI,
               mapping=aes(x=0,y=0,xend=CCA1*0.5,yend=CCA2*0.8),
               arrow=arrow(length=unit(0.01,"npc")))+
  geom_text(data=arrow_Canopy,
            mapping=aes(label=Label,fontface="bold",x=CCA1*0.80,y=CCA2*1.1),size=3)+ 
  geom_text(data=arrow_AGB,
            mapping=aes(label=Label,fontface="bold",x=CCA1+0.6,y=CCA2+0.6),size=3)+ 
  geom_text(data=arrow_LUI,
            mapping=aes(label=Label,fontface="bold",x=CCA1/1.8,y=CCA2*0.9),size=3)+
  coord_fixed() +
  scale_x_continuous(expand = c(.1, .1)) +
  scale_y_continuous(expand = c(.1, .1)) +
  annotate(geom = "text",  # With names
           x = location.centroids$CCA1, y = location.centroids$CCA2,
           label = location.centroids$`ordination.data$System`,
           fontface = "bold")+
  labs(x = paste( "CCA1: Explained variation: 7,5 %"),
       y = paste("CCA2: Explained variation: 4,6 %")) +
  stat_chull(data = site.scores, mapping = aes(x = CCA1, y = CCA2,
                                               fill = System, colour = System),
             geom = "polygon", alpha = 0.25, size = 1)  +
  geom_point(data=site.scores,
             mapping=aes(x=CCA1, y=CCA2,
                         colour = System, shape = LandScape), # circle Bukit Duabelas, triangle Harapan
             size=3,
  ) +
  
  theme(legend.position = "none",
        text = element_text(size = 15),
        axis.text = element_text(size = 15, face = "bold"),
        panel.border = element_rect(colour = "black", fill = NA),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(color = "black"))





cca_plot
ggsave(cca_plot, filename = "cca_figure.pdf", device = "pdf", width = 150, height = 150, units = "mm", dpi = 600)



#######Analysis mvabund########------------


database_mv<-read.csv2("Database_Salticids_AJ_finished.csv") #reading in species data
database_mv<-reshape2::dcast(database_mv, Core.Plot~Morpho, value.var = "Abundance",sum)


colnames(database_mv)[1] <- "Plot_name"

database_mv$LU <- factor(substr(database_mv$Plot_name, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
LandUse <- database_mv$LU # we also want this as a vector for later

database_mv$LS <- factor(substr(database_mv$Plot_name, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
LandScape <- database_mv$LS # we also want this as a vector for later
LSLU <- factor(substr(database_mv$Plot_name, 1, 2), levels = c("BF", "BJ", "BR", "BO", "HF", "HJ", "HR", "HO")) # factor combining land use and landscape
LULS <- factor(stri_reverse(LSLU))
rownames(database_mv) <- database_mv$Plot_name # change the row names to be the names of the plots




database2_mv <- database_mv[,-c(1,72:73)] #remove metadata

Spiders_mv<-mvabund(database2_mv) 
par(mar=c(2,10,2,2)) # adjusts the margins
boxplot(Spiders_mv,horizontal = TRUE,las=2, main="Abundance")

meanvar.plot(Spiders_mv)

plot(Spiders_mv~database_mv$LU,cex.axis=0.8,cex=0.8)

model_poisson <- manyglm(Spiders_mv ~ LandUse, family="poisson")
plot(model_poisson)
#anova_model_pois<-anova(model_poisson) 
anova_model_pois

model_poisson$AICsum 

summary.manyglm(model_poisson)


model_interact<-manyglm(Spiders_mv~LandUse+LandUse*LandScape,data=database_mv,family="negative_binomial")
plot.manyglm(model_interact)
summary_interact<-summary.manyglm(model_interact) #interaction is not significant, LS is not significant, 
summary_interact

anova_interact<-anova.manyglm(model_interact,cor.type="shrink",show.time="all") # Landuse significant, no significant interaction  *cor.type = "shrink"* # estimates correlations between species, but in an efficient way, which is necessary when there are many more species than  samples

model_interact$AICsum# 2865

model_LSLU<-manyglm(Spiders_mv~LandUse+LandScape, data=database_mv,family="negative_binomial")
plot.manyglm(model_LSLU)
summary_LSLU<-summary.manyglm(model_LSLU)
summary_LSLU 
model_LSLU$AICsum # 2538

anova_LSLU<-anova.manyglm(model_LSLU,cor.type="shrink",show.time="all",)
anova_LSLU


model_nbinom<- manyglm(Spiders_mv ~ LandUse, family="negative_binomial") # "composition FALSE (default) will model abundance, TRUE will model relative abundance, by adding a row effect to the model, and partition effects of environmental variables into main effects (alpha, total abundance/richness) and interactions with response (beta, relative abundance/turnover). See details.
plot.manyglm(model_nbinom) ## Looks better than poisson
model_nbinom$AICsum

anova_LU<-anova.manyglm(model_nbinom,cor.type="shrink",show.time="all")
anova_LU
model_nbinom$AICsum #2501 , best model
summary_LU<-summary.manyglm(model_nbinom)
summary_LU  


model_binom_adjusted<-anova(model_nbinom,p.uni="adjusted") 
model_binom_adjusted#in which species are the effects 

#Pairwise comparisons land-use systems          
#manyglm(Spiders_mv ~ LandUse,family="negative_binomial") -> msolglm
#anova(msolglm, pairwise.comp = LandUse, nBoot = 999,test="wald")


# presabs<-Spiders_mv
# presabs[presabs>0] =1
# presabs

#model_binomial<-manyglm(presabs~LU,data=database_mv,family="binomial")
#plot(model_binomial) #Fit is worse than for negative_binomial

#anova_model_binomial<-anova(model_binomial,cor.type="shrink",show.time="all")



################Boral: Model II Purely latent but with site effect------------------------



fit.lvmp_random<-boral(y=database2,family="negative.binomial",lv.control=list(num.lv=2),row.eff="random",save.model=TRUE)


lvsplot(fit.lvmp,ind.spp=25,est="mean")
lvsplot(fit.lvmp,ind.spp=25,est="median")
#Plotting with ggplot-----

dat.gg_ggboral_random<-gg_lvsplot_data(fit.lvmp_random)
dat.gg.nmds_random<-fortify(dat.gg_ggboral_random)
dat.ggn_site_random <-cbind(dat.gg_ggboral_random[(dat.gg_ggboral_random$var=="lv"),], LandUse) # divide the data frame into sites and species
dat.ggn_region_random <- cbind(dat.gg_ggboral_random[(dat.gg_ggboral_random$var=="lv"),], LandScape)
dat.ggn_species_random <- dat.gg_ggboral_random[(dat.gg_ggboral_random$var=="lvcoef"), ]


dat.ordi_salts_random <- ggplot(data = dat.gg_ggboral_random,
                                mapping = aes(x = lv1,                                    y = lv2))+
  labs(x="Latent variable 1",y ="Latent variable 2") +
  cleanup +
  scale_color_manual(values = system_col,aesthetics = c("colour", "fill"))+
  geom_point(
    data = dat.ggn_species_random,
    size = 2.0,
    shape = 3,
    col = "black"
  ) +
  geom_point(data = dat.ggn_site_random,
             size = 1.5,
             shape = c(15, 16, 17, 19)[LandUse],
             aes(color = LandUse)
  ) +
  stat_ellipse( # 75% confidence interval
    data = dat.ggn_site_random,
    geom = "polygon",
    level = 0.75,
    alpha = 0.6,
    aes(fill = LandUse, color = LandUse)
  ) +
  geom_point(
    data = dat.ggn_site_random,
    size = 2.0,
    colour="black",
    shape = c(21,24)[LandScape], # circle Bukit Duabelas, triangle Harapan
    stroke = 0.5,
    fill = system_col[LandUse],
    show.legend =
  ) +
  
  theme(legend.position = "none")+
  theme(text = element_text(size = 14),
        axis.text = element_text(size = 14))


dat.ordi_salts_random



ggsave(dat.ordi_salts_random, 
       filename = "boral.pdf", 
       device = "pdf", 
       width = 200, height = 150, units = "mm",dpi=600)


database_abundance<-read.csv2("EFForTS.2017samples.OrderLevelAbundances_20190424_AJ.csv")
database_abundance<-reshape2::dcast(database_abundance,Core_Plot~., value.var = "Salticidae",sum)

head(database_abundance)

colnames(database_abundance)[1]<-"Plot_name"
colnames(database_abundance)[2]<-"Salticidae"




# extracting land use systems and landscapes from plot names
database_abundance$LU <- factor(substr(database_abundance$Plot_name, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
LandUse <- database_abundance$LU # we also want this as a vector for later

database_abundance$LS <- factor(substr(database_abundance$Plot_name, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
LandScape <- database_abundance$LS # we also want this as a vector for later
LSLU <- factor(substr(database_abundance$Plot_name, 1, 2), levels = c("BF", "BJ", "BR", "BO", "HF", "HJ", "HR", "HO")) # factor combining land use and landscape
LULS <- factor(stri_reverse(LSLU))
#Analaysis abundance(inividuals per m²) including juveniles---------------------
#database<-tibble::column_to_rownames(database,"Plot_name")
rownames(database_abundance) <- database_abundance$Plot_name # change the row names to be the names of the plots

rownames(database_abundance) <- database_abundance$Plot_name

database_abundance2 <- database_abundance[,-c(1,3:4), FALSE] 




abundance_m2<-abundance*2
abundance_m2<-abundance_m2/24 # = Abundance per m² (Divided by 24 1m x 1m traps)

mean_abund <- tapply(abundance_m2, LandUse, mean)

med_abund<-tapply(abundance_m2, LandUse, median)
sd_abund<-tapply(abundance_m2,LandUse,sd)


descdist(abundance_m2, discrete = F) 

fit.norm_m2 <-  fitdist(abundance_m2, "norm")
fit.lnorm_m2 <- fitdist(abundance_m2, "lnorm")
fit.gamma_m2 <- fitdist(abundance_m2, "gamma")


plot(fit.norm_m2)
plot(fit.lnorm_m2)
plot(fit.gamma_m2)


fit.norm <-  fitdist(abundance_m2, "norm")
fit.lnorm <- fitdist(abundance_m2, "lnorm")
fit.pois <- fitdist(abundance, "pois")
fit.gamma<-fitdist(abundance,"gamma")
fit.nbin<-fitdist(abundance,"nbinom")


fit.norm$aic
fit.lnorm$aic #lowest aic
fit.gamma$aic  
fit.pois$aic
fit.nbin$aic #Poisson and Nbin do not fit Abundance
shapiro.test(log(abundance_m2))

bartlett.test(log(abundance_m2) ~ LU_out)
bartlett.test(log(abundance_m2) ~ LS_out)



abund_lm_landuse<-glm(abundance_m2~LandUse,family=gaussian(link=log))

abund_lm2 <- glm(abundance_m2~LandUse + LandScape, family = gaussian(link = log))

anova(abund_lm2, test = "F") #Not signinifkant
anova(abund_lm_landuse, abund_lm2, test = "F") #This model is better

compare_performance(abund_lm_landuse,abund_lm2)

cook_abund_out <- cooks.distance(abund_lm2)
plot(cook_abund_out, pch="*", cex=2, main="Influential Obs by Cooks distance")  # plot cook's distance
abline(h = 4*mean(cook_abund_out, na.rm=T), col="red")  # add cutoff line
text(x=1:length(cook_abund_out)+1, y=cook_abund_out, labels=ifelse(cook_abund_out>4*mean(cook_abund_out, na.rm=T),names(cook_abund_out),""), col="red")  # add labels

scores(abu_out, type="chisq", prob = 0.90)
scores(abu_out, type="chisq", prob = 0.95)


abund_model <- glht(model = abund_lm2, linfct = mcp(LandUse = "Tukey"))
# pairwise t.test with holm correction
summary(abund_model)
abund_HSD <- summary(abund_model, adjusted(type = c(p.adjust.methods = c("holm"))))
summary(abund_HSD)
write.csv(cbind(z_value = abund_HSD$test$tstat, p_value = abund_HSD$test$pvalues), "abund_out_HSD.csv") # save test values for manuscript

abund_HSD$test$tstat
# get the letters
abund_lu_cld <- cld(abund_HSD, decreasing = T) 
abu_cld <- abund_lu_cld$mcletters$Letters

abu.df <- as.data.frame(abundance_m2)
abu.df$LandUse <- LandUse
abu.df$LandScape <- LandScape
abu_out.df <- as.data.frame(abu_out) 

# pirate plot dissected: horizontal line = means, boxes = 95% confidence intervals, violins = density

# landscapes pooled
abu_pirate_pooled <- 
  ggplot(abu.df, aes(x = LandUse, y = abundance_m2)) +
  cleanup+
  scale_color_manual(values = system_col, aesthetics = c("colour", "fill")) +
  geom_pirate(aes(colour = LandUse, fill = LandUse),
              bars = F, 
              points_params = list(shape = 21, size = 1.5, colour = "black"),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = expression("Abundance, per "~m^2), x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) +
  theme(text = element_text(size = 14),
        axis.text = element_text(size = 14))+
  annotate(
    geom = "text",
    x = c(1:4),
    y = max(abu.df$abund)*1.02,
    label = abu_cld, size=5,vjust=0)
plot(abu_pirate_pooled)

ggsave(abu_pirate_pooled, filename = "abundance_pooled.pdf", device = "pdf", width = 100, height = 100, units = "mm", dpi = 600)

# # use this block and leave out previous annotate() to plot with y-axis on log scale
# abu_pirate<-scale_y_log10(abu.df, aes(x = LandUse, y = abund)) +
#   labs(y = expression("log abundance, per "~m^2)) +
#   annotate(
#     geom = "text",
#     x = c(1:4),
#     y = max(abu.df$abund)*1.1,
#     label = abu_cld)
ggsave(abu_pirate_pooled, filename = "abundance_pooled.pdf", device = "pdf", width = 82, height = 82, units = "mm", dpi = 600)

# # use this block and leave out previous annotate() to plot with y-axis on log scale
# abu_pirate<-scale_y_log10(abu.df, aes(x = LandUse, y = abund)) +
#   labs(y = expression("log abundance, per "~m^2)) +
#   annotate(
#     geom = "text",
#     x = c(1:4),
#     y = max(abu.df$abund)*1.1,
#     label = abu_cld)




# Richness ----------------------------------------------------------------

richness <- specnumber(database2)
med_rich <- tapply(richness, LandUse, median)
mean_rich <- tapply(richness, LandUse, mean)
sd_rich <- tapply(richness, LandUse, sd)

# data frame for plotting
rich.df <- as.data.frame(richness)
colnames(rich.df) <- c("rich")
rich.df$LandUse <- LandUse
rich.df$LandScape <- LandScape
rich.df$LSLU <- LSLU

## check distribution of data: how does the shape of the data space compare to shape of theoretical probability distributions we might want to use?
# kurtosis =  = "tailedness"; does the data have long or short tails?
# skewness = shift or assymetry of the data relative to the mean; is mode >, <, or = to mean?
descdist(richness, discrete = T) # discrete = T means data is countable values, i.e. non-negative integers like we have (default is F!). This doesn't mean that a continuous distribution might not fit better, we just need to be aware of possible limitations.
# negative binomial: count data, ideally with "large" sample sizes, often you know both how often something happened and did not happen. generally good for overdispersed count data. Poisson: "rare" events, you know how often they occured but it is not meaningful to ask how often did not occur
#descdist(richness, discrete = F) # beta dist: only for proportion data from 0 to 1. gamma dist: data is positive and skewed

fit.nbin <- fitdist(richness, "nbinom") 
fit.pois <- fitdist(richness, "pois")
fit.norm <- fitdist(richness, "norm")
fit.lnorm <- fitdist(richness, "lnorm")
fit.gamma <- fitdist(richness, "gamma")
# 
plot(fit.nbin)
qqp(richness, "nbinom", size = fit.nbin$estimate[[1]], mu = fit.nbin$estimate[[2]])
plot(fit.pois)
qqp(richness, "pois", lambda=fit.pois$estimate)
plot(fit.norm)
plot(fit.lnorm)
plot(fit.gamma)

#lowest aic indicates best fit
fit.nbin$aic
fit.pois$aic   # lowest fit 
fit.norm$aic
fit.lnorm$aic 
fit.gamma$aic

shapiro.test(richness) # p = 0,2372, normally distributed

## equality of variance/homoscedasticity
bartlett.test(richness ~ interaction(LandUse, LandScape)) 
bartlett.test(richness ~ LandScape)  




interaction.plot(LandScape, LandUse, (richness))
boxplot(log(richness)~LandUse*LandScape)
boxplot(richness~LandUse*LandScape)

mean_alpha <- tapply(richness, LandUse, mean)
sd_alpha <- tapply(richness, LandUse, sd)

rich_glm2<-glm(richness~LandUse*LandScape,family=poisson) 
summary(rich_glm2)
rich_glm3<-glm(richness~LandUse+LandScape)  #Final Model
summary(rich_glm3) 
anova(rich_glm3, test = "F") 
compare_performance(rich_glm2, rich_glm3) #glm3 is better

write.csv(anova(rich_glm3, test = "F"), "richness_glm.csv") # save for manuscript


#richness plot#
rich_HSD<-glht(model=rich_glm3,linfct=mcp(LandUse="Tukey"))

rich_HSD_LU<-summary(rich_HSD, adjusted(type=c(p.adjust.methods=c("holm"))))
summary(rich_HSD_LU)

rich_HSD_LU$test$tstat
# get the letters
rich_lu_cld <- cld(rich_HSD_LU, decreasing=TRUE) 
rich_cld <- rich_lu_cld$mcletters$Letters
rich_cld

rich_pirate_pooled <- ggplot(rich.df, aes(x = LandUse, y = rich)) +
  cleanup +
  scale_color_manual(values = system_col,aesthetics = c("colour", "fill")) +
  geom_pirate(aes(colour = LandUse, fill = LandUse),
              bars = F, points_params = list(shape = 21, size = 1.5,colour="black"),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = "Species richness", x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) +
  annotate(
    geom = "text",
    x = c(1:4),
    y =  max(rich.df$rich) * 1.03,
    label = rich_cld, size=5)+
  theme(text = element_text(size = 14),
        axis.text = element_text(size = 14))


rich_pirate_pooled

ggsave(rich_pirate_pooled, 
       filename = "rich.pdf", 
       device = "pdf", 
       width = 100, height = 100, units = "mm", dpi = 600)
# Community composition: prep for nmds ------------------------------------------

# Important !!! Is the species response linear or unimodal?
# Rule of thumb (Leps & Smilauer 2003)
#   SD < 3 -> linear -> PCA
#   or > 4 -> unimodal -> NMDS
#   inbetween -> you can choose
# You check this with a DCA and take a look at the length of the first axis

# DCA (detrended correspondence analysis) ####
# Is the data unimodal? run a DCA with our species
DCA <- decorana(database2, # our community data in vegan-friendly format 
                iweigh = 0)           # iweight = downweighting rare species (0 = no; 1 = yes), 

#DCA <- rda(database2)


DCA # length of first axis = 4.49 -> unimodal response -> nmds
# use summary(DCA) if you would like to see species scores #Axis Length 


scree_values <- function(x,max, dist_meas,binary=F) 
{	
  xx <- as.matrix(database2)
  scree.points = NULL
  scree.temp = 1
  for(i in 1:max)
  {
    sol.mds = NULL
    sol.mds  <- replicate(10, metaMDS(xx, k = i, trymax = 30, distance = dist_meas, binary)$stress, 
                          simplify = TRUE)
    scree.points <- append(scree.points, sol.mds)
  }
  return(scree.points)
}

#NMDS -------------------------
dimcheckMDS(database2, distance = "bray", k = 5, trymax = 20,
            autotransform = TRUE)
# IMPORTANT! We need to know how many dimensions to calculate. Stress indicates when the model is sufficient to explain the data
#database_presabs<-ifelse(database2>0,1,0)
# Calculate relationship between dimensions and stress
scree.res <- scree_values(database2, max = 10, dist_meas = "bray",binary=FALSE) 
# Prerequisite for plotting replicated runs of NMDS
plot_vec <- as.vector(sapply(1:(length(scree.res)/10), function(x) rep(x,10), simplify = "vector"))


# Plot dimensions vs. stress
plot(plot_vec, scree.res, ylab = "Stress", xlab = "Dimensions", main = "Stress vs. Dimensions")
lines(1:(length(scree.res)/10), as.vector((by(scree.res, plot_vec, mean))))
abline(h = 0.1, col = "red", lty = "dashed")
# Red line indicates threshold for "good" Stress value


NMDS <- metaMDS(database2,distance="bray", k = 5) # Perform the actual NMDS
stressplot(NMDS, main = paste("k = ", NMDS$ndim, ", NMDS/Bray-Curtis - Stress =", round(NMDS$stress, 3))) # take a look at the stressplot -- points should cluster around the NMDSline, stress should be less than 0.1 and approaching 0.05 is even better   

#plot(NMDS,type="t",main=paste("NMDS/HORN=",round(NMDS$stress,3))) # A quick glance
#Check assumption of homogeneity of multivariate dispersion 
database2$Landuse<-LandUse
Distances_data<-vegdist(database2)
anova(betadisper(Distances_data, database2$Landuse))  #its fine
database2<-database2[,-71]

# NMDS plot -----------------------------------------------
ggnmds <- fortify(NMDS) # extract the scores
ggn_site <- cbind(ggnmds[(ggnmds$Score=="sites"),], LandUse) # divide the data frame into sites and species
ggn_region <- cbind(ggnmds[(ggnmds$Score=="sites"),], LandScape)
ggn_species <- ggnmds[(ggnmds$Score=="species"), ]

# for labeling
r2 <-  round(beta_ado$R2[1], 3)
f_value <- round(beta_ado$`F`[1], 3)
probF <- round(beta_ado$`Pr(>F)`[1], 3)
write.table(cbind(r2, f_value, probF), "adonis_beta_LSLU.csv") # save test values for manuscript

# centroids for annotation
site.centroid <- as.data.frame(ggn_site) %>% 
  group_by(System = LandUse) %>%
  summarise(NMDS1.center = mean(NMDS1), NMDS2.center = mean(NMDS2))

# extract scores from nmds
nmds_scores <- cbind(vegan::scores(NMDS))
nmds_spec <- vegan::scores(NMDS$species)
comm.lda<-lda(nmds_scores, LandUse)
comm.lda
comm_man <- summary(manova(nmds_scores ~ LandUse+ LandScape), test = "Wilks") # multivariate anova; does the variable significantly predict variability in 
comm_man #Final Model ! 



# comm_man_lslu <- summary(manova(nmds_scores ~ LandUse * LandScape), test = "Wilks") # multivariate anova; does the variable significantly predict variability in nmds scores?
# comm_man_lslu #No significant interaction 

#Plotting

ordi_salts <- ggplot(data = ggnmds,
                     mapping = aes(x = NMDS1,
                                   y = NMDS2)) +
  cleanup +
  scale_color_manual(values = system_col,aesthetics = c("colour", "fill"))+
  
  geom_point(
    data = ggn_species,
    size = 2.0,
    shape = 3,
    col = "black"
  ) +
  # geom_point(
  #   data = ggn_site,
  #   size = 1.5,
  #   shape = c(15, 16, 17, 19)[LandUse],
  #   aes(color = LandUse)
  # ) +
  stat_ellipse( # 75% confidence interval, centroids are means of NMDS1 and NMDS2 for each LandUse
    data = ggn_site,
    geom = "polygon",
    level = 0.75,
    alpha = 0.6,
    aes(fill = LandUse, color = LandUse)
  ) +
  geom_point(
    data = ggn_region,
    size = 2.0,
    colour="black",
    shape = c(21,24)[LandScape], # circle Bukit Duabelas, triangle Harapan
    stroke = 0.5,
    fill = system_col[LandUse],
    show.legend = 
  ) +
  annotate(geom = "text",
           x = -0.7,
           y = 0.6,
           label = paste("Wilks Lambda = ", round(comm_man$stats["LandUse", "Wilks"], 3),
                         "\nF = ", round(comm_man$stats["LandUse", "approx F"], 3),
                         "\np = <0.001"),
           fontface = "bold",
           size = 4)+
  theme(legend.position = "none")+
  theme(text = element_text(size = 14),
        axis.text = element_text(size = 14))

ordi_salts

ggsave(ordi_salts, 
       filename = "nmds.pdf",device = "pdf",width =200, height = 150, units = "mm", dpi = 600)




######Venn Diagrams--------
source("custom_venn.R")
salt_sum<-read.csv2("Database_Salticids_AJ_finished.csv") 
salt_sum<-reshape2::dcast(salt_sum, Core.Plot~Morpho, value.var = "Abundance",sum)
colnames(salt_sum)[1] <- "Plot_name"
salt_sum$LU <- factor(substr(salt_sum$Plot_name, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
LandUse <- salt_sum$LU # we also want this as a vector for later

salt_sum$LS <- factor(substr(salt_sum$Plot_name, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
LandScape <- salt_sum$LS # we also want this as a vector for later
LSLU <- factor(substr(salt_sum$Plot_name, 1, 2), levels = c("BF", "BJ", "BR", "BO", "HF", "HJ", "HR", "HO")) # factor combining land use and landscape
LULS <- factor(stri_reverse(LSLU))
salt_sum <- recast(salt_sum, LU~variable, sum)
rownames(salt_sum) <- salt_sum$LU # change the row names to be the names of the Land Use systems
salt_sum <- salt_sum[,-1] # drop the Group column

salt_sum2

venn_counts <- vennCounts(salt_sum2)

venn_diagram<-custom_venn(venn_counts, names = system_names, circle.col = system_col, cex=c(1.5, 1.5, 1.5), mar=rep(0,4))

venn_diagram+title(main="b")




ggsave(venn_diagram, 
       filename = "venns_diagram.pdf", 
       device = "pdf", 
       width = 250, height = 150, units = "mm", dpi = 1000)

#Over Landscapes 
salt_sum<-read.csv2("Database_Salticids_AJ_finished.csv") 
salt_sum<-reshape2::dcast(salt_sum, Core.Plot~Morpho, value.var = "Abundance",sum)
colnames(salt_sum)[1] <- "Plot_name"
salt_sum$LU <- factor(substr(salt_sum$Plot_name, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
LandUse <- salt_sum$LU # we also want this as a vector for later

salt_sum$LS <- factor(substr(salt_sum$Plot_name, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
salt_sum_ls<- recast(salt_sum, LS~variable, sum)
str(salt_sum_ls)
rownames(salt_sum_ls) <- salt_sum_ls$LS # change the row names to be the names of the LandScapes
salt_sum_ls <- salt_sum_ls[,-1] #drop the Region column

venn_counts_ls <- vennCounts(salt_sum_ls2)


venn_ls<-custom_venn(venn_counts_ls, names = levels(LandScape), circle.col = c("orange", "purple"), cex=c(1.5, 1.5, 1.5), mar=rep(0,4))

+title(main="a")







#####Analyzing Molecular Data------
#Please restart R to unload all previously used packages otherwise the code might not run smoothly 
library(picante)
library(ape)
library(dplyr)
library(pegas)
library(phangorn)
library(stringi)
library(multcomp)
library(ggplot2)
library(ggpirate)
set.seed(10)
community<-read.csv2("Database_Salticids_AJ_finished_NRI.csv")



community<-reshape2::dcast(community, Core.Plot~Morpho, value.var = "Abundance",sum)
colnames(community)[1] <- "Plot_name"
community$LU <- factor(substr(community$Plot_name, 2, 2), levels = c("F", "J", "R", "O"), labels = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")) # extract land use systems from Plot Names
community$LS <- factor(substr(community$Plot_name, 1, 1), levels = c("B", "H"), labels = c("Bukit Duabelas", "Harapan")) # extract landscape names from Plot Names
LSLU <- factor(substr(community$Plot_name, 1, 2), levels = c("BF", "BJ", "BR", "BO", "HF", "HJ", "HR", "HO")) # factor combining land use and landscape
LULS <- factor(stri_reverse(LSLU))
LandScape <- community$LS # we also want this as a vector for later
Landuse <- community$LU


rownames(community) <- community$Plot_name
community<-community[,-c(1,68:69)]
community<-select(community,-c(AraSalt066,AraSalt084,AraSalt095,AraSalt073,AraSalt098,AraSalt099,AraSalt106,AraSalt107,AraSalt108,AraSalt110,AraSalt111)) #Remove all Morphospecies we dont have 28S Sequences for 

head(community)

mytree<-read.nexus("28S_Alignment_NRI.nex.con.tre")
mytree.phylo<-as.phylo(mytree)
mytree.phylo$tip.label


names(community) <- substring(names(community),8,10) #Change Names to morphospecies number only e.g AraSalt001->001

prune.tree<-prune.sample(community, mytree)

community<-community[,prune.tree$tip.label] #Morphospecies in the community matrix have to be in the same order as in the phylogenetic tree !


phy.dist<-cophenetic(mytree)
plot(pd)
hist(pd)


NRI<-ses.mpd(community,cophenetic(mytree.phylo),null.model="independentswap",abundance.weighted= FALSE,runs=999, iterations=1000)
#NRI_abund<-ses.mpd(community,cophenetic(mytree.phylo),null.model="independentswap",abundance.weighted= TRUE,runs=999, iterations=1000)


NTI<-ses.mntd(community,cophenetic(mytree.phylo),null.model="independentswap",abundance.weighted= FALSE,runs=999,iterations=1000)
#NTI_abund<-ses.mntd(community,cophenetic(mytree.phylo),null.model="independentswap",abundance.weighted= TRUE,runs=999, iterations=1000)

#Get values for models
NRI$NRI<-NRI$mpd.obs.z*-1
NRI_out<-NRI[,9,  FALSE] #This command keeps the rownames which is mandatory to indicate significance for each plot.
NRI_out$System<-Landuse
#rownames(NRI_out) <- NRI_out$System

NTI$NTI<-NTI$mntd.obs.z*-1
NTI_out<-NTI[,9,FALSE]
NTI_out$System<-Landuse

######Plotting phylogenetic Diversity###########


spiders.pd<-pd(community,mytree,include.root = FALSE)
spiders.pd$Landuse<-Landuse
spiders.pd$Landscape<-LandScape
pd<-spiders.pd[,1,FALSE]
pd<-unlist(pd,use.names=FALSE)
mean.pd<-tapply(pd,Landuse,mean)
sd.pd<-tapply(pd,Landuse,sd)

pd.forest<-dplyr::filter(spiders.pd, Landuse=="Forest") 
pd.forest<-pd.forest[,1,FALSE]
pd.forest<-unlist(pd.forest)
pd.forest<-apply(pd.forest,mean)
mean.forest<-mean(pd.forest)

par(mfrow=c(2,2))



shapiro.test(spiders.pd$PD)
PD_GLM<-glm(spiders.pd$PD~Landuse+LandScape)#Landuse signifikant
summary(PD_GLM)
anova_pd<-anova(PD_GLM,test="F")
anova_pd
PD_model<-glht(model=PD_GLM,linfct=mcp(Landuse="Tukey"))

PD_HSD <- summary(PD_model, adjusted(type = c(p.adjust.methods = c("holm"))))
summary(PD_HSD)

PD_HSD_LU_cld <- cld(PD_HSD, decreasing = T) 
PD_cld <- PD_HSD_LU_cld$mcletters$Letters
PD_cld

write.csv(cbind(z_value = PD_HSD$test$tstat, p_value = PD_HSD$test$pvalues), "PD_HSD.csv") # save test values for manuscript

write.csv(NRI,"NRI.csv")

Spiders_pd_pooled<- ggplot(spiders.pd, aes(x = Landuse, y =spiders.pd$PD )) +
  cleanup+
  scale_color_manual(values = system_col, aesthetics=(c("colour","fill"))) +
  geom_pirate(aes(colour = Landuse, fill = Landuse),
              bars = F, points_params = list(shape = 21, size = 1.5,colour="black"),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = expression("Faith's Phylogenetic Diversity"), x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) +
  annotate(
    geom = "text",
    x = c(1:4),
    y = max(spiders.pd$PD)*1.02,
    label = PD_cld, size=5,vjust=0)+
  theme(text = element_text(size = 14),
        axis.text = element_text(size = 14))

plot(Spiders_pd_pooled)
ggsave(Spiders_pd_pooled, filename = "PD_pooled.pdf", device = "pdf", width = 100, height = 100, units = "mm", dpi = 600)
####Statistics and plots for NRI / NTI#############
nri_interaction<-lm(NRI_out$NRI~Landuse*LandScape) 
summary(nri_interaction)
anova(nri_interaction,test="F")
NRI_GLM<-lm(NRI_out$NRI~Landuse+LandScape)#Landuse significant

shapiro.test(NRI_out$NRI)
shapiro.test(NTI_out$NTI)

ANOVA_NRI<-anova(NRI_GLM,test="F")
ANOVA_NRI
cat("ANOVA_NRI\n", file = "ANOVA_NRI", append = TRUE)
capture.output(ANOVA_NRI, file = "ANOVA_NRI", append = TRUE)

NRI_GLM
NRI_model<-glht(model=NRI_GLM,linfct=mcp(Landuse="Tukey"))
summary(NRI_model)
NRI_HSD <- summary(NRI_model, adjusted(type = c(p.adjust.methods = c("holm")))) #Oil Palm-Rubber*
summary(NRI_HSD)

NRI_HSD_LU_cld <- cld(NRI_HSD, decreasing = T) 
NRI_HSD_LU_cld
NRI_cld <- NRI_HSD_LU_cld$mcletters$Letters




##T-Tests to test for random assembly, Phylogenetic clustering, Phylogenetic overdispersion
NRI_oilpalm<-dplyr::filter(NRI_out, Landuse=="Oil Palm")
sd_NRI_oil<-sd(NRI_oilpalm$NRI) 
t_oilpalm<-t.test(NRI_oilpalm$NRI) #
t_oilpalm
pvalue_oil<-t_oilpalm$p.value
pvalue_oil<-as.data.frame(pvalue_oil)
pvalue_oil<-round(pvalue_oil,digits=4)
cat("t_oilpalm_NRI\n", file = "NRI_t_oilpalm", append = TRUE)
capture.output(t_oilpalm, file = "NRI_t_oilpalm", append = TRUE)

bartlett.test(NRI_out$NRI~Landuse) #all fine
shapiro.test(NRI_oilpalm$NRI)
shapiro.test(NRI_oilpalm$NRI)
NRI_rubber<-dplyr::filter(NRI_out,System=="Rubber")
sd_NRI_rubber<-sd(NRI_rubber$NRI) 
t_rubber<-t.test(NRI_rubber$NRI) #Signifikant
t_rubber
pvalue_rubber<-t_rubber$p.value
pvalue_rubber<-as.data.frame(pvalue_rubber)
pvalue_rubber<-round(pvalue_rubber,digits=4)

cat("t_rubber\n", file = "NRI_t_rubber", append = TRUE)
capture.output(t_rubber, file = "NRI_t_rubber", append = TRUE)


NRI_forest<-dplyr::filter(NRI_out,System=="Forest")
sd_NRI_forest<-sd(NRI_forest$NRI) 
t_forest<-t.test(NRI_forest$NRI) 
pvalue_forest<-t_forest$p.value
pvalue_forest<-round(pvalue_forest,digits=4)

cat("t_forest\n", file = "NRI_t_forest", append = TRUE)
capture.output(t_forest, file = "NRI_t_forest", append = TRUE)

NRI_JungleRubber<-dplyr::filter(NRI_out,System=="Jungle Rubber")
SD_JungleRubber<-sd(NRI_JungleRubber$NRI)
t_junglerubber<-t.test(NRI_JungleRubber$NRI)
pvalue_junglerubber<-t_junglerubber$p.value
pvalue_junglerubber<-round(pvalue_junglerubber)

cat("t_junglerubber\n", file = "NRI_t_junglerubber", append = TRUE)
capture.output(t_junglerubber, file = "NRI_t_junglerubber", append = TRUE)

#Plotting
system_col = c("#15983DFF", "#0C5BB0FF", "#FEC10BFF", "#EE0011FF") # green, blue, yellow, red, from the pirate plots basel palette
system_col_double <- c(system_col, system_col)
double_col <- rep(system_col, each = 2)
system_names = c("Forest", "Jungle Rubber", "Rubber", "Oil Palm")
LU_abb = c("F", "J", "R", "O")


nri_pirate_pooled <- ggplot(NRI_out, aes(x = Landuse, y =NRI_out$NRI )) +
  cleanup+
  scale_color_manual(values = system_col,aesthetics=(c("colour","fill"))) +
  
  geom_pirate(aes(colour = Landuse, fill = Landuse),
              bars = F, points_params = list(shape = 21, size = 1.5,colour="black"),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = expression("Net Relatedness Index"), x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) +
  annotate(
    geom = "text",
    x = c(1:4),
    y = max(NRI_out$NRI)*1.02,
    label = NRI_cld, size=5,vjust=0)+
  annotate("text",x=3,y=-2.15,label=paste0('atop(bold("0.10<p>0.05"))'),cex=3,parse=TRUE)+
  annotate("text",x=4,y=-2.15,label=paste0('atop(bold("0.10<p>0.05"))'),cex=3,parse=TRUE)+
  annotate("text",x=1,y=-2.15,label=paste0('atop(bold("p>0.10"))'),cex=3,parse=TRUE)+
  annotate("text",x=2,y=-2.15,label=paste0('atop(bold("p>0.10"))'),cex=3,parse=TRUE)+#
  theme(text = element_text(size = 14),
        axis.text = element_text(size = 14))

plot(nri_pirate_pooled)
ggsave(nri_pirate_pooled, filename = "PD_pooled.pdf", device = "pdf", width = 100, height = 100, units = "mm", dpi = 600)

#####NTI#####

nti_interaction<-lm(NTI_out$NTI~Landuse*LandScape) #not significant
summary(nti_interaction)
anova(nti_interaction,test="F")
NTI_GLM<-glm(NTI_out$NTI~Landuse+LandScape)#Landuse significant
ANOVA_NTI<-anova(NTI_GLM,test="F")
ANOVA_NTI

cat("ANOVA_NTI\n", file = "ANOVA_NTI", append = TRUE)
capture.output(ANOVA_NTI, file = "ANOVA_NTI", append = TRUE)

NTI_model<-glht(model=NTI_GLM,linfct=mcp(Landuse="Tukey"))
summary(NTI_model)
NTI_HSD <- summary(NTI_model, adjusted(type = c(p.adjust.methods = c("holm"))))
summary(NTI_HSD)

NTI_HSD_LU_cld <- cld(NTI_HSD, decreasing = T) 
NTI_cld <- NTI_HSD_LU_cld$mcletters$Letters

##T-Tests to test for random assembly, Phylogenetic clustering, Phylogenetic overdispersion

NTI_oilpalm<-dplyr::filter(NTI_out, System=="Oil Palm")
sd_NTI_oil<-sd(NTI_oilpalm$NTI) 
t_oilpalm_NTI<-t.test(NTI_oilpalm$NTI)
t_oilpalm_NTI
p_NTI_oil<-t_oilpalm_NTI$p.value
p_NTI_oil<-round(p_NTI_oil,digits=4) #0,045


cat("t_oilpalm_NTI\n", file = "nti_t.test_oil", append = TRUE)
capture.output(t_oilpalm_NTI, file = "nti_t.test_oil", append = TRUE)



NTI_forest<-dplyr::filter(NTI_out, System=="Forest")
sd_NTI_forest<-sd(NTI_forest$NTI)
t_forest_NTI<-t.test(NTI_forest$NTI)
t_forest_NTI
p_NTI_forest<-t_forest_NTI$p.value
p_NTI_forest<-round(p_NTI_forest,digits=4) 



cat("t_forest_NTI\n", file = "nti_t.test_forest", append = TRUE)
capture.output(t_forest_NTI, file = "nti_t.test_forest", append = TRUE)

NTI_rubber<-dplyr::filter(NTI_out,System=="Rubber")
sd_NTI_rubber<-sd(NTI_rubber$NTI)
t_rubber_NTI<-t.test(NTI_rubber$NTI)
t_rubber_NTI
p_NTI_rubber<-t_rubber_NTI$p.value
p_NTI_rubber<-round(p_NTI_rubber,digits=4)

cat("t_rubber_NTI\n", file = "nti_t.test_rubber", append = TRUE)
capture.output(t_rubber_NTI, file = "nti_t.test_rubber", append = TRUE)


NTI_JungleRubber<-dplyr::filter(NTI_out,System=="Jungle Rubber")
sd_NTI_JungleRubber<-sd(NTI_JungleRubber$NTI)
t_junglerubber_NTI<-t.test(NTI_JungleRubber$NTI)
t_junglerubber_NTI
p_NTI_junglerubber<-t_junglerubber_NTI$p.value
p_NTI_junglerubber<-round(p_NTI_junglerubber,digits=4)


cat("t_junglerubber_NTI\n", file = "nti_t.test_junglerubber", append = TRUE)
capture.output(t_junglerubber_NTI, file = "nti_t.test_junglerubber", append = TRUE)


NTI_pirate_pooled <- ggplot(NTI_out, aes(x = System, y =NTI_out$NTI )) +
  cleanup+
  scale_color_manual(values = system_col,aesthetics = c("colour","fill")) +
  
  
  geom_pirate(aes(colour = Landuse, fill = Landuse),
              bars = F, points_params = list(shape = 21, size = 1.5,colour="black"),
              lines_params = list(size = 0.5, width = 0.5),
              cis_params = list(fill = system_col, size = 0.6, alpha = 0.5, width = 0.5),
              violins_params = list(fill = "white", size = 0.6, alpha = 1, width = 1, color = "black")) +
  labs(y = expression("Nearest Taxon Index"), x = NULL) +
  scale_x_discrete(labels = LU_abb)  + 
  theme(strip.background = element_rect(fill = "white", colour = "black")) +
  annotate(
    geom = "text",
    x = c(1:4),
    y = max(NTI_out$NTI)*1.02,
    label = NTI_cld,size=5,vjust=0)+
  annotate("text",x=3,y=-2.8,label=paste0('atop(bold("p>0.10"))'),cex=3,parse=TRUE)+
  annotate("text",x=4,y=-2.8,label=paste0('atop(bold("*p<0.05"))'),cex=3,parse=TRUE)+
  annotate("text",x=1,y=-2.8,label=paste0('atop(bold("p>0.10 "))'),cex=3,parse=TRUE)+
  annotate("text",x=2,y=-2.8,label=paste0('atop(bold("*p<0.05"))'),cex=3,parse=TRUE)+
  theme(text=element_text(size=14),
        axis.text=element_text(size=14))

#ns: p>0.10, .: 0.10<p>0.05, * p<0.05


plot(NTI_pirate_pooled)

ggsave(NTI_pirate_pooled, filename = "NTI_pooled.pdf", device = "pdf", width = 100, height = 100, units = "mm", dpi = 600)

dev.off()

nti_nri<-grid.arrange(NTI_pirate_pooled,nri_pirate_pooled,nrow=1)
plot(nti_nri)
ggsave(nti_nri, filename = "nti_nri_plot.pdf", device = "pdf", width = 210, height = 100, units = "mm", dpi = 600)
ggsave(nti_nri, filename = "nti_nri_plot.png", device = "png", width = 210, height = 100, units = "mm", dpi = 600)
###End of Code#######