require(vegan)
require(sads)
require(fields)
require(dplyr)
require(visreg)
require(ggplot2)
se <- function(x) {sd(x, na.rm=T)/sqrt(length(x[!is.na(x)]))} #function for standard error

##run same code for each of five datasets with some exceptions (see annotation)

#get data
setwd("...")
##for by-sample (unpooled) Safaga datasets
ta<-read.csv("Safaga_all.csv", header=T, sep=",", row.names=1) #includes entire dataset
# ta<-read.csv("Safaga_top10.csv", header=T, sep=",", row.names=1) #includes only top 10 species
env<-read.csv("Safaga_environments.csv", header=T, sep=",", row.names=1)   #for by-sample Safaga dataset (unpooled)

##for by-site (pooled) Safaga datasets
# ta<-read.csv("Safaga_pooled_top10.csv", header=T, sep=",", row.names=1) #includes only top 10 species
## ta<-read.csv("Safaga_pooled_all.csv", header=T, sep=",", row.names=1) #includes entire dataset
# env<-read.csv("Safaga_depth_pooled_new.csv", header=T, sep=",", row.names=1)  #for by-site Safaga dataset (pooled)

##for Cassian dataset
# Cas_dat<-read.csv("Cassian_top10.csv", header=T, sep=",")
# ta <- tapply(Cas_dat$Specimens, list(Cas_dat$Sample, Cas_dat$Species), sum)  #only for Cassian dataset
# env<-read.csv("paleodepth.csv", header=T, sep=",", row.names=1)

#for each of the five datasets
ta.0 <- ta
ta.0[is.na(ta)] <- 0

#alpha diversity #Tables 1,2
bp <- apply(ta.0, 1, max)/apply(ta.0, 1, sum) # berger-parker index
specs <- rowSums(ta.0) #number of specimens

#plot rank abundance distributions for Cassian dataset #Fig. S12 #only Cassian
  rank <- list()
  
  for (j in 1:nrow(ta)){
    rank[[j]] <- rad(ta.0[j,])
    rank <- as.matrix(rank)
    rank[[j]][,2]<-log10(rank[[j]][,2])  #use logarithm
  }
  
  
   op <- par(mfrow=c(2,4), mar = c(15,5,5,5) + 1.5)
    for (k in 1: nrow(ta)){
    num <- 1:nrow(rank[[k]])
    barplot(rank[[k]][,2], main = rownames(ta)[k], xlim=c(0,10), 
            ylim=c(0,2.5),las=2,  names = rownames(rank[[k]]), cex.names=1.4)
     }
  

#evenness #Tables 1,2
s <- length(which(colSums(ta.0)!=0)) #number of species
J <- diversity(ta.0) / log(s)

#beta diversity
ta.0 <- as.matrix(ta.0)
ta.prop <-prop.table(ta.0, margin=1) #change to proportions
ppd <- vegdist(ta.prop, method="bray") #distance matrix
range(ppd) #range
mean(ppd) #mean beta diversity
se(ppd) #standard error
#to calculate mean beta diversity of each site
dppd <- as.matrix(ppd)
dppd[dppd==0] <- NA #replace zeros with NA to calculate means and standard error
locs <- apply(dppd, 2, FUN=mean, na.rm=T) #mean beta diversity per site with regard to other sites #Tables 1,2
locses <- apply(dppd, 2, FUN=se) #standard error for each mean beta #Tables 1,2

###correlate beta diversity with depth
cor.test(env$Depth,locs, method="spearman") #beta diversity and depth
cor.test(env$Depth, locs, method="pearson") #beta diversity and depth

#correlate alpha diversity with depth
cor.test(env$Depth,bp, method="spearman") #Berger-Parker index
cor.test(env$Depth,bp, method="pearson") #Berger-Parker index
cor.test(env$Depth,J, method="spearman") #evenness
cor.test(env$Depth,J, method="pearson") #evenness

###NMDS
mds <- metaMDS(ta.prop, distance = "bray")

#NMDS with bathymetry
mds_b <- envfit(mds, env$Depth, display="sites")

#NMDS with bathymetry for by-sample (unpooled) Safaga only (Fig. S7)
col3 <- c("deepskyblue","deepskyblue","deepskyblue","deepskyblue","dodgerblue3","dodgerblue3","blue4","blue4","dodgerblue3","deepskyblue","deepskyblue","deepskyblue","blue4") #deepskyblue is very shallow, dodgerblue3 is shallow, blue4 is deeper
plot(mds, display="sites", type="n", xlim = c(-3.5,3.5))
points(mds,pch=15,cex=1.5,col=col3)
posvector <- c(3,1,4,2,1,4,1,3,1,1,3,1,1)
text(mds, display="sites", col=col3, cex=1.3, pos=posvector)
mds_b <- envfit(mds, env$Depth, display="sites", p.max=0.05)
plot(mds_b, labels="Depth",cex=1.5, col="black")
legend("bottomright",fill=c("deepskyblue","dodgerblue3","blue4"),legend=c("very shallow", "shallow", "deeper"))


#NMDS with bathymetry for by-site (pooled) Safaga only (Fig. 8)
col2 <- c("deepskyblue","dodgerblue3","blue4","dodgerblue3","deepskyblue","deepskyblue","deepskyblue","blue4") #deepskyblue is very shallow, dodgerblue3 is shallow, blue4 is deeper
plot(mds, display="sites", type="n", ylim = c(0.35, -0.25), xlim = c(-0.6, 0.5))
points(mds,pch=15,cex=1.5,col=col2)
posvector <- c(1,1,2,1,1,4,1,1)
text(mds, display="sites", col=col2, cex=1.3, pos=posvector)
mds_b <- envfit(mds, env$Depth, display="sites", p.max=0.05)
plot(mds_b, labels="Depth",cex=1.5, col="black")
legend("topright",fill=c("deepskyblue","dodgerblue3","blue4"),legend=c("very shallow", "shallow", "deeper"))

#NMDS with bathymetry for Cassian only (Fig. 6)
col1 <- c("dodgerblue3","deepskyblue","dodgerblue3","blue4","deepskyblue","deepskyblue","blue4","deepskyblue" )  #deepskyblue is very shallow, dodgerblue3 is shallow, blue4 is deeper
plot(mds, display="sites", type="n")
points(mds,pch=15,cex=1.5,col=col1)
posvector <- c(4,3,3,4,2,2,4,2)
text(mds, display="sites", col=col1, cex=1.3, pos=posvector)
plot(mds_b, labels="Depth",cex=1.5, col="black")
legend("bottomright",fill=c("deepskyblue","dodgerblue3","blue4"),legend=c("very shallow", "shallow", "deeper"))
mds_b <- envfit(mds, env$Depth, display="sites", p.max=0.05)
plot(mds_b, labels="Depth",cex=1.5, col="black")

###beta diversity among depth ranges

##test significance of depth clusters
ad <- adonis(ppd ~  Depth_groups3, data = env) #for three groups (as in figures)
# ad <- adonis(ppd ~  Depth_groups2, data = env) #for two depth groups
ad

##two groups
med<-median(env$Depth)  #calculate median depth
samp_shal <- rownames(env[which(env$Depth<med),])  #names of samples from shallower water depths
samp_deep <- rownames(env[which(env$Depth>=med),]) #names of samples from deeper water depths
shal <- ta.0[samp_shal,]  #create species composition matrix with samples from shallower water depths
deep <- ta.0[samp_deep,]  #create species composition matrix with samples from deeper water depths
prop.shal <-prop.table(shal, margin=1)  #calculate proportions shallow
ppd.shal <- vegdist(prop.shal, method="bray")   #calculate dissimilarities shallow
mean(ppd.shal) #mean dissimilarity shallow
se(ppd.shal)  #standard error shallow
prop.deep <-prop.table(deep, margin=1)  #calculate proportions deep
ppd.deep <- vegdist(prop.deep, method="bray")   #calculate dissimilarities deep
mean(ppd.deep) #mean dissimilarity deep
se(ppd.deep)    #standard error deep

##three groups
env
#Safaga by-site (pooled)
mean(ppd.shal) #mean dissimilarity shallow
se(ppd.shal)  #standard error shallow
int <- ta.0[c(2,4),]  #create species composition matrix with samples from intermediate water depths
prop.int <-prop.table(int, margin=1)  #calculate proportions shallow
ppd.int <- vegdist(prop.int, method="bray")   #calculate dissimilarity intermediate
deepest <- ta.0[c(3,8),]  #create species composition matrix with samples from deepest water depths
prop.deepest <-prop.table(deepest, margin=1)  #calculate proportions deepest
ppd.deepest <- vegdist(prop.deepest, method="bray")   #calculate dissimilarity deepest

# #Cassian
# mean(ppd.shal) #mean dissimilarity shallow
# se(ppd.shal)  #standard error shallow
# int <- ta.0[c(1,3),]  #create species composition matrix with samples from intermediate water depths
# prop.int <-prop.table(int, margin=1)  #calculate proportions shallow
# ppd.int <- vegdist(prop.int, method="bray")   #calculate dissimilarity intermediate
# deepest <- ta.0[c(4,7),]  #create species composition matrix with samples from deepest water depths
# prop.deepest <-prop.table(deepest, margin=1)  #calculate proportions deepest
# ppd.deepest <- vegdist(prop.deepest, method="bray")   #calculate dissimilarity deepest

### beta diversity between depth ranges
##beta diversity between two depth ranges
shal_group <- colSums(shal)  #group all shallow samples
deep_group <- colSums(deep) #group all deeper samples
depth2 <- rbind(shal_group, deep_group) #create dataframe from the 2 groups
depth2.prop <-prop.table(depth2, margin=1) #calculate proportions
ppd.depth2 <- vegdist(depth2.prop, method="bray") #calculate dissimilarity between shallow and deep samples

##beta diversity between 3 depth ranges
deepest_group <- colSums(deepest) #group deeepest samples
int_group <- colSums(int)   #group intermediate samples
depth3 <- rbind(shal_group, int_group, deepest_group) #create dataframe from the 3 groups
depth3.prop <-prop.table(depth3, margin=1) #calculate proportions
ppd.depth3 <- vegdist(depth3.prop, method="bray") #calculate dissimilarity between samples

##Compare depth groups
#Kruskal-Wallis test
kruskal.test(shal_group ~ deep_group) #comparison between two groups, shallow and deeper
kruskal.test(shal_group ~ int_group) #comparison between shallow and intermediate
kruskal.test(int_group ~ deepest_group) #comparison between intermediate and deep
kruskal.test(shal_group ~ deepest_group) #comparison between shallow and deep


##distance decay #Figs. 5,S5
# #Cassian
# env <- read.csv("gps_cas.csv", header=T, sep=",", row.names=1) #get coordinates Cassian

dist <- env[,c("latdec","lngdec" )]

rdist <- rdist.earth(dist, miles=FALSE) #compute great circle distance matrix among all pairings
mdist <- as.dist(rdist, diag = F) #create distance matrix
vec_ppd<- as.vector(ppd) #create vector for pairwise dissimilarities
mdistvec <- as.vector(mdist) #create vector for pairwise distances
fit <- lm(vec_ppd~mdistvec) #linear regression for plot
theme=theme_set(theme_bw()+theme(panel.grid=element_blank())) #adjust plot theme
visreg(fit, xlab="Distance (km)", gg=T, ylab = "PPD", points=list(size=2, pch=15)) #plot
#plot(mdist,ppd, xlab="Distance (km)", ylab = "Mean PPD")
#abline(lm(ppd~mdist))
cor.test(mdist, ppd, method="spearman")  #correlation between distance and dissimilarity
cor.test(mdist, ppd, method="pearson")  #correlation between distance and dissimilarity


###NULL MODEL FOR BETA DIVERSITY
dat <- colSums(ta.0) #create vector from ta.0
dat <- as.data.frame(dat) #create dataframe for further analyses
#create vector with number of specimens to be sampled
spe <- rowSums(ta.0)
dat[,2] <- dat[,1]/sum(dat[,1]) #abundance
abu <- dat[,2] #create abundance vector
abu <- as.vector(abu) #create abundance vector

ppd_null <- list()
ppd_vec <- list()

for (p in 1:1000){
  #sample from dataframe
  all <- numeric()
  vec <- list()
  for (m in 1:nrow(ta.0)){
    
    vec[[m]] <- sample(rownames(dat), spe[m], prob=abu, replace=T)
    
  }
  
  tab <- list()
  for (n in 1:nrow(ta.0)){
    tab[[n]] <- table(vec[n])
    tab[[n]] <- as.data.frame(tab[[n]])
  }
  ##merge tables by species
  ta.n <- tab[[1]]
  for (o in 1:(nrow(ta.0)-1)){
    
    ta.n <- full_join(ta.n, tab[[o+1]], by="Var1")
    
  }
  ta.n[is.na(ta.n)] <- 0
  ta.n <- as.data.frame(ta.n)
  rownames(ta.n) <- ta.n[,1]
  ta.n <- ta.n[,-1]
  ta.n <- t(ta.n)
  ta.prop <-prop.table(ta.n, margin=1)
  veg <- vegdist(ta.prop, method="bray")
  ppd_null[[p]] <- mean(veg)
  ppd_vec[[p]] <- as.vector(veg)
}

ppd_null <- unlist(ppd_null)  #unlist to create vector with all mean PPDs
ppd_vec <- unlist(ppd_vec)  #unlist to create vector with all pairwise dissimilarites

vec_ppd <- as.vector(ppd) #values of true dataset

#plot null model box plot and boxplot with true values #Fig. 4
#first combine datasets
lmts <- c(0,1) #range of values
par(mfrow = c(1, 2))
boxplot(vec_ppd, ylim=lmts, xlab="", ylab="PPD") #true dataset
boxplot(ppd_vec, ylim=lmts, xlab="") #null model

#histograms #Fig. S13
true <- hist(vec_ppd)
null <- hist(ppd_vec)
par(mar = c(5, 4, 4, 4) + 0.3) 
plot(true, col="gray82", xlab="", main="",xlim=c(0,1))
par(new=T)
plot(null, col="gray46", xlim=c(0,1), axes=F, main="", xlab = "PPD", ylab = "")
axis(side=4)

##############
#HOMOGENEITY OF DISPERSION #Figs. S10,S11
#code can be run from here

require(vegan)

se <- function(x) {sd(x)/sqrt(length(x))}#function for standard error
mdist <- function(x){
  x[is.na(x)] <- 0
  y<-prop.table(x, margin=1)   #create proportions out of raw abundances
  z <- vegdist(y)              #Bray-Curtis dissimilarity
  lt = list(PPD=mean(z),SE=se(z))     # mean pairwise dissimilarity, standard error
  return(lt)               
}

#get cassian data
setwd("...")

cas <- read.csv(file="Cassian_top10.csv", sep=",", header=T)
#View(cas)
ta <- tapply(cas$Specimens, list(cas$Sample, cas$Species), sum)
#View(ta)
ta.0 <- ta
ta.0[is.na(ta)] <- 0

prop <- prop.table(ta.0, margin=1)
d <- vegdist(prop.table(ta.0, margin=1))
rownames(ta.0)
g <- factor(c("deep","shallow","deep","deep", "shallow","shallow","deep","shallow"))
disp <- betadisper(d,g, type="median") #dispersion of hopmogeneity
scores(disp, display="sites")
plot(disp, main = "Dispersion of homogeneity", sub="", ellipse=F)
plot(disp, main = "Dispersion of homogeneity", sub="", ellipse=T, ylim=c(-0.4,0.5))

#use modified Gower (Anderson et al 2006)
gow <- vegdist(ta.0,method="altGower")
gowdisp <- betadisper(gow,g,type="median")
plot(gowdisp, main = "Dispersion of homogeneity using modified Gower",
     ylim=c(-30,30), xlim=c(-40,50), sub="", ellipse=T)

# # #repeat for 5 shallow and 3 deep
# g2 <- factor(c("deep","shallow","shallow","deep", "shallow","shallow","deep","shallow"))
# disp2 <- betadisper(d,g2) #dispersion of hopmogeneity
# # scores(disp3)
# plot(disp2,main = "Dispersion of homogeneity", sub="", ellipse=T)

#repeat without classes
#bray
g5 <- factor(rep("centroid",8))
disp5 <- betadisper(d,g5) #dispersion of homogeneity
# scores(disp5)
plot(disp5,main = "Dispersion of homogeneity", sub="", ellipse=T)
#altGower
#gow <- vegdist(ta.0,method="altGower")
gowdisp5 <- betadisper(gow,g5,type="median")
plot(gowdisp, main = "Dispersion of homogeneity using modified Gower",
     ylim=c(-30,30), xlim=c(-40,50), sub="", ellipse=T)

#figure with bray and gower and grouped and not grouped
op <- par(mfrow=c(2,2))
plot(disp5,main = "Dispersion of homogeneity using PPD", sub="", ellipse=T)
plot(gowdisp5, main = "Dispersion of homogeneity using modified Gower",
     ylim=c(-30,30), xlim=c(-40,50), sub="", ellipse=T)
plot(disp, main = "Dispersion of homogeneity using PPD", sub="", ellipse=T, ylim=c(-0.4,0.5))
plot(gowdisp, main = "Dispersion of homogeneity using modified Gower",
     ylim=c(-30,30), xlim=c(-40,50), sub="", ellipse=T)

#########################
#repeat for SAFAGA
#get safaga data
setwd("...")
ta.0<-read.csv("Safaga_pooled_top10.csv", header=T, sep=",", row.names=1) #for by-site (pooled) Safaga dataset

ta.0<-as.matrix(ta.0)
#prop <- prop.table(ta.0, margin=1)
d <- vegdist(prop.table(ta.0, margin=1))
rownames(ta.0)
g <- factor(c("shallow","deep","deep", "deep","shallow","shallow","shallow","deep"))
disp <- betadisper(d,g, type="median") #dispersion of hopmogeneity
scores(disp, display="sites")
#windows(h=8, w=10) #plotting in a large window will reduce label size
#plot(disp, main = "Dispersion of homogeneity", sub="", ellipse=F)
plot(disp, main = "Dispersion of homogeneity", sub="", ellipse=T, ylim=c(-0.5,0.4), xlim=c(-0.7,0.5))

#use modified Gower (Anderson et al 2006)
gow <- vegdist(ta.0,method="altGower")
gowdisp <- betadisper(gow,g,type="median")
#windows(h=8, w=10) #plotting in a large window will reduce label size
plot(gowdisp, main = "Dispersion of homogeneity using modified Gower",
     ylim=c(-130,200), xlim=c(-220,220), sub="", ellipse=T)

# # #repeat for 5 shallow and 3 deep
# g2 <- factor(c("deep","shallow","shallow","deep", "shallow","shallow","deep","shallow"))
# disp2 <- betadisper(d,g2) #dispersion of hopmogeneity
# # scores(disp3)
# plot(disp2,main = "Dispersion of homogeneity", sub="", ellipse=T)

#repeat without classes
#bray
g5 <- factor(rep("centroid",8))
disp5 <- betadisper(d,g5) #dispersion of homogeneity
# scores(disp5)
plot(disp5,main = "Dispersion of homogeneity", sub="", ellipse=T)
#altGower
#gow <- vegdist(ta.0,method="altGower")
gowdisp5 <- betadisper(gow,g5,type="median")
plot(gowdisp, main = "Dispersion of homogeneity using modified Gower", sub="", ellipse=T)

#figure with bray and gower and grouped and not grouped
#windows(h=8, w=10) #plotting in a large window will reduce label size
op <- par(mfrow=c(2,2))
plot(disp5,main = "Dispersion of homogeneity using PPD", sub="", ellipse=T)
plot(gowdisp5, main = "Dispersion of homogeneity using modified Gower",
     sub="", ellipse=T)
plot(disp, main = "Dispersion of homogeneity using PPD", sub="", ellipse=T)
plot(gowdisp, main = "Dispersion of homogeneity using modified Gower",
     sub="", ellipse=T)