# Load pacakages
library(ade4)
library(adegraphics)
library(adespatial)
library(vegan)
library(MASS)
## read data file "Site2.Nantong1(Div.index).txt"
Nantong1 <- read.table(choose.files(), header=T)
summary(Nantong1)
names(Nantong1)
#
Nantong1.xy <- Nantong1[,c(4,5)]
Nantong1.xy
## model matrix
Nantong1.mm = model.matrix(~ veg.,Nantong1)[,-1]
# Nantong 1 Abundance
Nantong1.N <- Nantong1[,c(6,8,10,12,14,16,18,20)]
names(Nantong1.N)
summary(Nantong1.N)
colSums(Nantong1.N)
rowSums(Nantong1.N)
#
Nantong1.N.mat <- as.matrix(Nantong1.N)
Nantong1.N.mat
Nantong1.N.hel <- (decostand(Nantong1.N.mat, "hell"))
Nantong1.N.hell <- as.matrix(scale(Nantong1.N.hel, center = TRUE, scale = FALSE))
summary(Nantong1.N.hell)
## pcnm
Nantong1.N.rs <- rowSums(Nantong1.N.mat)/sum(Nantong1.N.mat)
Nantong1.pcn <- pcnm(dist(Nantong1.xy), w = Nantong1.N.rs)
attributes(Nantong1.pcn$vectors)
Nantong1.N.pcnm = as.matrix(Nantong1.pcn$vectors)
## varpart of two explanatory variables; vegetation and euclidean distance
Nantong1.N.var <- varpart(Nantong1.N.hell, Nantong1.mm, Nantong1.N.pcnm)

######## Nantong1 Diversity
Nantong1.H <- Nantong1[,c(9,13,15,17,19)]
names(Nantong1.H)
colSums(Nantong1.H)
rowSums(Nantong1.H)
#
Nantong1.xy <- Nantong1[,c(4,5)]
Nantong1.xy
## model matrix
Nantong1.mm = model.matrix(~ veg.,Nantong1)[,-1]
# DATA matrix
Nantong1.H.mat <- as.matrix(Nantong1.H)
Nantong1.H.mat
Nantong1.H.hel <- (decostand(Nantong1.H.mat, "hell"))
Nantong1.H.hell <- as.matrix(scale(Nantong1.H.hel, center = TRUE, scale = FALSE))
summary(Nantong1.H.hell)
## pcnm
Nantong1.H.rs <- rowSums(Nantong1.H.mat)/sum(Nantong1.H.mat)
Nantong1.pcn <- pcnm(dist(Nantong1.xy), w = Nantong1.H.rs)
attributes(Nantong1.pcn$vectors)
Nantong1.H.pcnm = as.matrix(Nantong1.pcn$vectors)
## varpart of two explanatory variables; vegetation and euclidean distance
Nantong1.H.var <- varpart(Nantong1.H.hell, Nantong1.mm, Nantong1.H.pcnm)
Nantong1.H.var
showvarparts(2)
plot(Nantong1.H.var,bg=1:3)

## rda to show the relative imporatnce between surrouding vegetation and geographical distance
Nantong1.H.rda = rda(Nantong1.H.hell, Nantong1.mm, Nantong1.H.pcnm)
summary(Nantong1.H.rda)
Nantong1.H.rda
# regression coefficients for each explanatory variable
coef(Nantong1.H.rda)
# retrieve R2 from rda results
(R2 <- RsquareAdj(Nantong1.H.rda)$r.squared) # which is "[1] 0.1911394"
# Adjusted R2 retrived from the rda object
(R2adj <- RsquareAdj(Nantong1.H.rda)$adj.r.squared) # which is "[1] 0.1402984"
# Triplot of rda results
plot(Nantong1.H.rda, scaling =2, main = "Triplot RDA for spider fami. Shannon-diversity in Nantong 1 
     ~ surrouding veg.+pcnm",
     xlab = "RDA1(proportion exlpained=26%)", 
     ylab = "RDA2(proportion explained=13%)")
# This plot displays all our entities: sites, species
# explanatory variables as arrows (heads)
# species are in red colour without the arrows
# Let's add arrows to the species without heads to make them different from the output object
Nantong1.H.scor <- scores(Nantong1.H.rda, choices=1:2, scaling=1, display= "sp")
arrows(0, 0, Nantong1.H.scor[,1], Nantong1.H.scor[,2], length=0, lty=1, col="red")
# RDA triplot of the Hellinger-transformed spider shannon diversity data in Nantong 1
# constrained by all environmental variables, scaling 1. 
# The bottom and left-hand scales are for the objects and the
# response variables, the top and right-hand scales 
# are for the explanatory variables
# Let's test the significance of "rda" resluts
anova(Nantong1.H.rda, step=1000) # F= 1.7488, Pr(>F)=0.11

#
### HEATMAPS OF BETA DISSIMILARITY IN Nantong1
# HELP WEBSITE
# http://www.molecularecologist.com/2013/08/making-heatmaps-with-r-for-microbiome-analysis/
# 
library(gplots)  # for heatmap.2
library(Heatplus)
# load the vegan package for hierachical clustering if you want to use distance functions not specified in dist.
library(vegan)
# load the RColorBrewer package for better colour options
library(RColorBrewer)
library(pvclust)
library(ComplexHeatmap)
#
row.names <- Nantong1[,c(3)]
Nantong1.N <- Nantong1[, c(6,8,10,12,14,16,18,20)]
names(Nantong1.N)
# colorRampPalette is in the RColorBrewer package.  
#This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours
scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100)
# The most basic of heatmap
heatmap(as.matrix(Nantong1.N), Rowv = NA, Colv = NA, col = scaleyellowred)
#determine the maximum relative abundance for each column
maxab <- apply(Nantong1.N, 2, max)
head(maxab)
# the margins command sets the width of the white space around the plot. 
# The first element is the bottom margin and the second is the right margin
heatmap(as.matrix(Nantong1.N), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2))
# you have to transpose the dataset to get the genera as rows
data.dist.g <- vegdist(t(Nantong1.N), method = "bray")
data.dist.g
# Do complete linkage hierarchical clustering. Other options are 'average' or 'single'. 
# do the complete linkage hierarchical clustering
col.clus <- hclust(data.dist.g, "complete")
col.clus
# make the heatmap with Rowv = NULL
heatmap(as.matrix(Nantong1.N), Rowv = NULL, 
        Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3))

# There are 25 samples in the dataset, 
# so let's assign them randomly to one of two fictional categories
var1 <- round(runif(n = 25, min = 1, max = 2))  # this randomly samples from a uniform distribution and rounds the result to an integer value
# change the 10s and 1s to the names of colours:
var1 <- replace(var1, which(var1 == 1), "deepskyblue")
var1 <- replace(var1, which(var1 == 2), "magenta")
# if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g.
cbind(row.names(Nantong1.N), var1)
heatmap.2(as.matrix(Nantong1.N),Rowv = NULL, Colv = as.dendrogram(col.clus), 
          col = scaleyellowred, RowSideColors = var1, margins = c(10, 3))
# this puts in the annotation for the samples
heatmap.2(as.matrix(Nantong1.N), Rowv = NULL, 
          Colv = as.dendrogram(col.clus), col = scaleyellowred, 
          RowSideColors = var1, margins = c(11, 5), trace = "none", 
          density.info = "none", xlab = "genera", ylab = "Samples",
          main = "Nantong1 Abundance", lhei = c(2, 8))
# pvclust to test the goodness of cluster
result <- pvclust(Nantong1.N, method.dist="correlation", method.hclust="ward.D2", nboot=1000)
result
plot(result)

# Nantong1 Shannon Diversity
Nantong1.Div <- Nantong1[, c(9,13,15,17,19)]
names(Nantong1.Div)
colSums(Nantong1.Div)
# colorRampPalette is in the RColorBrewer package.  
#This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours
scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100)
# The most basic of heatmap
heatmap(as.matrix(Nantong1.Div), Rowv = NA, Colv = NA, col = scaleyellowred)
#determine the maximum relative abundance for each column
maxab <- apply(Nantong1.Div, 2, max)
head(maxab)
# the margins command sets the width of the white space around the plot. 
#The first element is the bottom margin and the second is the right margin
heatmap(as.matrix(Nantong1.Div), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2))
# you have to transpose the dataset to get the genera as rows
data.dist.g <- vegdist(t(Nantong1.Div), method = "bray")
data.dist.g
# Do complete linkage hierarchical clustering.
col.clus <- hclust(data.dist.g, "complete")
col.clus
# make the heatmap with Rowv = NULL
heatmap(as.matrix(Nantong1.Div), Rowv = NULL, 
        Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3))

# There are 25 samples in the dataset, 
# so let's assign them randomly to one of two fictional categories
var1 <- round(runif(n = 25, min = 1, max = 2))  # this randomly samples from a uniform distribution and rounds the result to an integer value
# change the 10s and 1s to the names of colours:
var1 <- replace(var1, which(var1 == 1), "deepskyblue")
var1 <- replace(var1, which(var1 == 2), "magenta")
# if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g.
cbind(row.names(Nantong1.Div), var1)
heatmap.2(as.matrix(Nantong1.Div),Rowv = NULL, Colv = as.dendrogram(col.clus), 
          col = scaleyellowred, RowSideColors = var1, margins = c(10, 3))
# this puts in the annotation for the samples
heatmap.2(as.matrix(Nantong1.Div), Rowv = NULL, 
          Colv = as.dendrogram(col.clus), col = scaleyellowred, 
          RowSideColors = var1, margins = c(11, 5), trace = "none", 
          density.info = "none", xlab = "genera", ylab = "Samples",
          main = "Nantong1 Diversity", lhei = c(2, 8))
#
result.d <- pvclust(Nantong1.Div, method.dist="uncentered", method.hclust="ward.D2", nboot=1000)
result.d
plot(result.d)

### dbMEM ANALSYSIS FOR Nantong1
#### Load packages
library(lattice)
library(HSAUR)
library(vegan)
library(MASS)
library(graphics)
library(ade4)
library(adegraphics)
library(adespatial)
## Nantong1 XY
Nantong1.N.xy <- Nantong1[,c(4,5)]
Nantong1.N.xy
Nantong1.N <- Nantong1[,c(6,8,10,12,14,16,18,20)]
names(Nantong1.N)
Nantong1.N.N <- as.matrix(Nantong1.N)
Y = as.matrix(Nantong1.N.N)
X = as.matrix(Nantong1.N.xy)
##
Y = scale(Y, center=TRUE, scale= FALSE)
X = scale(X, center= TRUE, scale= FALSE)
# Matrix algebra equation for multivariate regression and calculation of residuals
Nantong1.N.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y)
Nantong1.N.res
##
Nantong1.N.lm = lm(as.matrix(Nantong1.N.N) ~ as.matrix(Nantong1.N.xy))
Nantong1.N.resid = residuals(Nantong1.N.lm)
head(Nantong1.N.resid)
# Check the results
sum(abs(Nantong1.N.res - Nantong1.N.resid)) # The two sets of residuals do not differ
diag(cor(Nantong1.N.res, Nantong1.N.resid)) # The two sets of residuals have correlations of 1
#
Nantong1.N.hel <- decostand(Nantong1.N.N, "hellinger", scale = FALSE, center = TRUE)
# Plot a rough map of the 25 sampling points
plot(Nantong1.N.xy, asp=1)
text(Nantong1.N.xy, labels=rownames(Nantong1.N.xy), pos=3)
#or rotate it
plot(Nantong1.N.xy$y,Nantong1.N.xy$x)
text(Nantong1.N.xy$y,Nantong1.N.xy$x)
# ========
# dbMEM analysis by steps
# 1. Detrend the species data
Nantong1.N.lm = lm(as.matrix(Nantong1.N.hel) ~ as.matrix(Nantong1.N.xy))
Nantong1.N.resid = residuals(Nantong1.N.lm)
Nantong1.N.resid
# 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that
#dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix.
# The default option of argument MEM.autocor is to compute only the eigenfunctions modelling
#positive spatial correlation. However, other options are available for argument MEM.autocor.
Nantong1.N.dbmem = dbmem(Nantong1.N.xy, silent=FALSE) # Note the truncation value
Nantong1.N.dbmem
summary(Nantong1.N.dbmem)
# Examine the positive eigenvalues
attributes(Nantong1.N.dbmem)$values
# Get the dbMEM eigenvectors modelling positive spatial correlation
Nantong1.N.mem = as.matrix(Nantong1.N.dbmem)
dim(Nantong1.N.mem) # How many are there?
# Plot the spatial weighting matrix of this analysis, showing all edges > truncation value
adegraphics::s.label(Nantong1.N.xy, nb = attr(Nantong1.N.dbmem, "listw")$neighbours)
# Plot maps of the first 6 of 7 dbMEM eigenfunctions using the new s.value() function of adegraphics:
s.value(Nantong1.N.xy, Nantong1.N.dbmem[,1:6])

# This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the
#writing of a for loop:
par(mfrow=c(2,3))
for(i in 1:6) {
  ade4::s.value(Nantong1.N.xy, Nantong1.N.mem[ ,i], addaxes=FALSE, include.origin=FALSE,
                sub=paste("dbMEM #",i), csub=1.5)
}
# Compute# Compute and test the Moran??ㄓs I values associated with the dbMEM eigenfunctions
# One can check that the eigenvalues are perfectly proportional to Moran??ㄓs I
( test <- moran.randtest(Nantong1.N.dbmem, nrepet = 9999) )
par(mfrow=c(1,2))
plot(test$obs, attr(Nantong1.N.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues")

# Plot the decreasing values of Moran??ㄓs I describing the successive dbMEM eigenfunctions.
# The red line is the expected value of Moran??ㄓs I under H0.
plot(test$obs, xlab="MEM rank", ylab="Moran's I")
abline(h=-1/(nrow(Nantong1.N.xy) - 1), col="red")

# 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM
RsquareAdj(rda(Nantong1.N.resid, Nantong1.N.mem)) # $r.squared [1] 0.3669698, $adj.r.squared [1] 0.1063103
# 4. Forward selection: identify the significant dbMEM among those that model positive
#spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??角.
( Nantong1.N.sel = forward.sel(Nantong1.N.resid, Nantong1.N.mem, adjR2thresh=0.3669698))
# Procedure stopped (alpha criteria): pvalue for variable 1 is 0.145000 (> 0.050000)

# dbMEM for Shannon diveristy in Nantong1
Nantong1.H.xy <- Nantong1[,c(4,5)]
Nantong1.H.xy
Nantong1.H <- Nantong1[,c(9,13,15,19)]
names(Nantong1.H)
Nantong1.H.N <- as.matrix(Nantong1.H)
Y = as.matrix(Nantong1.H.N)
X = as.matrix(Nantong1.H.xy)
##
Y = scale(Y, center=TRUE, scale= FALSE)
X = scale(X, center= TRUE, scale= FALSE)
# Matrix algebra equation for multivariate regression and calculation of residuals
Nantong1.H.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y)
##
Nantong1.H.lm = lm(as.matrix(Nantong1.H.N) ~ as.matrix(Nantong1.H.xy))
Nantong1.H.resid = residuals(Nantong1.H.lm)
head(Nantong1.H.resid)
# Check the results
sum(abs(Nantong1.H.res - Nantong1.H.resid)) # The two sets of residuals do not differ
diag(cor(Nantong1.H.res, Nantong1.H.resid)) # The two sets of residuals have correlations of 1
#
Nantong1.H.hel <- decostand(Nantong1.H.N, "hellinger", scale = FALSE, center = TRUE)
# Plot a rough map of the 25 sampling points
plot(Nantong1.H.xy, asp=1)
text(Nantong1.H.xy, labels=rownames(Nantong1.H.xy), pos=3)
#or rotate it
plot(Nantong1.H.xy$y,Nantong1.H.xy$x)
text(Nantong1.H.xy$y,Nantong1.H.xy$x)
# ========
# dbMEM analysis by steps
# 1. Detrend the species data
Nantong1.H.lm = lm(as.matrix(Nantong1.H.hel) ~ as.matrix(Nantong1.H.xy))
Nantong1.H.resid = residuals(Nantong1.H.lm)
# 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that
#dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix.
# The default option of argument MEM.autocor is to compute only the eigenfunctions modelling
#positive spatial correlation. However, other options are available for argument MEM.autocor.
Nantong1.H.dbmem = dbmem(Nantong1.H.xy, silent=FALSE) # Note the truncation value
Nantong1.H.dbmem
# Examine the positive eigenvalues
attributes(Nantong1.H.dbmem)$values
# Get the dbMEM eigenvectors modelling positive spatial correlation
Nantong1.H.mem = as.matrix(Nantong1.H.dbmem)
dim(Nantong1.H.mem) # How many are there?
# Plot the spatial weighting matrix of this analysis, showing all edges > truncation value
adegraphics::s.label(Nantong1.H.xy, nb = attr(Nantong1.H.dbmem, "listw")$neighbours)
# Plot maps of the first 6 dbMEM eigenfunctions using the new s.value() function of adegraphics:
s.value(Nantong1.H.xy, Nantong1.H.dbmem[,1:6])
# This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the
#writing of a for loop:
par(mfrow=c(2,3))
for(i in 1:6) {
  ade4::s.value(Nantong1.H.xy, Nantong1.H.mem[ ,i], addaxes=FALSE, include.origin=FALSE,
                sub=paste("dbMEM #",i), csub=1.5)
}
# Compute# Compute and test the Moran??ㄓs I values associated with the dbMEM eigenfunctions
# One can check that the eigenvalues are perfectly proportional to Moran??ㄓs I
( test <- moran.randtest(Nantong1.H.dbmem, nrepet = 9999) )
#
par(mfrow=c(1,2))
plot(test$obs, attr(Nantong1.H.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues")

# Plot the decreasing values of Moran??ㄓs I describing the successive dbMEM eigenfunctions.
# The red line is the expected value of Moran??ㄓs I under H0.
plot(test$obs, xlab="MEM rank", ylab="Moran's I")
abline(h=-1/(nrow(Nantong1.H.xy) - 1), col="red")

# 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM
RsquareAdj(rda(Nantong1.H.resid, Nantong1.H.mem)) # $r.squared [1] 0.3363683, $adj.r.squared [1] 0.06310819
# 4. Forward selection: identify the significant dbMEM among those that model positive
#spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??角.
( Nantong1.H.sel = forward.sel(Nantong1.H.resid, Nantong1.H.mem, adjR2thresh=0.3363683))
# Testing variable 1
# Testing variable 2
# Procedure stopped (alpha criteria): pvalue for variable 2 is 0.059000 (> 0.050000)
# variables order        R2     R2Cum   AdjR2Cum        F  pval
# 1      MEM2     2 0.1161412 0.1161412 0.07771254 3.022256 0.039

# The selected dbMEM variables are listed in vector "Nantong1.H.sel$order"(column 2 of output table)
( Nantong1.H.sel$order )
# Obtain an ordered list of the selected variable numbers:
( Nantong1.H.sel.dbmem = sort(Nantong1.H.sel$order) )
# Look at the adjusted R-square of the mod Look at the adjusted R-square of the model containing the 8 selected dbMEM (line 8 of table):
Nantong1.H.sel[1,]
# 5. Draw maps of the selected eigenfunctions using the new s.value() function of adegraphics:
s.value(Nantong1.H.xy, Nantong1.H.mem[ ,Nantong1.H.sel.dbmem])

# This plot can be obtained using function s.value() of ade4 (slower); it involves writing a for loop:
par(mfrow=c(1,1))
for(i in 1:length(Nantong1.H.sel.dbmem)) {
  ade4::s.value(Nantong1.H.xy, Nantong1.H.mem[ ,Nantong1.H.sel.dbmem[i]], addaxes=FALSE, include.origin=FALSE,
                sub=paste("dbMEM #", Nantong1.H.sel.dbmem[i]), csub=1.5) }
# 6. Canonical analysis (RDA): compute the dbMEM spatial model
Nantong1.H.rda = rda(Nantong1.H.resid ~ ., data=as.data.frame(Nantong1.H.mem[,Nantong1.H.sel.dbmem]))
anova(Nantong1.H.rda) # model F= 3.0223, Pr = 0.046*
anova(Nantong1.H.rda, by="axis") #RDA1 F= 3.0223, Pr= 0.038 *

# How many axes of the canonical model are significant at level alpha = 0.05?
anova(Nantong1.H.rda, by="axis", cutoff=0.05) #RDA1 F = 3.0223,  Pr = 0.036 *