# Load pacakages library(ade4) library(adegraphics) library(adespatial) library(vegan) library(MASS) ## read data file "Site2.Nantong1(Div.index).txt" Nantong1 <- read.table(choose.files(), header=T) summary(Nantong1) names(Nantong1) # Nantong1.xy <- Nantong1[,c(4,5)] Nantong1.xy ## model matrix Nantong1.mm = model.matrix(~ veg.,Nantong1)[,-1] # Nantong 1 Abundance Nantong1.N <- Nantong1[,c(6,8,10,12,14,16,18,20)] names(Nantong1.N) summary(Nantong1.N) colSums(Nantong1.N) rowSums(Nantong1.N) # Nantong1.N.mat <- as.matrix(Nantong1.N) Nantong1.N.mat Nantong1.N.hel <- (decostand(Nantong1.N.mat, "hell")) Nantong1.N.hell <- as.matrix(scale(Nantong1.N.hel, center = TRUE, scale = FALSE)) summary(Nantong1.N.hell) ## pcnm Nantong1.N.rs <- rowSums(Nantong1.N.mat)/sum(Nantong1.N.mat) Nantong1.pcn <- pcnm(dist(Nantong1.xy), w = Nantong1.N.rs) attributes(Nantong1.pcn$vectors) Nantong1.N.pcnm = as.matrix(Nantong1.pcn$vectors) ## varpart of two explanatory variables; vegetation and euclidean distance Nantong1.N.var <- varpart(Nantong1.N.hell, Nantong1.mm, Nantong1.N.pcnm) ######## Nantong1 Diversity Nantong1.H <- Nantong1[,c(9,13,15,17,19)] names(Nantong1.H) colSums(Nantong1.H) rowSums(Nantong1.H) # Nantong1.xy <- Nantong1[,c(4,5)] Nantong1.xy ## model matrix Nantong1.mm = model.matrix(~ veg.,Nantong1)[,-1] # DATA matrix Nantong1.H.mat <- as.matrix(Nantong1.H) Nantong1.H.mat Nantong1.H.hel <- (decostand(Nantong1.H.mat, "hell")) Nantong1.H.hell <- as.matrix(scale(Nantong1.H.hel, center = TRUE, scale = FALSE)) summary(Nantong1.H.hell) ## pcnm Nantong1.H.rs <- rowSums(Nantong1.H.mat)/sum(Nantong1.H.mat) Nantong1.pcn <- pcnm(dist(Nantong1.xy), w = Nantong1.H.rs) attributes(Nantong1.pcn$vectors) Nantong1.H.pcnm = as.matrix(Nantong1.pcn$vectors) ## varpart of two explanatory variables; vegetation and euclidean distance Nantong1.H.var <- varpart(Nantong1.H.hell, Nantong1.mm, Nantong1.H.pcnm) Nantong1.H.var showvarparts(2) plot(Nantong1.H.var,bg=1:3) ## rda to show the relative imporatnce between surrouding vegetation and geographical distance Nantong1.H.rda = rda(Nantong1.H.hell, Nantong1.mm, Nantong1.H.pcnm) summary(Nantong1.H.rda) Nantong1.H.rda # regression coefficients for each explanatory variable coef(Nantong1.H.rda) # retrieve R2 from rda results (R2 <- RsquareAdj(Nantong1.H.rda)$r.squared) # which is "[1] 0.1911394" # Adjusted R2 retrived from the rda object (R2adj <- RsquareAdj(Nantong1.H.rda)$adj.r.squared) # which is "[1] 0.1402984" # Triplot of rda results plot(Nantong1.H.rda, scaling =2, main = "Triplot RDA for spider fami. Shannon-diversity in Nantong 1 ~ surrouding veg.+pcnm", xlab = "RDA1(proportion exlpained=26%)", ylab = "RDA2(proportion explained=13%)") # This plot displays all our entities: sites, species # explanatory variables as arrows (heads) # species are in red colour without the arrows # Let's add arrows to the species without heads to make them different from the output object Nantong1.H.scor <- scores(Nantong1.H.rda, choices=1:2, scaling=1, display= "sp") arrows(0, 0, Nantong1.H.scor[,1], Nantong1.H.scor[,2], length=0, lty=1, col="red") # RDA triplot of the Hellinger-transformed spider shannon diversity data in Nantong 1 # constrained by all environmental variables, scaling 1. # The bottom and left-hand scales are for the objects and the # response variables, the top and right-hand scales # are for the explanatory variables # Let's test the significance of "rda" resluts anova(Nantong1.H.rda, step=1000) # F= 1.7488, Pr(>F)=0.11 # ### HEATMAPS OF BETA DISSIMILARITY IN Nantong1 # HELP WEBSITE # http://www.molecularecologist.com/2013/08/making-heatmaps-with-r-for-microbiome-analysis/ # library(gplots) # for heatmap.2 library(Heatplus) # load the vegan package for hierachical clustering if you want to use distance functions not specified in dist. library(vegan) # load the RColorBrewer package for better colour options library(RColorBrewer) library(pvclust) library(ComplexHeatmap) # row.names <- Nantong1[,c(3)] Nantong1.N <- Nantong1[, c(6,8,10,12,14,16,18,20)] names(Nantong1.N) # colorRampPalette is in the RColorBrewer package. #This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100) # The most basic of heatmap heatmap(as.matrix(Nantong1.N), Rowv = NA, Colv = NA, col = scaleyellowred) #determine the maximum relative abundance for each column maxab <- apply(Nantong1.N, 2, max) head(maxab) # the margins command sets the width of the white space around the plot. # The first element is the bottom margin and the second is the right margin heatmap(as.matrix(Nantong1.N), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2)) # you have to transpose the dataset to get the genera as rows data.dist.g <- vegdist(t(Nantong1.N), method = "bray") data.dist.g # Do complete linkage hierarchical clustering. Other options are 'average' or 'single'. # do the complete linkage hierarchical clustering col.clus <- hclust(data.dist.g, "complete") col.clus # make the heatmap with Rowv = NULL heatmap(as.matrix(Nantong1.N), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3)) # There are 25 samples in the dataset, # so let's assign them randomly to one of two fictional categories var1 <- round(runif(n = 25, min = 1, max = 2)) # this randomly samples from a uniform distribution and rounds the result to an integer value # change the 10s and 1s to the names of colours: var1 <- replace(var1, which(var1 == 1), "deepskyblue") var1 <- replace(var1, which(var1 == 2), "magenta") # if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g. cbind(row.names(Nantong1.N), var1) heatmap.2(as.matrix(Nantong1.N),Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(10, 3)) # this puts in the annotation for the samples heatmap.2(as.matrix(Nantong1.N), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(11, 5), trace = "none", density.info = "none", xlab = "genera", ylab = "Samples", main = "Nantong1 Abundance", lhei = c(2, 8)) # pvclust to test the goodness of cluster result <- pvclust(Nantong1.N, method.dist="correlation", method.hclust="ward.D2", nboot=1000) result plot(result) # Nantong1 Shannon Diversity Nantong1.Div <- Nantong1[, c(9,13,15,17,19)] names(Nantong1.Div) colSums(Nantong1.Div) # colorRampPalette is in the RColorBrewer package. #This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100) # The most basic of heatmap heatmap(as.matrix(Nantong1.Div), Rowv = NA, Colv = NA, col = scaleyellowred) #determine the maximum relative abundance for each column maxab <- apply(Nantong1.Div, 2, max) head(maxab) # the margins command sets the width of the white space around the plot. #The first element is the bottom margin and the second is the right margin heatmap(as.matrix(Nantong1.Div), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2)) # you have to transpose the dataset to get the genera as rows data.dist.g <- vegdist(t(Nantong1.Div), method = "bray") data.dist.g # Do complete linkage hierarchical clustering. col.clus <- hclust(data.dist.g, "complete") col.clus # make the heatmap with Rowv = NULL heatmap(as.matrix(Nantong1.Div), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3)) # There are 25 samples in the dataset, # so let's assign them randomly to one of two fictional categories var1 <- round(runif(n = 25, min = 1, max = 2)) # this randomly samples from a uniform distribution and rounds the result to an integer value # change the 10s and 1s to the names of colours: var1 <- replace(var1, which(var1 == 1), "deepskyblue") var1 <- replace(var1, which(var1 == 2), "magenta") # if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g. cbind(row.names(Nantong1.Div), var1) heatmap.2(as.matrix(Nantong1.Div),Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(10, 3)) # this puts in the annotation for the samples heatmap.2(as.matrix(Nantong1.Div), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(11, 5), trace = "none", density.info = "none", xlab = "genera", ylab = "Samples", main = "Nantong1 Diversity", lhei = c(2, 8)) # result.d <- pvclust(Nantong1.Div, method.dist="uncentered", method.hclust="ward.D2", nboot=1000) result.d plot(result.d) ### dbMEM ANALSYSIS FOR Nantong1 #### Load packages library(lattice) library(HSAUR) library(vegan) library(MASS) library(graphics) library(ade4) library(adegraphics) library(adespatial) ## Nantong1 XY Nantong1.N.xy <- Nantong1[,c(4,5)] Nantong1.N.xy Nantong1.N <- Nantong1[,c(6,8,10,12,14,16,18,20)] names(Nantong1.N) Nantong1.N.N <- as.matrix(Nantong1.N) Y = as.matrix(Nantong1.N.N) X = as.matrix(Nantong1.N.xy) ## Y = scale(Y, center=TRUE, scale= FALSE) X = scale(X, center= TRUE, scale= FALSE) # Matrix algebra equation for multivariate regression and calculation of residuals Nantong1.N.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y) Nantong1.N.res ## Nantong1.N.lm = lm(as.matrix(Nantong1.N.N) ~ as.matrix(Nantong1.N.xy)) Nantong1.N.resid = residuals(Nantong1.N.lm) head(Nantong1.N.resid) # Check the results sum(abs(Nantong1.N.res - Nantong1.N.resid)) # The two sets of residuals do not differ diag(cor(Nantong1.N.res, Nantong1.N.resid)) # The two sets of residuals have correlations of 1 # Nantong1.N.hel <- decostand(Nantong1.N.N, "hellinger", scale = FALSE, center = TRUE) # Plot a rough map of the 25 sampling points plot(Nantong1.N.xy, asp=1) text(Nantong1.N.xy, labels=rownames(Nantong1.N.xy), pos=3) #or rotate it plot(Nantong1.N.xy$y,Nantong1.N.xy$x) text(Nantong1.N.xy$y,Nantong1.N.xy$x) # ======== # dbMEM analysis by steps # 1. Detrend the species data Nantong1.N.lm = lm(as.matrix(Nantong1.N.hel) ~ as.matrix(Nantong1.N.xy)) Nantong1.N.resid = residuals(Nantong1.N.lm) Nantong1.N.resid # 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that #dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix. # The default option of argument MEM.autocor is to compute only the eigenfunctions modelling #positive spatial correlation. However, other options are available for argument MEM.autocor. Nantong1.N.dbmem = dbmem(Nantong1.N.xy, silent=FALSE) # Note the truncation value Nantong1.N.dbmem summary(Nantong1.N.dbmem) # Examine the positive eigenvalues attributes(Nantong1.N.dbmem)$values # Get the dbMEM eigenvectors modelling positive spatial correlation Nantong1.N.mem = as.matrix(Nantong1.N.dbmem) dim(Nantong1.N.mem) # How many are there? # Plot the spatial weighting matrix of this analysis, showing all edges > truncation value adegraphics::s.label(Nantong1.N.xy, nb = attr(Nantong1.N.dbmem, "listw")$neighbours) # Plot maps of the first 6 of 7 dbMEM eigenfunctions using the new s.value() function of adegraphics: s.value(Nantong1.N.xy, Nantong1.N.dbmem[,1:6]) # This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the #writing of a for loop: par(mfrow=c(2,3)) for(i in 1:6) { ade4::s.value(Nantong1.N.xy, Nantong1.N.mem[ ,i], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #",i), csub=1.5) } # Compute# Compute and test the Moran??£¤s I values associated with the dbMEM eigenfunctions # One can check that the eigenvalues are perfectly proportional to Moran??£¤s I ( test <- moran.randtest(Nantong1.N.dbmem, nrepet = 9999) ) par(mfrow=c(1,2)) plot(test$obs, attr(Nantong1.N.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues") # Plot the decreasing values of Moran??£¤s I describing the successive dbMEM eigenfunctions. # The red line is the expected value of Moran??£¤s I under H0. plot(test$obs, xlab="MEM rank", ylab="Moran's I") abline(h=-1/(nrow(Nantong1.N.xy) - 1), col="red") # 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM RsquareAdj(rda(Nantong1.N.resid, Nantong1.N.mem)) # $r.squared [1] 0.3669698, $adj.r.squared [1] 0.1063103 # 4. Forward selection: identify the significant dbMEM among those that model positive #spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??¨¤. ( Nantong1.N.sel = forward.sel(Nantong1.N.resid, Nantong1.N.mem, adjR2thresh=0.3669698)) # Procedure stopped (alpha criteria): pvalue for variable 1 is 0.145000 (> 0.050000) # dbMEM for Shannon diveristy in Nantong1 Nantong1.H.xy <- Nantong1[,c(4,5)] Nantong1.H.xy Nantong1.H <- Nantong1[,c(9,13,15,19)] names(Nantong1.H) Nantong1.H.N <- as.matrix(Nantong1.H) Y = as.matrix(Nantong1.H.N) X = as.matrix(Nantong1.H.xy) ## Y = scale(Y, center=TRUE, scale= FALSE) X = scale(X, center= TRUE, scale= FALSE) # Matrix algebra equation for multivariate regression and calculation of residuals Nantong1.H.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y) ## Nantong1.H.lm = lm(as.matrix(Nantong1.H.N) ~ as.matrix(Nantong1.H.xy)) Nantong1.H.resid = residuals(Nantong1.H.lm) head(Nantong1.H.resid) # Check the results sum(abs(Nantong1.H.res - Nantong1.H.resid)) # The two sets of residuals do not differ diag(cor(Nantong1.H.res, Nantong1.H.resid)) # The two sets of residuals have correlations of 1 # Nantong1.H.hel <- decostand(Nantong1.H.N, "hellinger", scale = FALSE, center = TRUE) # Plot a rough map of the 25 sampling points plot(Nantong1.H.xy, asp=1) text(Nantong1.H.xy, labels=rownames(Nantong1.H.xy), pos=3) #or rotate it plot(Nantong1.H.xy$y,Nantong1.H.xy$x) text(Nantong1.H.xy$y,Nantong1.H.xy$x) # ======== # dbMEM analysis by steps # 1. Detrend the species data Nantong1.H.lm = lm(as.matrix(Nantong1.H.hel) ~ as.matrix(Nantong1.H.xy)) Nantong1.H.resid = residuals(Nantong1.H.lm) # 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that #dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix. # The default option of argument MEM.autocor is to compute only the eigenfunctions modelling #positive spatial correlation. However, other options are available for argument MEM.autocor. Nantong1.H.dbmem = dbmem(Nantong1.H.xy, silent=FALSE) # Note the truncation value Nantong1.H.dbmem # Examine the positive eigenvalues attributes(Nantong1.H.dbmem)$values # Get the dbMEM eigenvectors modelling positive spatial correlation Nantong1.H.mem = as.matrix(Nantong1.H.dbmem) dim(Nantong1.H.mem) # How many are there? # Plot the spatial weighting matrix of this analysis, showing all edges > truncation value adegraphics::s.label(Nantong1.H.xy, nb = attr(Nantong1.H.dbmem, "listw")$neighbours) # Plot maps of the first 6 dbMEM eigenfunctions using the new s.value() function of adegraphics: s.value(Nantong1.H.xy, Nantong1.H.dbmem[,1:6]) # This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the #writing of a for loop: par(mfrow=c(2,3)) for(i in 1:6) { ade4::s.value(Nantong1.H.xy, Nantong1.H.mem[ ,i], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #",i), csub=1.5) } # Compute# Compute and test the Moran??£¤s I values associated with the dbMEM eigenfunctions # One can check that the eigenvalues are perfectly proportional to Moran??£¤s I ( test <- moran.randtest(Nantong1.H.dbmem, nrepet = 9999) ) # par(mfrow=c(1,2)) plot(test$obs, attr(Nantong1.H.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues") # Plot the decreasing values of Moran??£¤s I describing the successive dbMEM eigenfunctions. # The red line is the expected value of Moran??£¤s I under H0. plot(test$obs, xlab="MEM rank", ylab="Moran's I") abline(h=-1/(nrow(Nantong1.H.xy) - 1), col="red") # 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM RsquareAdj(rda(Nantong1.H.resid, Nantong1.H.mem)) # $r.squared [1] 0.3363683, $adj.r.squared [1] 0.06310819 # 4. Forward selection: identify the significant dbMEM among those that model positive #spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??¨¤. ( Nantong1.H.sel = forward.sel(Nantong1.H.resid, Nantong1.H.mem, adjR2thresh=0.3363683)) # Testing variable 1 # Testing variable 2 # Procedure stopped (alpha criteria): pvalue for variable 2 is 0.059000 (> 0.050000) # variables order R2 R2Cum AdjR2Cum F pval # 1 MEM2 2 0.1161412 0.1161412 0.07771254 3.022256 0.039 # The selected dbMEM variables are listed in vector "Nantong1.H.sel$order"(column 2 of output table) ( Nantong1.H.sel$order ) # Obtain an ordered list of the selected variable numbers: ( Nantong1.H.sel.dbmem = sort(Nantong1.H.sel$order) ) # Look at the adjusted R-square of the mod Look at the adjusted R-square of the model containing the 8 selected dbMEM (line 8 of table): Nantong1.H.sel[1,] # 5. Draw maps of the selected eigenfunctions using the new s.value() function of adegraphics: s.value(Nantong1.H.xy, Nantong1.H.mem[ ,Nantong1.H.sel.dbmem]) # This plot can be obtained using function s.value() of ade4 (slower); it involves writing a for loop: par(mfrow=c(1,1)) for(i in 1:length(Nantong1.H.sel.dbmem)) { ade4::s.value(Nantong1.H.xy, Nantong1.H.mem[ ,Nantong1.H.sel.dbmem[i]], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #", Nantong1.H.sel.dbmem[i]), csub=1.5) } # 6. Canonical analysis (RDA): compute the dbMEM spatial model Nantong1.H.rda = rda(Nantong1.H.resid ~ ., data=as.data.frame(Nantong1.H.mem[,Nantong1.H.sel.dbmem])) anova(Nantong1.H.rda) # model F= 3.0223, Pr = 0.046* anova(Nantong1.H.rda, by="axis") #RDA1 F= 3.0223, Pr= 0.038 * # How many axes of the canonical model are significant at level alpha = 0.05? anova(Nantong1.H.rda, by="axis", cutoff=0.05) #RDA1 F = 3.0223, Pr = 0.036 *