# Sean Kinard # 1-20-2020 # Spring 17 Texas Coastal Prairie # Invertebrate Canonical Ordination Using Hellinger-transformation and # RDA constrained to annual precipitation # Set Working Directory "...PeerJ_Sp17_REVISED\\" # setwd("C:\\Users\\s2kin\\Dropbox\\Research\\Manuscipts\\Diss.2_Gradient\\PeerJ_Sp17_REVISED\\") # Load Packages library(BiodiversityR) library(ggsci) library(ggrepel) library(ggforce) library(ade4) library(adegraphics) library(adespatial) library(cocorresp) library(MASS) library(ellipse) library(FactoMineR) library(rrcov) library(tidyverse) # Source additional functions source("sp17_r_source_files\\hcoplot.R") source("sp17_r_source_files\\triplot.rda.R") source("sp17_r_source_files\\plot.lda.R") source("sp17_r_source_files\\polyvars.R") source("sp17_r_source_files\\screestick.R") # Load Data env <- read.csv("sp17_data_files/sp17_site_x_env.csv") invert <- read.csv("sp17_data_files/sp17_site_x_invert.csv") # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # cleaning up data frames to include only the a priori selected variables and community abundance matrix (env <- select(env, STAID, AP, flash.index, LFPP, NH4., log.cond, Rosgen.Index)) invert <- select(invert, STAID, ALISOTRICHIA:VALVATA) # Merge frames by STAID to ensure same order of sites (rows) mster <- merge(env,invert, by = "STAID") # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # - # # # # # # # # # # # # # # # # Redundancy Analysis (RDA) # # # # # # # # # # # # # # # # # step 1: The ordiplot object is obtained from the result of via function rda. The ordination is done with a community data set that is transformed by the Hellinger method as recommended in this article. # script generated by the BiodiversityR GUI from the constrained ordination menu invert.Hellinger <- disttransform(select(mster, ALISOTRICHIA:VALVATA), method='hellinger') Ordination.model2 <- rda(invert.Hellinger ~ .,data=env, scaling="species") summary(Ordination.model2) # This can be plotted via ordiplot plot2 <- ordiplot(Ordination.model2, choices=c(1,2)) # Step 2 : To plot data via ggplot2 # information on the locations of sites (circles in the ordiplot) is obtained via function sites.long. sites.long2 <- sites.long(plot2, env.data=mster[,2:7]) head(sites.long2) # Information on species is extracted by function species.long. species.long2 <- species.long(plot2) species.long2 # Information on the labeling of the axes is obtained with function axis.long. This information is extracted from the ordination model and not the ordiplot, hence it is important to select the same axes via argument 'choices'. The extracted information includes information on the percentage of explained variation. I suggest that you cross-check with the summary of the redundancy analysis, where information on proportion explained is given. axis.long2 <- axis.long(Ordination.model2, choices=c(1, 2)) axis.long2 # Step 3: Generate plot adding information on sites and species similar to the ordiplot: plot_irda <- ggplot() + geom_vline(xintercept = c(0), color = "grey70", linetype = 2) + geom_hline(yintercept = c(0), color = "grey70", linetype = 2) + xlab(axis.long2[1, "label"]) + ylab(axis.long2[2, "label"]) + scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) + scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) + geom_point(data=sites.long2, aes(x=axis1, y=axis2, colour=AP), size=6) + geom_point(data=species.long2, aes(x=axis1, y=axis2)) + scale_colour_viridis_c(direction=-1) + coord_fixed(ratio=1) + theme_classic() + theme(text = element_text(size = 16)) + theme(axis.text = element_text(size = 16)) + theme(axis.title = element_text(size = 20)) plot_irda # With an ordination via redundancy analysis, the positions of species should be interpreted as arrows. It is straightforward to show the positions of the species as arrows. However, with a large number of species, it is better to reduce the number to those that explain more of the variation among the sites. This information is obtained with the envfit function of vegan. spec.envfit <- envfit(plot2, env=invert.Hellinger) spec.data.envfit <- data.frame(r=spec.envfit$vectors$r, p=spec.envfit$vectors$pvals) species.long2 <- species.long(plot2, spec.data=spec.data.envfit) species.long2 # Species are selected that have a statistically significant p-value species.long3 <- species.long2[species.long2$p <= 0.15, ] species.long3 # adding environmental fit vectors vectors.envfit <- envfit(plot2, env=mster[,2:7]) vectors.long2 <- vectorfit.long(vectors.envfit) vectors.long2 # Only Predictors are selected that have a statistically significant p-value vectors.long3 <- vectors.long2[vectors.long2$p <= 0.05, ] vectors.long3 # Invetebrate RDA ggplot plot_irda <- ggplot() + geom_vline(xintercept = c(0), color = "grey70", linetype = 2) + geom_hline(yintercept = c(0), color = "grey70", linetype = 2) + xlab(axis.long2[1, "label"]) + ylab(axis.long2[2, "label"]) + scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) + scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) + geom_segment(data=vectors.long2, aes(x=0, y=0, xend=axis1*.7, yend=axis2*.7), colour="gray", size=1.2, arrow=arrow()) + geom_segment(data=vectors.long3, aes(x=0, y=0, xend=axis1*.7, yend=axis2*.7), colour="black", size=1.2, arrow=arrow()) + geom_point(data=species.long2, aes(x=axis1*2, y=axis2*2), colour="red", shape = 3) + geom_label_repel(data=species.long3, aes(x=axis1*2, y=axis2*2, label=labels), box.padding = 1) + geom_label_repel(data=vectors.long2, aes(x=axis1*.5, y=axis2*.5, label=rownames(vectors.long2)), colour="black", box.padding = 0.7 ) + geom_point(data=sites.long2, alpha = 0.8, aes(x=axis1, y=axis2, colour=AP), size=5, show.legend = FALSE) + scale_colour_viridis_c(direction=-1) + coord_fixed(ratio=1) + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 16)) + theme(axis.text = element_text(size = 16)) + theme(axis.title = element_text(size = 18)) plot_irda # which gives the following ordination diagram (length of vectors were multiplied, which does not change their interpretation) # species vectors without labels plot_irda_nolabel <- ggplot() + geom_vline(xintercept = c(0), color = "grey70", linetype = 2) + geom_hline(yintercept = c(0), color = "grey70", linetype = 2) + xlab(axis.long2[1, "label"]) + ylab(axis.long2[2, "label"]) + scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) + scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) + geom_point(data=species.long2, aes(x=axis1*2, y=axis2*2), colour="red", shape = 3) + geom_point(data=sites.long2, alpha = 0.8, aes(x=axis1, y=axis2, colour=AP), size=5, show.legend = FALSE) + scale_colour_viridis_c(direction=-1) + coord_fixed(ratio=1) + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 16)) + theme(axis.text = element_text(size = 16)) + theme(axis.title = element_text(size = 18)) plot_irda_nolabel # - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - # # Seek Alternative RDA constrained to all environmental predictors # because all variables are not advised for small sample size spe <- select(mster, ALISOTRICHIA:VALVATA) env <- select(mster, AP:Rosgen.Index) # RDA: all environmental variables spe.hel <- disttransform(spe, method='hellinger') (spe.rda <- rda(spe.hel ~ ., env)) vif.cca(spe.rda) # AP flash.index LFPP NH4. log.cond Rosgen.Index # 10.466060 2.090356 3.416691 6.289334 8.231157 1.152091 # RDA with all explanatory variables spe.rda.all <- rda(spe.hel ~ ., data = env) # Global adjusted R^2 (R2a.all <- RsquareAdj(spe.rda.all)) # R2 = 0.6514918 # Forward selection using vegan's ordistep() # This function allows the use of factors. mod0 <- rda(spe.hel ~ 1, data = env) step.forward <- ordistep(mod0, scope = formula(spe.rda.all), direction = "forward", permutations = how(nperm = 499) # Step: spe.hel ~ log.cond ) RsquareAdj(step.forward) # Forward model selection stops at spe.hel ~ log.cond with R2 = 0.159445 ## 1. Variation partitioning Conductivity vs all other environmental variables (spe.part.all <- varpart(spe.hel, env$log.cond, select(env, -log.cond))) # Plot of the partitioning results par(mfrow = c(1, 1)) plot(spe.part.all, digits = 2, bg = c("red", "blue", "yellow")) # No combination of predictors contains a useful amount of variation in the # hellinger transformed invertebrate community matrix # conducitivity <0 # shared 0.103 # other Env <0 # residuals 1.046 # End # - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - #