# Sean Kinard
# 03-30-2021
# Spring 17 Texas Coastal Prairie

# Invertebrate Canonical Ordination Using Hellinger-transformation and 
# RDA constrained to annual precipitation



# Load Packages
library(BiodiversityR)
library(ggsci)
library(ggrepel)
library(ggforce)
library(ade4)
library(adegraphics)
library(adespatial)
library(cocorresp)
library(MASS)
library(ellipse)
library(FactoMineR)
library(rrcov)
library(tidyverse)

# Load Data
env <- read.csv("sp17_data_files/sp17_site_x_env.csv")
fish <- read.csv("sp17_data_files/sp17_site_x_fish.csv")

# - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - #

# cleaning up data frames to include only the a priori selected variables and community abundance matrix
(env <- select(env, STAID, AP, flash.index, LFPP, NH4., log.cond, Rosgen.Index))

env[,2:7] <- scale(env[,2:7])

fish <- select(fish, STAID, A.melas:T.maculatus)

# Merge dataframes to ensure matching rows (sites) 
msterfish <- merge(env,fish, by = "STAID")

# matrix formation
spec <- msterfish[,8:25]
env2 <- msterfish[,2:7]

# - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - #

# # # # # # # # # # # # # # # 
# Redundancy Analysis (RDA) #
# # # # # # # # # # # # # # # 

# step 1: The ordiplot object is obtained from the result of via function rda. The ordination is done with a community data set that is transformed by the Hellinger method.

# script generated by the BiodiversityR GUI from the constrained ordination menu
fish.Hellinger <- disttransform(spec, method='hellinger')
Ordination.model2 <- rda(fish.Hellinger ~ . ,data=msterfish, scaling="species")
summary(Ordination.model2)

# This can be plotted via ordiplot
plot2 <- ordiplot(Ordination.model2, choices=c(1,2))

# Step 2 : To plot data via ggplot2

# information on the locations of sites (circles in the ordiplot) is obtained via function sites.long.
sites.long2 <- sites.long(plot2, env.data=msterfish[,2:8])
head(sites.long2)

# Information on species is extracted by function species.long.
species.long2 <- species.long(plot2)
species.long2 

# Information on the labeling of the axes is obtained with function axis.long. This information is extracted from the ordination model and not the ordiplot, hence it is important to select the same axes via argument 'choices'. The extracted information includes information on the percentage of explained variation. I suggest that you cross-check with the summary of the redundancy analysis, where information on proportion explained is given.

axis.long2 <- axis.long(Ordination.model2, choices=c(1, 2))
axis.long2

# Step 3: Generate plot adding information on sites and species similar to the ordiplot:

plot_frda <- ggplot() +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  xlab(axis.long2[1, "label"]) +
  ylab(axis.long2[2, "label"]) +
  scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
  scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
  geom_point(data=sites.long2,
             aes(x=axis1, y=axis2, colour=AP),
             size=6) +
  geom_point(data=species.long2,
             aes(x=axis1, y=axis2)) +
  scale_colour_viridis_c(direction=-1) +
  coord_fixed(ratio=1) +
  theme_classic() +
  theme(text = element_text(size = 16)) +
  theme(axis.text = element_text(size = 16)) +
  theme(axis.title = element_text(size = 20))


plot_frda

# With an ordination via redundancy analysis, the positions of species should be interpreted as arrows. It is straightforward to show the positions of the species as arrows. However, with a large number of species, it is better to reduce the number to those that explain more of the variation among the sites. This information is obtained with the envfit function of vegan.

spec.envfit <- envfit(plot2, env=fish.Hellinger)
spec.data.envfit <- data.frame(r=spec.envfit$vectors$r, p=spec.envfit$vectors$pvals)
species.long2 <- species.long(plot2, spec.data=spec.data.envfit)
species.long2

# Species are selected that have a statistically significant p-value
species.long3 <- species.long2[species.long2$p <= 0.3, ]
species.long3

# Now the information of the selected species can be added to the ordination diagram.

# adding environmental fit vectors
vectors.envfit <- envfit(plot2, env=msterfish[,2:7])
vectors.long2 <- vectorfit.long(vectors.envfit)
vectors.long2

# Only Predictors are selected that have a statistically significant p-value
vectors.long3 <- vectors.long2[vectors.long2$p <= 0.05, ]
vectors.long3

# which gives the following ordination diagram (length of vectors were multiplied, which does not change their interpretation)
plot_frda <- ggplot() +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  xlab(axis.long2[1, "label"]) +
  ylab(axis.long2[2, "label"]) +
  scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
  scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
  geom_segment(data=vectors.long2,
               aes(x=0, y=0, xend=axis1*.5, yend=axis2*.5),
               colour="black", size=1.2, arrow=arrow()) +
  geom_label_repel(data=vectors.long2,
                   aes(x=axis1*.5, y=axis2*.5, label=rownames(vectors.long2)),
                   colour="black", box.padding = 0.7) +
  geom_point(data=species.long2,
               aes(x=axis1*1.5, y=axis2*1.5),
               colour="red", size=1.2, shape = 3) +
  geom_label_repel(data=species.long3,
                   aes(x=axis1*1.5, y=axis2*1.5, label=labels),
                   box.padding = 1.0)  +
  geom_point(data=sites.long2, alpha = 0.8,
             aes(x=axis1, y=axis2, colour=AP),
             size=5, show.legend = FALSE) +
  scale_colour_viridis_c(direction=-1) +
  coord_fixed(ratio=1) +
  theme_classic() +
  theme(text = element_text(size = 16)) +
  theme(axis.text = element_text(size = 16)) +
  theme(axis.title = element_text(size = 20))

# which gives the following ordination diagram (length of vectors were multiplied, which does not change their interpretation)

# environmental vectors (black = p <.05, gray = ns)
# LFPP and NH4. have p-values that exceed but also round to 0.05
plot_frda

#  without labels
plot_frda_nolabel  <- ggplot() +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +
  xlab(axis.long2[1, "label"]) +
  ylab(axis.long2[2, "label"]) +
  scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
  scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
  geom_point(data=species.long2,
             aes(x=axis1*1.5, y=axis2*1.5),
             colour="red", size=1.2, shape = 3) +
  geom_point(data=sites.long2, alpha = 0.8,
             aes(x=axis1, y=axis2, colour=AP),
             size=5, show.legend = FALSE) +
  scale_colour_viridis_c(direction=-1) +
  coord_fixed(ratio=1) +
  theme_classic() +
  theme(text = element_text(size = 16)) +
  theme(axis.text = element_text(size = 16)) +
  theme(axis.title = element_text(size = 20))

plot_frda_nolabel

# - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - #

# Alternative RDA constrained to all environmental predictors 
# (not advised for small sample sites)
# Seek 'partially' constrained RDA with fewer variables where # variables is less than the sample sites divided by 2 (See Legendre 2018 Numerical Ecology using R)
# Secondary Goal: Identify covariation among predictors via VIF and variance partitions

# create datasets
spec <- msterfish[,9:25]
env2 <- msterfish[,2:7]

# RDA: all environmental variables
spe.hel <- disttransform(spec, method='hellinger')
(spe.rda <- rda(spe.hel ~ ., env2)) 
vif.cca(spe.rda) 
# Note that Ap (precipitation has VIF >10)

#     RDA with all explanatory variables
spe.rda.all <- rda(spe.hel ~ ., data = env2)
# Global adjusted R^2
(R2a.all <- RsquareAdj(spe.rda.all))
# R2 = 0.7075825

# Forward selection using vegan's ordistep()
# This function allows the use of factors. 
mod0 <- rda(spe.hel ~ 1, data = env2)
step.forward <- 
  ordistep(mod0, 
           scope = formula(spe.rda.all), 
           direction = "forward", 
           permutations = how(nperm = 499) # Step: spe.hel ~ log.cond 
  )
RsquareAdj(step.forward)

# Forward model selection stops at: spe.hel ~ NH4.
# with  R2 of 0.2477466

## 1. Variation partitioning NH4 vs all environmental variables
(spe.part.all <- varpart(spe.hel, env$NH4., select(env, -NH4., -AP)))

# Plot of the partitioning results
par(mfrow = c(1, 1))
plot(spe.part.all, digits = 2, bg = c("red", "blue", "yellow"))

# Hellinger Transformed fish community Variance Partitioning
# NH4      0.089
# shared   0.065
# other E  0.112
# Residual 0.735

#    Partial RDA with Only NH4
spe.rda.NH4 <- rda(spe.hel ~ NH4., data = env2)
# Global adjusted R^2
(R2a.all <- RsquareAdj(spe.rda.NH4))
# R2 = 0.2467953 

# End
# - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - #