## Supplementary file Code S2 - Controlled experiment with treatments (Porensky et al. 2016)

###################################################
### LOAD REQUIRED PACKAGES
###################################################

library(tidyverse) # for data handling and manipulation
library(readxl) # for reading in Excel files
library(codyn) # a range of community dynamics analyses
library(vegan) # for a range of community analyses
library(labdsv) # for a range of community analyses
library(betapart) # for beta diversity partitioning
library(adespatial) # beta diversity calculations
library(VennDiagram) # overlap and Venn diagrams

###################################################
### SET UP THE OBJECTS
###################################################

# Save current graphical settings
opar <- par() 

# Download the file from the dryad database: https://doi.org/10.5061/dryad.3m8p1
download.file(url="https://datadryad.org/stash/downloads/file_stream/35070", destfile="Porensky et al 2016.xlsx", mode="wb")

# Read in the data and name the object
pore <- read_excel("Porensky et al 2016.xlsx", col_names=T) # ignore timezone warning

# Rename the grazing treatment variable so it is easier to call on after reading from excel
pore <- rename(pore, Grazing.Treatment=`Grazing Treatment`) 

# This dataset does not have transect names, so create variable that is unique for each transect-treatment combination
pore$transect <- paste(pore$Grazing.Treatment, pore$replicate, pore$transect, sep="_") 
pore$transect <- as.factor(pore$transect[drop = TRUE])

# Change the variable transect into numbers and add an "S" before each number to make sure the transect variable will not be treated as numeric
pore$transect.num <- paste("S", sprintf("%02d", as.integer(pore$transect)), sep="")

# Make a variable that has unique combinations for each transect and year, i.e., time
pore$transect.time <- paste(pore$transect.num, pore$Year, sep="_") 

#There were changes in grazing treatments in 2007 but since there were no transect identifiers it is unclear which transects changed treatment. Therefore, we remove the pre-2007 data for our analysis
# Make a new object in long format that contains only data from after 2007 and remove species with zero cover
poren.long <-pore %>% dplyr::filter(Year > 2006) %>%
  arrange(Year, Grazing.Treatment) %>%
  gather(species, cover, -Year, -Grazing.Treatment, -replicate, -transect, -BARE, -DUNG, -FUNGI, -LICH, -LITT, -MOSS, -MUSH, -Total, -TotalPlant, -transect.num, -transect.time) %>%
  group_by(species) %>% 
  filter(cover > 0) %>%
  droplevels()

poren <- poren.long %>%
  arrange(Year, Grazing.Treatment) %>%
  spread(species, cover, fill = 0) %>%
  separate(transect, -1, into = c("temp", "temp1"), remove = FALSE) %>%
  unite("Trt_transect", c("Grazing.Treatment", "replicate", "temp1"), remove = FALSE) %>%
  dplyr::select(-temp, -temp1) %>%
  mutate(Trt_transect = fct_relevel(Trt_transect, "exclosure_1_f","exclosure_1_s","exclosure_2_f","exclosure_2_s", "new exclosure_1_f","new exclosure_1_s","new exclosure_2_f","new exclosure_2_s", "continuous light_1_f", "continuous light_1_s","continuous light_2_f","continuous light_2_s","new light_1_f","new light_1_s","new light_2_f","new light_2_s", "continuous moderate_1_f", "continuous moderate_1_s","continuous moderate_2_f","continuous moderate_2_s","continuous heavy_1_f","continuous heavy_1_s","continuous heavy_2_f","continuous heavy_2_s")) %>%
  mutate(Grazing.Treatment = as.factor(Grazing.Treatment)) %>%
  as.data.frame() # recreate the wide format dataset

row.names(poren) <- poren$transect.time # add the transect.time variable as the row names

poren.wide <- poren %>% 
  dplyr::select(-Year, -Trt_transect, -Grazing.Treatment, -replicate, -transect, -BARE, -DUNG, -FUNGI, -LICH, -LITT, -MOSS, -MUSH, -Total, -TotalPlant, -transect.num, -transect.time) # create an abundance matrix

poren.test.transect <- poren %>%  # a single transect
  filter(transect == "continuous heavy_1_f") %>%
  dplyr::select(-Year, -Trt_transect, -Grazing.Treatment, -replicate, -transect, -BARE, -DUNG, -FUNGI, -LICH, -LITT, -MOSS, -MUSH, -Total, -TotalPlant, -transect.num, -transect.time) %>%
  as.data.frame(.)

# Make a new object with the unique combinations of transects and treatments
poren.trts <- poren %>%
  dplyr::select(transect.num, Grazing.Treatment, Trt_transect) %>%
  distinct(transect.num, Grazing.Treatment) 

###################################################
### EXPLORATORY DATA ANALYSIS (EDA)
###################################################

## COUNT THE NUMBER OF TAXA, NUMBER OF SAMPLES, NUMBER OF EMPTY SAMPLES

### NUMBER OF TAXA and NUMBER OF SAMPLES

dim(poren.wide) # 192 species, 93 time-samples

levels(as.factor(poren.long$transect.num)) 
length(levels(as.factor(poren.long$transect.num))) # 24 different transects

### SPATIAL AND TEMPORAL REPLICATION

table(poren$Year, poren$Grazing.Treatment) # shows the replication of experimental treatments across time

### ROW AND COLUMN SUMMARIES AND RELATIVE ABUNDANCE DISTRIBUTION

#### ROW SUMMARIES, i.e. transect summaries

# Total cover across all species for each time by summing the covers in each row. Interpretation: There are no empty transects and there are some transects with greater total relative abundance (i.e. cover) than others.
transect.totals <- rowSums(poren.wide)
summary(transect.totals)
hist(transect.totals)

#### COLUMN SUMMARIES, i.e. taxa summary

# Calculate the total cover of each species across all transect-times  
species.totals <- colSums(poren.wide)
summary(species.totals)
hist(species.totals) # We can see that there is one taxon that is highly abundant - Which one is it?

species.totals[species.totals > 2800] == T # Show the rows that have total cover greater than 2800

summary(poren.wide$BOGR)
hist(poren.wide$BOGR) # Interpretation: Species BOGR is a very common and highly dominant (Bouteloua gracilis). 

### RELATIVE ABUNDANCE DISTRIBUTIONS

# Calculate relative abundances of all species over all times. In this dataset abundance==cover

# Make an object containing the mean cover of each species across all transects and times, i.e. the mean of each column
spp.mean.cover <- as.data.frame(colMeans(poren.wide))

# Label the column in this object
names(spp.mean.cover) <- "mean.cover"

# Add a variable which takes on the value of the row names, i.e. the species
spp.mean.cover$species <- rownames(spp.mean.cover)

# Order the object from largest to smallest mean cover
spp.cover.ord <- arrange(spp.mean.cover, desc(mean.cover))

ggplot(spp.cover.ord[1:50,], aes(x=reorder(species, -mean.cover), mean.cover)) + 
  geom_bar(stat = "identity") +
  ylab("Mean cover") +
  xlab("") + 
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) 

# Calculate total relative abundances for all species across all times
spp.cover.times <- poren.long %>%
  group_by(Year, species) %>%
  summarise(total.cover = sum(cover)) %>% # Calculate the total cover of each species in each year
  arrange(Year, desc(total.cover)) # Organise in decending order

# Plot relative abundances for the more common species for each year separately. Interpretation: BOGR is the most abundant species in 2007 and 2014. STCO was abundant in both years, but VUOC increased in abundance overall between 2007 and 2014.
spp.cover.times %>% filter(total.cover > 40) %>%
  ggplot(aes(x = reorder(species, -total.cover), y = total.cover)) +  
  geom_bar(stat="identity") +
  facet_grid(. ~ Year) + 
  ylab("Total cover") +
  xlab("") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) 

### SPECIES ACCUMULATION CURVES

# Use the specaccum function and specify random accumulation of species. Interpretation: The curve has reached an asymptote, demonstrating that the sampling effort was adequate to recover the expected number species at around 150 samples.
sa.random <- specaccum(poren.wide, method = "random", permutations = 1000)
plot(sa.random)

# We can also order the samples by time and investigate how species accumulated over time
# Use the specaccum function and specify "collector" method, which adds transects in the order they happen to be in the data. Interpretation: Shows that new species continued to be added over time, but most had been observed by the ~100th transect sampling time. You can also look at the output as well as the plot to better assess this and investigate quartiles.
poren.ord <- arrange(poren, Year)
sa.collector <- specaccum(poren.wide, method = "collector", permutations = 1000)
plot(sa.collector) 

### EXAMINE SPECIES' FREQUENCY DISTRIBUTIONS AND RESPONSE CURVES

# Example: Investigate species frequencies in response to litter cover. Interpretation: One of the most interesting patterns here is that the most abundant species, BOGR, decreases in cover with increasing litter cover. The patterns are a little unclear because of the high cover of BOGR obscuring the y axis for the other species.
# Calculate the total cover of each species
species.totals <- colSums(poren.wide)
# Make a new object that contains just the most abundant spp
abund.spp <- names(species.totals[species.totals > 15]) 
# Prepare the data and make the graph
poren.long %>% 
  dplyr::select(species, cover, LITT) %>%
  filter(species %in% abund.spp) %>% # Get abundant species, as calculated above
  ggplot(aes(x = LITT, y = cover, col = species)) +
  geom_line() +
  xlab("Litter (%)") +
  ylab("Total cover (%)") +
  facet_wrap( ~ species) +
  theme_bw()

## DETERMINE THE RATIO OF TEMPORAL TO SPATIAL VARIATION IN COMPOSITION

### TEMPORAL TURNOVER

# Use the turnover function to assess temporal turnover of each site. Interpretation: All transects in the 'continuous heavy' and 'new exclosure' grazing treatments showed declines in turnover around halfway through the experiment. Transects in the exclosure treatment showed minimal changes in turnover over time. These results show that there was a continuous turnover in species throughout the experiment of about 30% of species in most time period and treatment combinations.
total.to <- turnover(poren.long, time.var = "Year", species.var = "species", abundance.var = "cover", replicate.var = "transect.num")
# Add Grazing.Treatment back in so we can make graphs with it
poren.to <- inner_join(total.to, poren.trts, by = "transect.num")
ggplot(poren.to, aes(x=Year, y=total)) +
  geom_line(aes(colour=transect.num, lty=transect.num)) +
  scale_color_manual(values = c(rep(c("yellowgreen", "steelblue4", "firebrick", "orange"), 6))) +
  scale_linetype_manual(values=c(rep(c(1,2,3,4),6)))+
  facet_wrap(~Grazing.Treatment) + # Make a different panel for each grazing treatment
  ylab("Total turnover \n(proportion of species that changed)") + 
  xlab("Period endpoint") +
  guides(colour=guide_legend(title="Transect")) +
  theme_bw(base_size = 22) +
  theme(panel.grid.major = element_blank (), # remove major grid
        panel.grid.minor = element_blank (), text = element_text(size = 12), 
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), legend.position = "none")
ggsave("FIGURE_Plant_expt_turnover.png", width = 8, height = 5, dpi = 600)

### VISUALISE and QUANTIFY COMPOSITIONAL TURNOVER VIA DETRENDED CORRESPONDENCE ANALYSIS (DCA) and PRINCIPAL COORDINATES ANALYSIS (PCoA)

# The length of the gradient is given to us from the inertia values (Leps and Smilauer 2003). Interpretation: The first axis is <3.7. This indicates moderate but not complete turnover (Leps and Smilauer 2003).

# Perform the DCA
dca <- decorana(poren.wide) 
dca
plot(dca)

## DETERMINE THE OPTIMAL DISSIMILARITY MEASURE FOR DISSIMILARITY-BASED ANALYSES
# We used Principal coordinates analysis (PCoA) for this, a.k.a. Classical Multidimensional Scaling. Interpretation: The Canberra distance shows the best spread of points.

# Euclidean distance: from vegdist function d[jk] = sqrt(sum(x[ij]-x[ik])^2)
pco.eu <- cmdscale(vegdist(poren.wide, method="euclidean"), eig=T) 

# Bray-Curtis dissimilarity: from vegdist function d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik]))
pco.bc <- cmdscale(vegdist(poren.wide, method="bray"), eig=T) 

# Alternative Gower distance: from vegdist function d[jk] = (1/NZ) sum(abs(x[ij] - x[ik])), NZ is the number of non-zero columns excluding double-zeros (Anderson et al. 2006)
pco.ag <- cmdscale(vegdist(poren.wide, method="altGower"), eig=T) 

# Manhattan dissimilarity: from vegdist function d[jk] = sum(abs(x[ij] - x[ik]))
pco.mh <- cmdscale(vegdist(poren.wide, method="manhattan"), eig=T) 

# Chi-square distance: from dsvdis function (exp - obs) / sqrt{exp}
pco.cs <- cmdscale(dsvdis(poren.wide, "chisq"), eig=T)

# Canberra distance: from vegdist function d[jk] = (1/NZ) sum ((x[ij]-x[ik])/(x[ij]+x[ik]))
pco.ca <- cmdscale(vegdist(poren.wide, "canberra"), eig=T) 

par(mfrow=c(3,2))
plot(pco.eu$points, main="PCoA Euclidean (i.e. PCA)")
plot(pco.bc$points, main="PCoA Bray-Curtis") 
plot(pco.ag$points, main="PCoA alt Gower") 
plot(pco.mh$points, main="PCoA Manhattan") 
plot(pco.cs$points, main="PCoA Chi-squared") 
plot(pco.ca$points, main="PCoA Canberra")
par(opar)

## PRINCIPAL CO-ORDINATES ANALYSIS (PCoA)

# Perform the PCoA using Canberra distance (as determined in the EDA). Canberra distance: from vegdist function d[jk] = (1/NZ) sum ((x[ij]-x[ik])/(x[ij]+x[ik])) where NZ is the number of non-zero entries
pco.ca <- cmdscale(vegdist(poren.wide, "canberra"), eig=T) 

# The variation explained by each axis is its eigenvalue divided by the sum of the positive eigenvalues. Interpretation: The first axis explains 8.6% and the second axis explains 4.6% of the variation. This is quite low, given the low number of species.
pco.ca$eig[1] / sum(pco.ca$eig[pco.ca$eig > 0])  
pco.ca$eig[2] / sum(pco.ca$eig[pco.ca$eig > 0])

# Extract species scores: the correlation between the raw data and the transect scores
pco.ca.species <- as.data.frame(cor(poren.wide, pco.ca$points)) 
pco.ca.species$species <- rownames(pco.ca.species)
names(pco.ca.species)[1:2] <- c("PCO1", "PCO2")
head(pco.ca.species)

# Make a graph of the species scores
ggplot(pco.ca.species, aes(x = PCO1, y = PCO2)) +
  #geom_point(pch = 21, size = 3, alpha = 0.6, fill = "steelblue4") +
  geom_text(aes(label = species), size = 2) +
  ggtitle("Species scores, PCoA Canberra") +
  xlab("PCoA axis 1 (8.6%)") + ylab("PCoA axis 2 (4.6%)") +
  theme_bw()

# Extract the pcoa transect scores and add to the main dataset
poren$PCO1 <- pco.ca$points[, 1]
poren$PCO2 <- pco.ca$points[, 2]

# Make a graph of the transect scores (note alternative options for point colours). Interpretation: There appears to be some clustering of particular transects in ordination space, showing that particular transects have particular compositions. The earlier years are all in the lower portion of the ordination graph, indicating that these differ in species composition compared to later years. There seem to be compositional differences between different grazing treatments. The continuous light and exclosure treatments are distinct from those in the heavier grazing treatments along axis 1.
poren %>% ggplot(aes(x = PCO1, y = PCO2)) +
  geom_point(aes(fill = as.factor(Year)), pch = 21, size = 3, alpha = 0.6) +
  #geom_point(aes(fill = Grazing.Treatment), pch = 21, size = 3, alpha = 0.6) +
  #geom_point(aes(fill = transect.num), pch = 21, size = 3, alpha = 0.6) +
  ggtitle("Transect scores, PCoA Canberra") +
  xlab("PCoA axis 1 (8.6%)") + ylab("PCoA axis 2 (4.6%)") +
  #guides(fill = guide_legend(title = "Year")) +
  theme_bw()

# Make a trajectory plot by grazing treatment. Here, we can see the change in composition within each transect over time. Interpretatation: transects within each grazing treatment appear to be changing in composition towards particular parts of the ordination. 
poren %>% 
  dplyr::select(PCO1, PCO2, Trt_transect, Grazing.Treatment) %>%
  ggplot(aes(x = PCO1, y = PCO2)) +
  geom_path(arrow = arrow(length = unit(0.2, "cm")), aes(colour = Trt_transect)) +
  facet_wrap(~ Grazing.Treatment) +
  guides(colour = guide_legend(title = "Transect")) +
  xlab("PCoA axis 1 (8.6%)") + ylab("PCoA axis 2 (4.6%)") +
  theme_bw() +
  theme(panel.grid.major = element_blank (), panel.grid.minor = element_blank (), text = element_text(size = 12))

###################################################
### DATA ANALYSIS: 
### 1. Permutational analysis of variance (PERMANOVA)
### 2. Using the ordination sample scores in a linear model
### 3. Mean dissimilarity as response in regression
### 4. Principal response curves (PRC)
### 5. Beta diversity decomposition including local contributions to beta diversity (LCBD)
### 6. Schaefer null model analysis (demo code only; not presented in the case study)
### 7. Venn diagrams (demo code only; not presented in the case study)
### 8. Compositional pivot days (demo code only; not presented in the case study)
###################################################

#### 1. PERMANOVA
# Test the significance of differences in composition among years. Interpretation: The changes in composition over time were significant and different transects changed differently. 
(poren.adonis <- adonis(poren.wide ~ poren$transect * poren$Year, method = 'canberra', data = poren))

#### 2. USING THE ORDINATION SITE SCORES AS A RESPONSE VARIABLE IN A LINEAR MODEL
# Model PCoA axis 1 scores. Interpretation: Temporal changes in composition are not significant along PCoA axis 1. There are compositional differences in grazing treatments on PCoA axis 1, but there are significant temporal differences on PCoA axis 2.

# Linear regression and Principal response curves requires a 'baseline' treatement against which the other treatments are compared. In this case, we make the ungrazed treatment the baseline group for comparison
poren$grazing.order <- relevel(as.factor(poren$Grazing.Treatment), ref = "exclosure")

# Fit the linear models
summary(m1 <- lm(PCO1 ~ grazing.order + Year, data = poren))
summary(m2 <- lm(PCO2 ~ grazing.order + Year, data = poren))


#### 3.  MEAN DISSIMILARITY AS RESPONSE IN REGRESSION 
# Calculate dissimilarity matrix, replace diagonal elements with NA to leave out values comparing sample against itself, and calculate mean dissimilarity values for each sample
dis.ca <- as.matrix(vegdist(poren.wide, "canberra", upper = TRUE))
diag(dis.ca) <- NA
mean.dis <- colMeans(as.data.frame(dis.ca), na.rm = T)

# Fit the linear models
summary(m1 <- lm(mean.dis ~ grazing.order + Year, data = poren))
summary(m2 <- lm(mean.dis ~ grazing.order + Year, data = poren))


#### 4. PRINCIPAL RESPONSE CURVES

# Perform the PRC. Specify the composition data as the response, the grazing as the treatment, and year as the time variable. Interpretation: PASM, STCO, BRTE, and BOGR had the largest influence on the difference in composition between each treatment compared to the exclosure treatment. The continuous light treatment was less different in composition compared to the exclosure treatment. The continuous light composition is more similar to the exclosure treatment at the very beginning and then in the later time periods.
poren.prc <- prc(response = poren.wide, treatment = poren$grazing.order, time = as.factor(poren$Year))
poren.prc 
prc.summ <- summary(poren.prc)
# Select only those species that have high PRC scores, specify scaling=1 for transect scores. This graph is scaled by transect scores and only shows species with high scores
plot(poren.prc, select = abs(prc.summ$sp) > 0.5, scaling = 1, xlab = "Year")
plot(poren.prc, select = abs(prc.summ$sp) > 0.5, scaling = 1, xlab = "Year", legpos = NA, ylim = c(-0.5, 0.2), cex.axis = 1.5, cex.lab = 1.5, lwd = 4, const = 0.5, cex = 1.5)

# This graph is scaled by species scores and only shows species with high scores. Specify scaling=2
plot(poren.prc, select = abs(prc.summ$sp) > 0.5, scaling = 2, xlab = "Year", legpos = NA, ylim = c(-0.5, 0.2), cex.axis = 1.5, cex.lab = 1.5, lwd = 4, const = 0.5, cex = 1.5) 

png(filename = "FIGURE_Plant_expt_PRC.png", width = 10, height = 6, units = "in", res = 600)
plot(poren.prc, select = abs(prc.summ$sp) > 0.5, scaling = 2, xlab = "Year", legpos = NA, ylim = c(-0.5, 0.2), cex.axis = 1.5, cex.lab = 1.5, lwd = 4, const = 0.5, cex = 1.5) 
dev.off()

# Test significance of PRC by permutation. Interpretation: The differences in composition over time between treatments are statistically significant.
ctrl <- how(within = Within(type = "series"), plots = Plots(strata = poren$transect.num, type = "free"), nperm = 999) # Specify the structure of the permutations
anova(poren.prc, permutations = ctrl, first = TRUE) # Perform significance test


#### 5. BETA DIVERSITY DECOMPOSITION

# Investigate temporal nestedness for the first and last sample times (2007 and 2014), following Baselga and Orme (2012). Create presence-absence matrices for each year.

poren.pa <- as.data.frame(ifelse(poren.wide > 0, 1, 0)) # create presence-absence dataframe

poren.pa.2007 <- poren.pa %>% 
  rownames_to_column('transect.time') %>% # make transect.time a column in the dataset
  separate(transect.time, sep = -4, into = c("temp", "Year")) %>%
  dplyr::filter(Year == 2007) %>%
  dplyr::select(-Year, -temp)

poren.pa.2014 <- poren.pa %>% 
  rownames_to_column('transect.time') %>% # make transect.time a column in the dataset
  separate(transect.time, sep = -4, into = c("temp", "Year")) %>%
  dplyr::filter(Year == 2014) %>%
  dplyr::select(-Year, -temp) 

# Calculate temporal nestedness
(por.nest <- beta.temp(poren.pa.2007, poren.pa.2014, index.family = "sor"))
# beta.sim is Simpson dissimilarity(= turnover component of Sorensen dissimilarity)
# beta.sne is the nestedness component of Sorensen dissimilarity
# beta.sor is Sørensen dissimilarity

poren.nestedness <- cbind(poren.trts, por.nest)

# Plot the square-root-transformed components; x axis is nestedness component, the y axis is the turonover component. Points are labelled by transect number.
# Interpretation: S13 has high nestedness (beta diversity of time with lower richness is encompassed by time with higher richness) and low alpha diversity (few species in common between times). S1 has low nestedness (beta diversity of time with lower richness is unique compared to that with higher richness) and high alpha diversity (many species in common between times). This negative relationship makes sense because when you have high nestedness then you expect your samples would have fewer species in common than if you have low nestedness.

poren.nestedness %>%
  mutate(sqrt.beta.sim = sqrt(beta.sim), sqrt.beta.sne = sqrt(beta.sne), sqrt.beta.sor = sqrt(beta.sor)) %>%
  ggplot(aes(x = sqrt.beta.sne, y = sqrt.beta.sim)) +
  geom_point(pch = 21, size = 4, fill = "steelblue3") + 
  geom_text(aes(label = transect.num) ,hjust = -0.3, vjust = 0) +
  ylab(expression(sqrt(beta[sim]))) + xlab(expression(sqrt(beta[sne]))) +
  theme_bw(base_size = 14)


### 6. LOCAL CONTRIBUTIONS TO BETA DIVERSITY
# This tells us how unique each transect-time combination is in terms of species composition relative to the total variation in species composition, i.e., total beta diversity.

# Calculate beta diversity. sqrt.D=T = dissimilarities in matrix D are square-rooted
beta.div.result <- beta.div(poren.wide, "canberra", sqrt.D = T, nperm = 999)

# Look at total sums of squares and total beta diversity
beta.div.result$beta

# Combine LCBD output with main dataframe to explore patterns
poren$LCBD <- beta.div.result$LCBD
poren$LCBD_p.value <- beta.div.result$p.LCBD
poren$LCBD_Significance <- ifelse(poren$LCBD_p.value < 0.05, "Sig.", "NS")

# Which transects had significant LCBDs? Print out those transects that had P < 0.05
filter(poren, LCBD_p.value < 0.05) %>%
  dplyr::select(transect.time, Grazing.Treatment)

# Make graph to be able to visualise LCBDs (following Lamy et al. 2015, Fig. 3). Make the graph with year on the x axis and transect number on the y axis. Use points and make them different colours based on whether they are significant or not and larger if that have higher LCBD. 
# Interpretation: Most of the local contributions to beta diversity belong to transect 6, 15, and 16. Most of these significant contributions are in the later time periods.
poren %>% 
  ggplot(aes(x = Year, y = Trt_transect)) +
  geom_point(aes(size = LCBD, fill = LCBD_Significance, col = LCBD_Significance), pch = 21) +
  scale_x_continuous(limits = c(2007, 2014), breaks = c(2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014)) +
  scale_fill_manual(values = c("steelblue3", "firebrick3")) +
  scale_colour_manual(values = c("steelblue3", "black")) +
  coord_fixed(ratio = 1) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
  ylab("Transect") +
  theme_bw(base_size = 18) +
  theme(legend.title = element_blank(), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggsave("FIGURE_Plant_expt_LCBD.png", dpi = 600, width = 8, height = 12)

#### TEMPORAL COHERENCE 
# Measures if the degree of change is similar across different sets of samples. Values closer to 1 indicate synchrony between transects. 
# Interpretation: The resultant value is not close to one showing little synchronous change among transects.

# Do 2-way ANOVA and factor out transect variation
(mod.sum <- summary(mod <- aov(poren$PCO1 ~ Trt_transect + Year + Error(Trt_transect), data = poren)))

# Calculate the intra-class correlation coefficient: (msy - mse) / (msy + [n-1]mse)
print(paste("icc =", icc <- (mod.sum$`Error: Within`[[1]][[3]][[1]] - mod.sum$`Error: Within`[[1]][[3]][[2]]) / (mod.sum$`Error: Within`[[1]][[3]][[1]] + (23 * mod.sum$`Error: Within`[[1]][[3]][[2]]))))  


#### 7. NULL MODEL EXAMPLE (following Schaefer et al. 2005) 

# Arrange data to have covers of each time in columns

# Use the data in long format
por <- poren.long %>%
  dplyr::select(Year, Grazing.Treatment, transect.num, species, cover) %>% 
  spread(key = Year, value = cover, fill = 0) # Put them into format with years as columns filled with the covers

# Calculate coefficient of variation (CV) and cover abundance (AB) through time for each species on each transect where it is present: CV = sd(x)/mean(x); AB = mean(x)

# Create objects to fill in loops
CV <- numeric(nrow(por))
AB <- numeric(nrow(por))

# Create a loop to run through each row in "por" object
for(i in 1:nrow(por)) {
  # Calculate CV of each species in each transect over all years
  # (columns 4-11 are the covers for each year)
  CV[i] <- sd(as.numeric(por[i, 4:11]))/mean(as.numeric(por[i,4:11])) 
  # Calculate the mean cover of each species in each transect 
  # over all years
  AB[i] <- mean(as.numeric(por[i, 4:11])) 
}

# Fit non-linear function to CV-AB relationship and predict CV for each species

# Plot the observed relationship between Cv and AB
# If the slope of this relationship is zero then the variability in 
# abundance is independent of mean abundance
plot(CV ~ AB) 

# Generate a vector for plotting
x <- seq(min(AB), max(AB), length.out=length(AB)) 

# Try fitting exponential decay functions with different numbers of parameters

# Fit one parameter exponential decay function - blue line
exp1 <- nls(CV ~ a * exp(-b * AB), start=list(a=1, b=1))
lines(x, predict(exp1, list(AB = x)), col="blue", lwd=2)
a <- coef(exp1)[1]
b <- coef(exp1)[2]

# Fit two parameter exponential decay function - red line
exp2 <- nls(CV ~ y0 + a * exp(-b * AB), start=list(a=0.5, b=1, y0=1))  
lines(x, predict(exp2, list(AB = x)), col="red", lwd=2)
a <- coef(exp2)[1]
b <- coef(exp2)[2]
y0 <- coef(exp2)[3]

# Fit four parameter exponential decay function - green line  
exp3 <- nls(CV ~ a * exp(-b * AB) + c * exp(-d * AB), start=list(a=1, b=1, c=1, d=0)) 
lines(x, predict(exp3, list(AB = x)), col="green", lwd=2)
a <- coef(exp3)[1]
b <- coef(exp3)[2]
c <- coef(exp3)[3]
d <- coef(exp3)[4]

# Compare the fits of the models

anova(exp1,exp2,exp3) 
AIC(exp1)
AIC(exp2)
AIC(exp3) 

# Interpretation: The four parameter model (exp3) fits the data the best because it has the lowest residual sum of squares and also the lowest AIC

# Select a reference community for each transect = time 1, i.e. 2007 for all transects in this dataset

# Use the exponential decay curve fit to generate a predicted CV for each species at each transect based on the observed abundance

# Put the observed covers in 2007 (reference community) into the exponential decay curve
p.CV.t1 <- a * exp(-b * por$`2007`) + c * exp(-d * por$`2007`) 

# Plot the relationship    
plot(CV, p.CV.t1) # there are a bunch at high p.CV.t1

# Model the relationship
summary(lm(CV ~ p.CV.t1))  
# Note the adj R^2 = 0.532

# Change absent species to AB values of 1
ref <- ifelse(por$`2007`==0,1,por$`2007`) 

# Convert CV into a standard deviation to use in random draws
SD <- p.CV.t1 * ref 

# Feed the observed mean abundance (across all years) and predicted standard deviation into the randomisation model to obtain a predicted abundance for each species in each transect for year 7. Do this 1000 times and fill a matrix. Note that Schaefer et al. (2005) recommend 5000 iterations but we used 10 for demonstration purposes.

# Create objects to fill 
A <- numeric(nrow(por))
# In this code, each predicted community is added to a new column 
# in the matrix
A.mat <- matrix(nrow = nrow(por), ncol=10)

# Do 100 iterations by row
for(it in 1:10) {
  for(i in 1:nrow(por)) {
    # Draw from a negative binomial distribution
    A[i] <- rnbinom(1, mu=ref[i], size = ((ref[i]+ref[i]^2)/(SD[i]^2)))
    # It is impossible to have a percent cover greater than 100
    # So re-draw if this happens
    while(A[i] > 100) { 
      A[i] <- rnbinom(1, mu=ref[i], size = ((ref[i]+ref[i]^2)/(SD[i]^2)))
    }
  }
  A.mat[, it] <- A 
}

# Arrange the output. Note: The code here just tests for significant change between year 1 and year 7, but we could equally assess significant other periods. 

# Change the matrix to dataframe
A.matdf <- as.data.frame(A.mat) 

# Select the reference communities and transect labels  
# from the original dataframe
t1 <- as.data.frame(subset(por, select=c(transect.num, species, `2007`)))

# Join the observed with predicted communities
joint <- cbind(t1, A.matdf) 

# Arrange data to be able to calculate dissimilarities between observed and predicted communities

# Put species in columns and times in rows to calculate 
# compositional dissimilarity between observed and 
# predicted communities
j2 <- joint %>%
  
  # Put dataset into long format
  gather(key=comm, value=cover, -transect.num, -species) %>% 
  
  # Then make it into wide, with species in columns 
  # (i.e. format for calculating dissimilarities)
  spread(key=species, value=cover, fill = 0) %>%
  
  # Make a new variable that is a concatenation of the 
  # transect number and the label for the predicted community
  mutate(transect_comm=paste(transect.num, comm, sep="_")) %>% 
  
  # Put this new variable on the left of the object
  dplyr::select(transect_comm, everything()) 

# Calculate dissimilarities between observed and predicted communities

# Convert the object to a data frame so we can assign 
# row names that will be kept in the vegdist object created
j3 <- as.data.frame(j2) 
rownames(j3) <- j3$transect_comm

# Calculate the Bray-Curtis dissimilarity between the observed and
# predicted communities
# This gives a site by site matrix with row names and column headers
# filled with dissimilarities
bc <- as.data.frame(as.matrix(vegdist(j3[,4:length(j3)], method="bray"))) 

# Create a frequency distribution of Bray-Curtis dissimilarities for each transect

# Make a column for each transect community
bc$transect_comm <- rownames(bc)

# Make an object that has just the site number and the 
# predicted community label
transects <- subset(j3, select=c(transect_comm, transect.num))

# Join the labelling information to the main data file
bcj <- full_join(bc, transects, by="transect_comm") 

# Assess significant change is done on a per transect basis. Let's just look at transect 1

# Select transect 1

s1 <- sort(bc[1:11, 1])[-1] # just select transect 1 comparisons vs. observed

# Now calculate values for each of the observed years and see if community change is greater or less than expected

# Make the original long object a data frame so we can assign
# row names
poren <- as.data.frame(poren)
rownames(poren) <- poren$transect.time

# Calculate the Bray-Curtis dissimilarities
bco <- as.data.frame(as.matrix(vegdist(poren.wide, method="bray")))
bco$transect.time <- rownames(bco)

# Make an object that has just the site number and the 
# observed community label
transects <- subset(poren, select=c(transect.time, transect.num))

# Join the labelling information to the main data file
bcot <- full_join(transects, bco, by="transect.time")

# Use the observed Bray-Curtis distance from 2008 to assess if there has been a significant change in compostion 

# Select transect 1
s1o <- bcot %>%
  
  filter(transect.num=="S01") %>%
  
  dplyr::select(transect.time, S01_2008)

# Put it all on a histogram so we can look at whether there was a significant change in composition over time

# Plot the frequency distribution from the predicted composition  
hist(s1, xlim = c(0, 1))

# Calculate upper and lower quantiles
quan.s1 <- quantile(s1, c(0.025, 0.975)) 

# Put lines for  the quantiles on the histogram in red
abline(v=(quan.s1), col="red")

# Put a blue line for the observed composition 
abline(v=(mean(s1o[2:8,2])), col="blue")  

# Interpretation: At transect 1, composition changed significantly less than expected by chance


### 8. VENN DIAGRAMS showing overlap in species between samples

# Make a list for 2007 and 2008 showing those species that occur in that 
# year.
spp.lists.year <- list("2007" = names(which(colSums(poren.pa.2007) > 0)),
                       "2014" = names(which(colSums(poren.pa.2014) > 0)))

# Show the list
spp.lists.year

# Calculate the shared species in the two samples

overlap <- calculate.overlap(spp.lists.year)

# Show the species that are shared and not shared between 2007 and 2014; 
# lists correspond to the three circles in the Venn diagram
# a1 = species in 2007; a2 = species in 2014; a3 = species in both 2007 and 2014
overlap 

# Plot the venn diagram
venn.plot <- venn.diagram(x = spp.lists.year, category.names = c("2007", "2014"), filename = NULL, cex.category.names = 2, cex = 2, fill = c(4, 5), lwd = 3, alpha = rep(0.5, 2))

grid.draw(venn.plot)


### 9. COMPOSITIONAL PIVOT DAYS

# Pivot days can be identified by plotting a pairwise dissimilarity matrix heat map.

poren.canberra.t1 <- vegdist(poren.test.transect, "canberra", upper = TRUE, diag = TRUE)

image(x = 2007:2014, y = 2007:2014, z = as.matrix(poren.canberra.t1), col = terrain.colors(100))

# Note that our example dataset doesn't exhibit a pattern of pivot days, so this code is just illustrative.