Supplementary file Code S4 - Single site with seasonal sampling

# Gilbert et al. 2012

# Install required packages

library(readxl)
library(tidyverse) # for data handling and manipulation
library(lubridate)
library(codyn) # a range of community dynamics analyses
library(vegan) # for a range of community analyses
library(zetadiv) # Zeta diversity
library(synchrony) # synchrony

# Required function to obtain Northern Hemisphere seasons from a date vector
getFourSeasons <- function(DATES) {
  WS <- as.Date("2012-12-01", format = "%Y-%m-%d") # start of Winter
  SE <- as.Date("2012-3-01",  format = "%Y-%m-%d") # start of Spring
  SS <- as.Date("2012-6-01",  format = "%Y-%m-%d") # start of Summer
  FE <- as.Date("2012-9-01",  format = "%Y-%m-%d") # start of Fall
  
  # Convert dates from any year to 2012 dates
  d <- as.Date(strftime(DATES, format="2012-%m-%d"))
  
  ifelse (d >= WS | d < SE, "Winter",
          ifelse (d >= SE & d < SS, "Spring",
                  ifelse (d >= SS & d < FE, "Summer", "Autumn")))
  } # end getFourSeasons function

# Download the file from the online supplementary material
download.file(url = "https://media-nature-com.ezproxy.aut.ac.nz/original/nature-assets/ismej/journal/v6/n2/extref/ismej2011107x8.xls", destfile="supplementary1.xls")

# Read in the data and name the object
# supp1 <- read_xls("ismej2011107x8.xls") # water chemistry data
OTU_table <- read_xls("ismej2011107x9.xls")
# supp3 <- read_xls("ismej2011107x10.xls") # other data

# Tidy the data into long format without zeros. 
Marine_long <- OTU_table[, -c(2:4)] %>% 
  dplyr::select(-starts_with("na"), -ends_with("Dp"), -ends_with("__1")) %>%
  gather(key = Sample, value = Abundance, -LSA_ID) %>% 
  filter(Abundance > 0) %>%
  separate(Sample, -10, into = c("Sample", "Date")) %>%
  separate(Sample, 17, into = c("Sample", "Number")) %>%
  separate(Number, into = c("Number", "x"), sep = "_") %>%
  separate(Date, c("Year", "Month", "Day"), convert = TRUE, remove = FALSE) %>%
  mutate(Date = as.Date(paste(Year, "-", Month, "-", Day, sep = ""))) %>%
  dplyr::select(-Sample, -x) %>%
  arrange(Date)

# Generate a variable (Days) that gives the distance in days for each sample from the start
startdate <- Marine_long$Date[1] # tell R what the first day of the times series is
Marine_long <- Marine_long %>% 
  mutate(Days = as.numeric(difftime(Marine_long$Date, startdate, units = "days"))) %>%
  arrange(Days, LSA_ID)

head(Marine_long)

# Wide format
Marine_wide <- spread(Marine_long, key = LSA_ID, value = Abundance, fill = 0) %>%
  arrange(Days) %>%
  as.data.frame(.) %>%
  column_to_rownames("Days") %>%
  dplyr::select(-Number, -Date, -Year, -Month, -Day)

Marine_wide[, 1:5]

# Generate a dataset for graphing
Marine_gg <- spread(Marine_long, key = LSA_ID, value = Abundance, fill = 0) %>%
  mutate(Season = getFourSeasons(Date)) %>%
  arrange(Days) %>%
  dplyr::select(Number, Date, Year, Month, Day, Days, Season) %>%
  print(n = Inf)

### Datasets for the most variable taxa only (the ones for which there is the greatest range in abundance across the dataset)
Variable_species <- Marine_long %>% 
  group_by(LSA_ID) %>%
  summarise(Mean_abundance = mean(Abundance), SD_abundance = sd(Abundance)) %>%
  filter(SD_abundance > 500) %>%
  select(LSA_ID) 

Marine_long_variable <- Marine_long %>%
  dplyr::filter(LSA_ID %in% Variable_species$LSA_ID)

Marine_wide_variable <- spread(Marine_long_variable, key = LSA_ID, value = Abundance, fill = 0) %>%
  arrange(Days) %>%
  as.data.frame(.) %>%
  column_to_rownames("Days") %>%
  dplyr::select(-Number, -Date, -Year, -Month, -Day)

##########################################################
# Exploratory Data Analysis (EDA)
##########################################################

nlevels(as.factor(Marine_long$LSA_ID)) # number of OTUs
nlevels(as.factor(Marine_long_variable$LSA_ID)) # number of the most variable OTUs
nlevels(as.factor(Marine_long$Days))  # number of temporal samples

### Number of OTUs
Marine_long %>% 
  group_by(Days) %>% 
  mutate(Presence = 1) %>% 
  summarise(Richness = sum(Presence)) %>%
  ggplot(aes(x = Days, y = Richness)) + 
  geom_point(pch = 21, size = 4) + 
  geom_smooth(colour = "steelblue3") +
  ylab("OTU richness") + xlab("Days") +
  theme_bw()

### Relative abundance distribution of OTUs
Marine_long %>% 
  group_by(LSA_ID) %>% 
  mutate(Presence = 1) %>% 
  summarise(Abundance = sum(Presence)) %>%
  ggplot(aes(x = Abundance)) + 
  geom_histogram(bins = 25, colour = "black", fill = "steelblue3") + 
  ylab("Frequency of OTUs") +
  theme_bw()

### Rank abundance models for each sample. Interpretation:  
plot(radfit(Marine_wide))

### Species accumulation curves. Interpretation:
plot(specaccum(Marine_wide, method = "random", permutations = 1000), xlab = "Days")

### Abundance vs. time
Marine_long %>%
  ggplot(aes(x = Days, y = Abundance, col = LSA_ID)) +
    geom_line(show.legend = F) +
    theme_bw(base_size = 18)

Marine_long_variable %>%
  ggplot(aes(x = Days, y = Abundance, col = LSA_ID)) +
  geom_line(show.legend = F) +
  theme_bw(base_size = 18)

### Turnover. Interpretation: Turnover is relatively low overall and declines through time
Marine_long %>% turnover(., time.var = "Days", species.var = "LSA_ID", abundance.var = "Abundance") %>% arrange(total)

Marine_long %>% 
  turnover(., time.var = "Days", species.var = "LSA_ID", abundance.var = "Abundance") %>%
  ggplot(aes(x = Days, y = total)) +
  geom_line(size = 1) +
  ylim(c(0,1)) +
  ylab("Total turnover (proportion)") + xlab("Days") +
  theme_bw()

### Using ordination to examine compositional variation in time.
Marine_dca <- decorana(Marine_wide) 
Marine_dca # axis lengths are short (< 4), so compositional turnover is relatively low

# Bray-Curtis dissimilarity: from vegdist function d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik]))
Marine_pco_bc <- cmdscale(vegdist(Marine_wide, method = "bray"), eig = T) 

(Marine_var1 <- Marine_pco_bc$eig[1] / sum(Marine_pco_bc$eig[Marine_pco_bc$eig > 0]))  # % variance explained
(Marine_var2 <- Marine_pco_bc$eig[2] / sum(Marine_pco_bc$eig[Marine_pco_bc$eig > 0]))

# Extract species scores: the correlation between the raw data and the transect scores
pco.ca.otus <- as.data.frame(cor(Marine_wide, Marine_pco_bc$points)) 
pco.ca.otus$OTU <- rownames(pco.ca.otus)
names(pco.ca.otus)[1:2] <- c("PCO1", "PCO2")
head(pco.ca.otus)

# Make a graph of the OTU scores
ggplot(pco.ca.otus, aes(x = PCO1, y = PCO2)) +
  geom_point(pch = 21, size = 3, alpha = 0.6, fill = "steelblue4") +
  #geom_text(aes(label = OTU), size = 2) +
  ggtitle("OTU scores, PCoA Bray Curtis dissimilarity") +
  xlab(paste("PCoA axis 1 (", round(Marine_var1, 3) * 100, "%)", sep = "")) + 
  ylab(paste("PCoA axis 2 (", round(Marine_var2, 3) * 100, "%)", sep = "")) +
  theme_bw(base_size = 18)

# Extract the pcoa sample scores and add to the main dataset
Marine_gg$PCO1 <- Marine_pco_bc$points[, 1]
Marine_gg$PCO2 <- Marine_pco_bc$points[, 2]

# Make a graph of the sample scores
Marine_gg %>% 
  filter(Year > 2003) %>%
  arrange(Year, Days) %>%
  ggplot(aes(x = PCO1, y = PCO2)) +
  #geom_path(aes(colour = Days), size = 2) +
  geom_path(size = 1.5) +
  geom_point(aes(fill = Season), pch = 21, size = 5, alpha = 0.8) +
  #scale_fill_gradient(low = "steelblue3", high = "firebrick3") +
  #scale_colour_gradient(low = "steelblue3", high = "firebrick3") +
  facet_grid(. ~ Year) +
  xlab(paste("PCoA axis 1 (", round(Marine_var1, 3) * 100, "%)", sep = "")) + 
  ylab(paste("PCoA axis 2 (", round(Marine_var2, 3) * 100, "%)", sep = "")) +
  theme_bw(base_size = 18)
ggsave("Marine_PCoA_yearFacet.png", width = 14, height = 6, dpi = 600)

##########################################################
# Synchrony
##########################################################

# The Loreau and de Mazancourt (2008) method. "Compute community-wide synchrony and its the significance via Monte Carlo randomizations. If all species fluctuate in perfect unison, the community-wide synchrony will be 1. If species undergo uncorrelated fluctuations, the community-wide synchrony will be 0. The Monte Carlo randomizations are performed by shuffling the columns of the community matrix independently."
# Interpretation: Community synchrony is calculated as 0.271, which is significant, showing that at least some taxa in the community are tracking each other in their changes over time

community.sync(Marine_wide, type = 1, nrands = 999) # shuffle each column
community.sync(Marine_wide, type = 2, nrands = 999) # shift columns, but retain order

# Examine within-year synchrony patterns (ask if groups of taxa are tracking each other withi each year)
# Interpretation: There is not much difference in the synchrony values among years
years <- c(unique(Marine_gg$Year))
for(i in 1:length(unique(Marine_gg$Year))){
  temp <- cbind(Marine_gg, Marine_wide) %>%
    filter(Year == years[i]) %>%
    dplyr::select(-Number, -Date, -Year, -Month, -Day, -Days, -Season) 
  
  synch.out <- community.sync(temp, type = 1, nrands = 999)
  
  synch.out.df <- data.frame(Year = years[i], obs.Synchrony = synch.out$obs, P.value = synch.out$pval)
  
  print(years[i])
  
  if(i == 1) { Marine_annual_synchrony <- synch.out.df }
  if(i > 1) { Marine_annual_synchrony <- rbind(Marine_annual_synchrony, synch.out.df) }
  
} # end i loop through years

Marine_annual_synchrony %>%
  filter(Year > 2003) %>%
  ggplot(aes(x = Year, y = obs.Synchrony)) +
  geom_point(size = 4) +
  geom_line(size = 1.5) +
  ylim(c(0, 1)) +
  ylab("Observed synchrony") +
  theme_bw(base_size = 18)
ggsave("Marine_Synchrony_year.png", width = 10, height = 6, dpi = 600)

##########################################################
# Time lag analysis
##########################################################

# Question: Is there directional change in species composition over time (across years)
# Interpretation: Although statistically significant, dissimilarity doesn't increase markedly over time. Pairwise dissimilarity varies a lot, implying that some temporal samples are relatively similar and others are relatively dissimilar (but this is not related to time is a simple linear way)

# Plot time Lag regression using Bray-Curtis dissimilarity
Marine_time_lag <- data.frame(bray.dist = as.numeric(vegdist(Marine_wide, method = "bray")), time.lag = as.numeric(vegdist(Marine_gg$Days, method = "euclidean")))

summary(lm(bray.dist ~ time.lag, data = Marine_time_lag))

Marine_time_lag %>% 
  #mutate(Lag = sqrt(interval)) %>%
  ggplot(aes(x = time.lag, y = bray.dist)) +
  geom_point() +
  stat_smooth(method = "lm", se = F, size = 1) + # fit a regression
  #stat_smooth(method = "loess", se = F, size = 1, col = "firebrick3") + # fit a curve
  xlab("Time lag") + ylab("Bray Curtis distance in composition") +
  theme_bw(base_size = 18)
ggsave("Marine_TimeLagBC.png", width = 10, height = 6, dpi = 600)

##########################################################
# Mean dissimilarity as response in regression to test for seasonality
##########################################################

# Question: How does the dissimilarity of samples change in time? Are these changes related to seasonality? Let's look at the taxa that are changing the most.
# Interpretation: This shows

# Generate seasonal datasets
temp <- Marine_wide_variable
temp$Season <- Marine_gg$Season
temp$Days <- Marine_gg$Days
 
#  four seasons
Marine_winter <- temp %>% 
  filter(Season == "Winter") %>% 
  dplyr::select(-Season) %>% 
  as.data.frame(.) %>%
  column_to_rownames("Days")

Marine_spring <- temp %>% 
  filter(Season == "Spring") %>% 
  dplyr::select(-Season) %>%
  as.data.frame(.) %>%
  column_to_rownames("Days") 

Marine_summer <- temp %>% 
  filter(Season == "Summer") %>% 
  dplyr::select(-Season) %>% 
  as.data.frame(.) %>%
  column_to_rownames("Days")

Marine_autumn <- temp %>% 
  filter(Season == "Autumn") %>% 
  dplyr::select(-Season) %>%
  as.data.frame(.) %>%
  column_to_rownames("Days") 

rm(temp)

# Calculate mean Bray-Curtis dissimilarity for seasonal datasets
# Bray-Curtis dissimilarity

Marine_bc_winter <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Winter"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_winter, method = "bray"), upper = TRUE)))))
Marine_bc_summer <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Summer"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_summer, method = "bray"), upper = TRUE)))))
Marine_bc_spring <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Spring"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_spring, method = "bray"), upper = TRUE)))))
Marine_bc_autumn <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Autumn"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_autumn, method = "bray"), upper = TRUE)))))

Marine_bc_values <- rbind(Marine_bc_winter, Marine_bc_summer, Marine_bc_spring, Marine_bc_autumn)

Marine_gg <- full_join(Marine_gg, Marine_bc_values)

Marine_gg %>% 
  filter(Year > 2003) %>%
  ggplot(aes(x = Days, y = Mean_bray_dist)) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 16), size = 1) +
  #stat_smooth(method = "lm", se = F, size = 1) + # fit a regression
  #stat_smooth(method = "gam", se = F, size = 1, col = "firebrick3") + # fit a curve
  geom_point(aes(fill = Season), pch = 21, size = 4, alpha = 0.6) +
  xlab("Days") + ylab("Mean Bray-Curtis distance in composition") +
  ylim(c(0, 1)) +
  #ggtitle("Variable taxa") +
  theme_bw(base_size = 18)
ggsave("Marine_MeanDissBC.png", width = 10, height = 6, dpi = 600)

# Test for significance of the seasonal differences in mean dissimilarity
Marine_aov <- aov(Marine_gg$Mean_bray_dist[Marine_gg$Year != 2003] ~ Marine_gg$Season[Marine_gg$Year != 2003])
summary(Marine_aov)
TukeyHSD(Marine_aov)

##########################################################
# Zeta diversity
##########################################################

# Does the number of shared taxa (of those that were most variable) change for subsets of increasing numbers of temporal samples? 
# Calculate temporal normalised zeta diversity: Number of shared species among different numbers of temporal samples (orders of zeta) divided by the minimum number of species in the sites of each specific sample. 
#Interpretation: Zeta diversity declines, but only to around 0.67 showing that many of the OTUs are common and therefore shared across large numbers of temporal samples

Marine_pa <- Marine_wide_variable
Marine_pa[Marine_pa > 0] <- 1 

Marine_zeta <- data.frame(Order = 1:(length(unique(Marine_gg$Date)) - 1), Zeta_diversity = NA, Zeta_diversity_SD = NA)

for(z in 1:(length(unique(Marine_gg$Date)) - 1)) {
  zeta.out <- Zeta.order.mc(Marine_pa, order = z, normalize = "Simpson")
  Marine_zeta$Order[z] <- zeta.out$zeta.order
  Marine_zeta$Zeta_diversity[z] <- zeta.out$zeta.val
  Marine_zeta$Zeta_diversity_SD[z] <- zeta.out$zeta.val.sd
  print(z)
} # end z loop through zeta orders

Marine_zeta %>%
  filter(Order > 1) %>%
  ggplot(aes(x = Order, y = Zeta_diversity)) +
  geom_ribbon(aes(ymin = (Zeta_diversity - Zeta_diversity_SD), ymax = (Zeta_diversity + Zeta_diversity_SD)), fill = "steelblue3", alpha = 0.6) +
  geom_point(size = 4) +
  geom_line(size = 1.5) +
  xlab("Zeta order") +
  ylab("Zeta diversity") +
  theme_bw(base_size = 18)
ggsave("Marine_Zeta.png", width = 10, height = 6, dpi = 600)

##########################################################
# Zeta diversity by season
##########################################################

# Do seasons differ in their number of shared taxa for different sizes subsets of temporal samples? We would expect that seasons that are more variable in taxa composition, e.g., spring, should show fewer shared taxa, especially when including larger numbers of samples.
# Calculate temporal normalised zeta diversity: Number of shared taxa among different numbers of temporal samples (orders of zeta) divided by the minimum number of taxa in the sites of each specific sample. 
# Interpretation: 

Seasons <- unique(Marine_gg$Season)

for(i in 1:4) { # loop through the seasons
  
  Marine_pa <- Marine_wide_variable %>% mutate(Season = Marine_gg$Season)
  
  if(i == 1) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i])  %>% dplyr::select(-Season) }
  if(i == 2) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i]) %>% dplyr::select(-Season) }
  if(i == 3) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i]) %>% dplyr::select(-Season) }
  if(i == 4) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i]) %>% dplyr::select(-Season) }

  Marine_pa[Marine_pa > 0] <- 1 
  
  Marine_zeta <- data.frame(Season = NA, Order = (1:(dim(Marine_pa)[[1]]-1)), Zeta_diversity = NA, Zeta_diversity_SD = NA)
  
  for(z in 1:(dim(Marine_pa)[[1]]) - 1) {
    zeta.out <- Zeta.order.mc(Marine_pa, order = z, normalize = "Simpson")
    Marine_zeta$Season[z] <- Seasons[i]
    Marine_zeta$Order[z] <- zeta.out$zeta.order
    Marine_zeta$Zeta_diversity[z] <- zeta.out$zeta.val
    Marine_zeta$Zeta_diversity_SD[z] <- zeta.out$zeta.val.sd
    print(z)
  } # end z loop through zeta orders
  
  if(i == 1) { Marine_zeta_out <- Marine_zeta }
  if(i > 1) { Marine_zeta_out <- rbind(Marine_zeta_out, Marine_zeta) }
  
  } # end i loop through the seasons

Marine_zeta_out %>%
  dplyr::filter(Order > 1) %>%
  ggplot(aes(x = Order, y = Zeta_diversity)) +
  geom_ribbon(aes(ymin = (Zeta_diversity - Zeta_diversity_SD), ymax = (Zeta_diversity + Zeta_diversity_SD), fill = Season), alpha = 0.3) +
  geom_line(aes(colour = Season), size = 1.5, alpha = 0.8) +
  geom_point(aes(fill = Season), size = 2, pch = 21, colour = "black", alpha = 0.8) +
  xlab("Zeta order") +
  ylab("Zeta diversity") +
  theme_bw(base_size = 18)
ggsave("Marine_Zeta.png", width = 10, height = 6, dpi = 600)

summary(Marine_aov <- aov(Marine_zeta_out$Zeta_diversity ~ Marine_zeta_out$Season))
TukeyHSD(Marine_aov)