Supplementary file Code S4 - Single site with seasonal sampling # Gilbert et al. 2012 # Install required packages library(readxl) library(tidyverse) # for data handling and manipulation library(lubridate) library(codyn) # a range of community dynamics analyses library(vegan) # for a range of community analyses library(zetadiv) # Zeta diversity library(synchrony) # synchrony # Required function to obtain Northern Hemisphere seasons from a date vector getFourSeasons <- function(DATES) { WS <- as.Date("2012-12-01", format = "%Y-%m-%d") # start of Winter SE <- as.Date("2012-3-01", format = "%Y-%m-%d") # start of Spring SS <- as.Date("2012-6-01", format = "%Y-%m-%d") # start of Summer FE <- as.Date("2012-9-01", format = "%Y-%m-%d") # start of Fall # Convert dates from any year to 2012 dates d <- as.Date(strftime(DATES, format="2012-%m-%d")) ifelse (d >= WS | d < SE, "Winter", ifelse (d >= SE & d < SS, "Spring", ifelse (d >= SS & d < FE, "Summer", "Autumn"))) } # end getFourSeasons function # Download the file from the online supplementary material download.file(url = "https://media-nature-com.ezproxy.aut.ac.nz/original/nature-assets/ismej/journal/v6/n2/extref/ismej2011107x8.xls", destfile="supplementary1.xls") # Read in the data and name the object # supp1 <- read_xls("ismej2011107x8.xls") # water chemistry data OTU_table <- read_xls("ismej2011107x9.xls") # supp3 <- read_xls("ismej2011107x10.xls") # other data # Tidy the data into long format without zeros. Marine_long <- OTU_table[, -c(2:4)] %>% dplyr::select(-starts_with("na"), -ends_with("Dp"), -ends_with("__1")) %>% gather(key = Sample, value = Abundance, -LSA_ID) %>% filter(Abundance > 0) %>% separate(Sample, -10, into = c("Sample", "Date")) %>% separate(Sample, 17, into = c("Sample", "Number")) %>% separate(Number, into = c("Number", "x"), sep = "_") %>% separate(Date, c("Year", "Month", "Day"), convert = TRUE, remove = FALSE) %>% mutate(Date = as.Date(paste(Year, "-", Month, "-", Day, sep = ""))) %>% dplyr::select(-Sample, -x) %>% arrange(Date) # Generate a variable (Days) that gives the distance in days for each sample from the start startdate <- Marine_long$Date[1] # tell R what the first day of the times series is Marine_long <- Marine_long %>% mutate(Days = as.numeric(difftime(Marine_long$Date, startdate, units = "days"))) %>% arrange(Days, LSA_ID) head(Marine_long) # Wide format Marine_wide <- spread(Marine_long, key = LSA_ID, value = Abundance, fill = 0) %>% arrange(Days) %>% as.data.frame(.) %>% column_to_rownames("Days") %>% dplyr::select(-Number, -Date, -Year, -Month, -Day) Marine_wide[, 1:5] # Generate a dataset for graphing Marine_gg <- spread(Marine_long, key = LSA_ID, value = Abundance, fill = 0) %>% mutate(Season = getFourSeasons(Date)) %>% arrange(Days) %>% dplyr::select(Number, Date, Year, Month, Day, Days, Season) %>% print(n = Inf) ### Datasets for the most variable taxa only (the ones for which there is the greatest range in abundance across the dataset) Variable_species <- Marine_long %>% group_by(LSA_ID) %>% summarise(Mean_abundance = mean(Abundance), SD_abundance = sd(Abundance)) %>% filter(SD_abundance > 500) %>% select(LSA_ID) Marine_long_variable <- Marine_long %>% dplyr::filter(LSA_ID %in% Variable_species$LSA_ID) Marine_wide_variable <- spread(Marine_long_variable, key = LSA_ID, value = Abundance, fill = 0) %>% arrange(Days) %>% as.data.frame(.) %>% column_to_rownames("Days") %>% dplyr::select(-Number, -Date, -Year, -Month, -Day) ########################################################## # Exploratory Data Analysis (EDA) ########################################################## nlevels(as.factor(Marine_long$LSA_ID)) # number of OTUs nlevels(as.factor(Marine_long_variable$LSA_ID)) # number of the most variable OTUs nlevels(as.factor(Marine_long$Days)) # number of temporal samples ### Number of OTUs Marine_long %>% group_by(Days) %>% mutate(Presence = 1) %>% summarise(Richness = sum(Presence)) %>% ggplot(aes(x = Days, y = Richness)) + geom_point(pch = 21, size = 4) + geom_smooth(colour = "steelblue3") + ylab("OTU richness") + xlab("Days") + theme_bw() ### Relative abundance distribution of OTUs Marine_long %>% group_by(LSA_ID) %>% mutate(Presence = 1) %>% summarise(Abundance = sum(Presence)) %>% ggplot(aes(x = Abundance)) + geom_histogram(bins = 25, colour = "black", fill = "steelblue3") + ylab("Frequency of OTUs") + theme_bw() ### Rank abundance models for each sample. Interpretation: plot(radfit(Marine_wide)) ### Species accumulation curves. Interpretation: plot(specaccum(Marine_wide, method = "random", permutations = 1000), xlab = "Days") ### Abundance vs. time Marine_long %>% ggplot(aes(x = Days, y = Abundance, col = LSA_ID)) + geom_line(show.legend = F) + theme_bw(base_size = 18) Marine_long_variable %>% ggplot(aes(x = Days, y = Abundance, col = LSA_ID)) + geom_line(show.legend = F) + theme_bw(base_size = 18) ### Turnover. Interpretation: Turnover is relatively low overall and declines through time Marine_long %>% turnover(., time.var = "Days", species.var = "LSA_ID", abundance.var = "Abundance") %>% arrange(total) Marine_long %>% turnover(., time.var = "Days", species.var = "LSA_ID", abundance.var = "Abundance") %>% ggplot(aes(x = Days, y = total)) + geom_line(size = 1) + ylim(c(0,1)) + ylab("Total turnover (proportion)") + xlab("Days") + theme_bw() ### Using ordination to examine compositional variation in time. Marine_dca <- decorana(Marine_wide) Marine_dca # axis lengths are short (< 4), so compositional turnover is relatively low # Bray-Curtis dissimilarity: from vegdist function d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik])) Marine_pco_bc <- cmdscale(vegdist(Marine_wide, method = "bray"), eig = T) (Marine_var1 <- Marine_pco_bc$eig[1] / sum(Marine_pco_bc$eig[Marine_pco_bc$eig > 0])) # % variance explained (Marine_var2 <- Marine_pco_bc$eig[2] / sum(Marine_pco_bc$eig[Marine_pco_bc$eig > 0])) # Extract species scores: the correlation between the raw data and the transect scores pco.ca.otus <- as.data.frame(cor(Marine_wide, Marine_pco_bc$points)) pco.ca.otus$OTU <- rownames(pco.ca.otus) names(pco.ca.otus)[1:2] <- c("PCO1", "PCO2") head(pco.ca.otus) # Make a graph of the OTU scores ggplot(pco.ca.otus, aes(x = PCO1, y = PCO2)) + geom_point(pch = 21, size = 3, alpha = 0.6, fill = "steelblue4") + #geom_text(aes(label = OTU), size = 2) + ggtitle("OTU scores, PCoA Bray Curtis dissimilarity") + xlab(paste("PCoA axis 1 (", round(Marine_var1, 3) * 100, "%)", sep = "")) + ylab(paste("PCoA axis 2 (", round(Marine_var2, 3) * 100, "%)", sep = "")) + theme_bw(base_size = 18) # Extract the pcoa sample scores and add to the main dataset Marine_gg$PCO1 <- Marine_pco_bc$points[, 1] Marine_gg$PCO2 <- Marine_pco_bc$points[, 2] # Make a graph of the sample scores Marine_gg %>% filter(Year > 2003) %>% arrange(Year, Days) %>% ggplot(aes(x = PCO1, y = PCO2)) + #geom_path(aes(colour = Days), size = 2) + geom_path(size = 1.5) + geom_point(aes(fill = Season), pch = 21, size = 5, alpha = 0.8) + #scale_fill_gradient(low = "steelblue3", high = "firebrick3") + #scale_colour_gradient(low = "steelblue3", high = "firebrick3") + facet_grid(. ~ Year) + xlab(paste("PCoA axis 1 (", round(Marine_var1, 3) * 100, "%)", sep = "")) + ylab(paste("PCoA axis 2 (", round(Marine_var2, 3) * 100, "%)", sep = "")) + theme_bw(base_size = 18) ggsave("Marine_PCoA_yearFacet.png", width = 14, height = 6, dpi = 600) ########################################################## # Synchrony ########################################################## # The Loreau and de Mazancourt (2008) method. "Compute community-wide synchrony and its the significance via Monte Carlo randomizations. If all species fluctuate in perfect unison, the community-wide synchrony will be 1. If species undergo uncorrelated fluctuations, the community-wide synchrony will be 0. The Monte Carlo randomizations are performed by shuffling the columns of the community matrix independently." # Interpretation: Community synchrony is calculated as 0.271, which is significant, showing that at least some taxa in the community are tracking each other in their changes over time community.sync(Marine_wide, type = 1, nrands = 999) # shuffle each column community.sync(Marine_wide, type = 2, nrands = 999) # shift columns, but retain order # Examine within-year synchrony patterns (ask if groups of taxa are tracking each other withi each year) # Interpretation: There is not much difference in the synchrony values among years years <- c(unique(Marine_gg$Year)) for(i in 1:length(unique(Marine_gg$Year))){ temp <- cbind(Marine_gg, Marine_wide) %>% filter(Year == years[i]) %>% dplyr::select(-Number, -Date, -Year, -Month, -Day, -Days, -Season) synch.out <- community.sync(temp, type = 1, nrands = 999) synch.out.df <- data.frame(Year = years[i], obs.Synchrony = synch.out$obs, P.value = synch.out$pval) print(years[i]) if(i == 1) { Marine_annual_synchrony <- synch.out.df } if(i > 1) { Marine_annual_synchrony <- rbind(Marine_annual_synchrony, synch.out.df) } } # end i loop through years Marine_annual_synchrony %>% filter(Year > 2003) %>% ggplot(aes(x = Year, y = obs.Synchrony)) + geom_point(size = 4) + geom_line(size = 1.5) + ylim(c(0, 1)) + ylab("Observed synchrony") + theme_bw(base_size = 18) ggsave("Marine_Synchrony_year.png", width = 10, height = 6, dpi = 600) ########################################################## # Time lag analysis ########################################################## # Question: Is there directional change in species composition over time (across years) # Interpretation: Although statistically significant, dissimilarity doesn't increase markedly over time. Pairwise dissimilarity varies a lot, implying that some temporal samples are relatively similar and others are relatively dissimilar (but this is not related to time is a simple linear way) # Plot time Lag regression using Bray-Curtis dissimilarity Marine_time_lag <- data.frame(bray.dist = as.numeric(vegdist(Marine_wide, method = "bray")), time.lag = as.numeric(vegdist(Marine_gg$Days, method = "euclidean"))) summary(lm(bray.dist ~ time.lag, data = Marine_time_lag)) Marine_time_lag %>% #mutate(Lag = sqrt(interval)) %>% ggplot(aes(x = time.lag, y = bray.dist)) + geom_point() + stat_smooth(method = "lm", se = F, size = 1) + # fit a regression #stat_smooth(method = "loess", se = F, size = 1, col = "firebrick3") + # fit a curve xlab("Time lag") + ylab("Bray Curtis distance in composition") + theme_bw(base_size = 18) ggsave("Marine_TimeLagBC.png", width = 10, height = 6, dpi = 600) ########################################################## # Mean dissimilarity as response in regression to test for seasonality ########################################################## # Question: How does the dissimilarity of samples change in time? Are these changes related to seasonality? Let's look at the taxa that are changing the most. # Interpretation: This shows # Generate seasonal datasets temp <- Marine_wide_variable temp$Season <- Marine_gg$Season temp$Days <- Marine_gg$Days # four seasons Marine_winter <- temp %>% filter(Season == "Winter") %>% dplyr::select(-Season) %>% as.data.frame(.) %>% column_to_rownames("Days") Marine_spring <- temp %>% filter(Season == "Spring") %>% dplyr::select(-Season) %>% as.data.frame(.) %>% column_to_rownames("Days") Marine_summer <- temp %>% filter(Season == "Summer") %>% dplyr::select(-Season) %>% as.data.frame(.) %>% column_to_rownames("Days") Marine_autumn <- temp %>% filter(Season == "Autumn") %>% dplyr::select(-Season) %>% as.data.frame(.) %>% column_to_rownames("Days") rm(temp) # Calculate mean Bray-Curtis dissimilarity for seasonal datasets # Bray-Curtis dissimilarity Marine_bc_winter <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Winter"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_winter, method = "bray"), upper = TRUE))))) Marine_bc_summer <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Summer"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_summer, method = "bray"), upper = TRUE))))) Marine_bc_spring <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Spring"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_spring, method = "bray"), upper = TRUE))))) Marine_bc_autumn <- data.frame(Days = Marine_gg$Days[Marine_gg$Season == "Autumn"], Mean_bray_dist = as.numeric(colMeans(as.matrix(as.dist(vegdist(Marine_autumn, method = "bray"), upper = TRUE))))) Marine_bc_values <- rbind(Marine_bc_winter, Marine_bc_summer, Marine_bc_spring, Marine_bc_autumn) Marine_gg <- full_join(Marine_gg, Marine_bc_values) Marine_gg %>% filter(Year > 2003) %>% ggplot(aes(x = Days, y = Mean_bray_dist)) + stat_smooth(method = "lm", formula = y ~ poly(x, 16), size = 1) + #stat_smooth(method = "lm", se = F, size = 1) + # fit a regression #stat_smooth(method = "gam", se = F, size = 1, col = "firebrick3") + # fit a curve geom_point(aes(fill = Season), pch = 21, size = 4, alpha = 0.6) + xlab("Days") + ylab("Mean Bray-Curtis distance in composition") + ylim(c(0, 1)) + #ggtitle("Variable taxa") + theme_bw(base_size = 18) ggsave("Marine_MeanDissBC.png", width = 10, height = 6, dpi = 600) # Test for significance of the seasonal differences in mean dissimilarity Marine_aov <- aov(Marine_gg$Mean_bray_dist[Marine_gg$Year != 2003] ~ Marine_gg$Season[Marine_gg$Year != 2003]) summary(Marine_aov) TukeyHSD(Marine_aov) ########################################################## # Zeta diversity ########################################################## # Does the number of shared taxa (of those that were most variable) change for subsets of increasing numbers of temporal samples? # Calculate temporal normalised zeta diversity: Number of shared species among different numbers of temporal samples (orders of zeta) divided by the minimum number of species in the sites of each specific sample. #Interpretation: Zeta diversity declines, but only to around 0.67 showing that many of the OTUs are common and therefore shared across large numbers of temporal samples Marine_pa <- Marine_wide_variable Marine_pa[Marine_pa > 0] <- 1 Marine_zeta <- data.frame(Order = 1:(length(unique(Marine_gg$Date)) - 1), Zeta_diversity = NA, Zeta_diversity_SD = NA) for(z in 1:(length(unique(Marine_gg$Date)) - 1)) { zeta.out <- Zeta.order.mc(Marine_pa, order = z, normalize = "Simpson") Marine_zeta$Order[z] <- zeta.out$zeta.order Marine_zeta$Zeta_diversity[z] <- zeta.out$zeta.val Marine_zeta$Zeta_diversity_SD[z] <- zeta.out$zeta.val.sd print(z) } # end z loop through zeta orders Marine_zeta %>% filter(Order > 1) %>% ggplot(aes(x = Order, y = Zeta_diversity)) + geom_ribbon(aes(ymin = (Zeta_diversity - Zeta_diversity_SD), ymax = (Zeta_diversity + Zeta_diversity_SD)), fill = "steelblue3", alpha = 0.6) + geom_point(size = 4) + geom_line(size = 1.5) + xlab("Zeta order") + ylab("Zeta diversity") + theme_bw(base_size = 18) ggsave("Marine_Zeta.png", width = 10, height = 6, dpi = 600) ########################################################## # Zeta diversity by season ########################################################## # Do seasons differ in their number of shared taxa for different sizes subsets of temporal samples? We would expect that seasons that are more variable in taxa composition, e.g., spring, should show fewer shared taxa, especially when including larger numbers of samples. # Calculate temporal normalised zeta diversity: Number of shared taxa among different numbers of temporal samples (orders of zeta) divided by the minimum number of taxa in the sites of each specific sample. # Interpretation: Seasons <- unique(Marine_gg$Season) for(i in 1:4) { # loop through the seasons Marine_pa <- Marine_wide_variable %>% mutate(Season = Marine_gg$Season) if(i == 1) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i]) %>% dplyr::select(-Season) } if(i == 2) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i]) %>% dplyr::select(-Season) } if(i == 3) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i]) %>% dplyr::select(-Season) } if(i == 4) { Marine_pa <- Marine_pa %>% filter(Season == Seasons[i]) %>% dplyr::select(-Season) } Marine_pa[Marine_pa > 0] <- 1 Marine_zeta <- data.frame(Season = NA, Order = (1:(dim(Marine_pa)[[1]]-1)), Zeta_diversity = NA, Zeta_diversity_SD = NA) for(z in 1:(dim(Marine_pa)[[1]]) - 1) { zeta.out <- Zeta.order.mc(Marine_pa, order = z, normalize = "Simpson") Marine_zeta$Season[z] <- Seasons[i] Marine_zeta$Order[z] <- zeta.out$zeta.order Marine_zeta$Zeta_diversity[z] <- zeta.out$zeta.val Marine_zeta$Zeta_diversity_SD[z] <- zeta.out$zeta.val.sd print(z) } # end z loop through zeta orders if(i == 1) { Marine_zeta_out <- Marine_zeta } if(i > 1) { Marine_zeta_out <- rbind(Marine_zeta_out, Marine_zeta) } } # end i loop through the seasons Marine_zeta_out %>% dplyr::filter(Order > 1) %>% ggplot(aes(x = Order, y = Zeta_diversity)) + geom_ribbon(aes(ymin = (Zeta_diversity - Zeta_diversity_SD), ymax = (Zeta_diversity + Zeta_diversity_SD), fill = Season), alpha = 0.3) + geom_line(aes(colour = Season), size = 1.5, alpha = 0.8) + geom_point(aes(fill = Season), size = 2, pch = 21, colour = "black", alpha = 0.8) + xlab("Zeta order") + ylab("Zeta diversity") + theme_bw(base_size = 18) ggsave("Marine_Zeta.png", width = 10, height = 6, dpi = 600) summary(Marine_aov <- aov(Marine_zeta_out$Zeta_diversity ~ Marine_zeta_out$Season)) TukeyHSD(Marine_aov)