# Sean Kinard # 07-19-2021 # Spring 17 Texas Coastal Prairie # Invert diversity regressions with environmental predictors # Load Packages library(tidyverse) library(gridExtra) library(Hmisc) library("writexl") # Load Data invert <- read_csv("sp17_data_files/invert_diversity_estimates.csv") env <- read_csv("sp17_data_files/sp17_site_x_env.csv") # - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - # # cleaning up data frames to include only the a priori selected variables and community abundance matrix (env <- select(env, STAID, AP, flash.index, LFPP, NH4., log.cond, Rosgen.Index)) # Merge dataframes to ensure matching rows (sites) msterinvert <- merge(env,invert, by = "STAID") # - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - # # :::::::::::::::::::::::::::::::::::::::::::::::::::::::::: # Hypothesis test: Diversity Correlates with Precipitation : # :::::::::::::::::::::::::::::::::::::::::::::::::::::::::: # Species richness summary(lm(formula= rich ~ AP, data = msterinvert)) # p-value: 0.5101 # Shannon Entropy summary(lm(formula= shannon ~ AP, data = msterinvert)) # p-value: 0.505 # Simpson Entropy summary(lm(formula= simpson ~ AP, data = msterinvert)) # p-value: 0.5009 # plots (All Sites) plot_1 <- ggplot(msterinvert, aes(x=AP, y=rich)) + geom_smooth(method=lm, formula = y ~ poly(x, 3), se=FALSE, color = "grey", linetype = 'dashed') + geom_point(shape=19,aes(size=.1)) + scale_x_continuous(name = "Precipitation (cm/yr)") + scale_y_continuous(name = "Species Richness") + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 20)) plot_2 <- ggplot(msterinvert, aes(x=AP, y=shannon)) + geom_smooth(method=lm, formula = y ~ poly(x, 3), se=FALSE, color = "grey", linetype = 'dashed') + geom_point(shape=19,aes(size=.1)) + scale_x_continuous(name = "Precipitation (cm/yr)") + scale_y_continuous(name = "Shannon Entropy") + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 20)) plot_3 <- ggplot(msterinvert, aes(x=AP, y=simpson)) + geom_smooth(method=lm, formula = y ~ poly(x, 3), se=FALSE, color = "grey", linetype = 'dashed') + geom_point(shape=19,aes(size=.1)) + scale_x_continuous(name = "Precipitation (cm/yr)") + scale_y_continuous(name = "Gini-Simpson Index") + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 20)) grid.arrange(plot_1, plot_2, plot_3, nrow=1) # Result: Visual inspection suggests that biodiversity peaks within the middle of the rainfall gradient. Linear regressions are not significant for all Hill diversity metrics. #TRC has extremely low diversity and may constitute an outlier. # Regressions Removing TRC for outlier effects: msterinvert_TRCrm <- msterinvert[-c(which(msterinvert$rich < 9)),] # Species richness summary(lm(formula= rich ~ AP, data = msterinvert_TRCrm)) # p-value: 0.09988 # Shannon Entropy summary(lm(formula= shannon ~ AP, data = msterinvert_TRCrm)) # p-value: 0.08669 # Simpson Entropy summary(lm(formula= simpson ~ AP, data = msterinvert_TRCrm)) # p-value: 0.07457 # plots (All Sites) plot_4 <- ggplot(msterinvert_TRCrm, aes(x=AP, y=rich)) + geom_smooth(method=lm, formula = y ~ poly(x, 3) , se=FALSE, color = "grey", linetype = 'dashed') + geom_point(shape=19,aes(size=.1)) + scale_x_continuous(name = "Precipitation (cm/yr)") + scale_y_continuous(name = "Species Richness") + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 20)) plot_5 <- ggplot(msterinvert_TRCrm, aes(x=AP, y=shannon)) + geom_smooth(method=lm, formula = y ~ poly(x, 3) , se=FALSE, color = "grey", linetype = 'dashed') + geom_point(shape=19,aes(size=.1)) + scale_x_continuous(name = "Precipitation (cm/yr)") + scale_y_continuous(name = "Shannon Entropy") + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 20)) plot_6 <- ggplot(msterinvert_TRCrm, aes(x=AP, y=simpson)) + geom_smooth(method=lm, formula = y ~ poly(x, 3) , se=FALSE, color = "grey", linetype = 'dashed') + geom_point(shape=19,aes(size=.1)) + scale_x_continuous(name = "Precipitation (cm/yr)") + scale_y_continuous(name = "Gini-Simpson Index") + theme_classic() + theme(legend.position="none") + theme(text = element_text(size = 20)) grid.arrange(plot_4, plot_5, plot_6, nrow=1) # Removal of TRC causes linear regressions between preciptation and diversity to have p-values greater than 0.05 and less than 0.10. However, the hump-shape is still evident and the relationship is still better characterized by an exponential regression. # Conclusion: Invertebrate biodiversity does not exhibit a straightforward linear relationship with annual precipitation, but we observed the highest inverterbate diversity at sites within the middle of the rainfall gradient. d <- select(msterinvert, AP, rich, shannon, simpson) # create data frame with dependent variable in column 1 and independent variables for linear regression in columns 2:length(d) # make data frame for model outputs predictor <- names(d[,-1]) estimate <- rep(0,length(d[,-1])) r2 <- rep(0,length(d[,-1])) f.stat <- rep(0,length(d[,-1])) df <- rep(0,length(d[,-1])) p.value <- rep(0,length(d[,-1])) table.lm <- data.frame(predictor, estimate, df, r2, f.stat, p.value) d.var <- noquote(names(d)[1]) for(i in 2:length(d)){ i.var <- noquote(names(d)[i]) formula = paste0(d.var, " ~ ", i.var) regression <- lm(formula, data=d) j <- i-1 table.lm$estimate[j] <- summary(regression)$coefficients[2,1] table.lm$df[j] <- paste(summary(regression)$df[1], summary(regression)$df[2]) table.lm$r2[j] <- summary(regression)$r.squared table.lm$f.stat[j] <- summary(regression)$fstatistic[1] table.lm$p.value[j] <- summary(regression)$coefficients[2,4] table.lm$p.value[j] <- pf(summary(regression)$fstatistic[1], summary(regression)$fstatistic[2], summary(regression)$fstatistic[3], lower.tail=FALSE) } invert_diversity_vs_precip <- table.lm write_csv(invert_diversity_vs_precip, "invert_diversity_vs_precip.csv") # ::::::::::::::::::::::::::::::::::::::::::::: # Environmental Predictor Linear Regressions: # :::::::::::::::::::::::::::::::::::::::::::: # Running a for loop to run linear regression and export summary statistics to identify significant corelations d <- select(msterinvert, rich, AP:Rosgen.Index) # create data frame with dependent variable in column 1 and independent variables for linear regression in columns 2:length(d) # make data frame for model outputs predictor <- names(d[,-1]) estimate <- rep(0,length(d[,-1])) r2 <- rep(0,length(d[,-1])) f.stat <- rep(0,length(d[,-1])) df <- rep(0,length(d[,-1])) p.value <- rep(0,length(d[,-1])) table.lm <- data.frame(predictor, estimate, df, r2, f.stat, p.value) d.var <- noquote(names(d)[1]) for(i in 2:length(d)){ i.var <- noquote(names(d)[i]) formula = paste0(d.var, " ~ ", i.var) regression <- lm(formula, data=d) j <- i-1 table.lm$estimate[j] <- summary(regression)$coefficients[2,1] table.lm$df[j] <- paste(summary(regression)$df[1], summary(regression)$df[2]) table.lm$r2[j] <- summary(regression)$r.squared table.lm$f.stat[j] <- summary(regression)$fstatistic[1] table.lm$p.value[j] <- summary(regression)$coefficients[2,4] table.lm$p.value[j] <- pf(summary(regression)$fstatistic[1], summary(regression)$fstatistic[2], summary(regression)$fstatistic[3], lower.tail=FALSE) } table.rich <- table.lm # export table write_xlsx(as.data.frame(table.rich),"invert_rich_v_environment.xlsx") d <- select(msterinvert, shannon, AP:Rosgen.Index) # create data frame with dependent variable in column 1 and independent variables for linear regression in columns 2:length(d) # make data frame for model outputs predictor <- names(d[,-1]) estimate <- rep(0,length(d[,-1])) r2 <- rep(0,length(d[,-1])) f.stat <- rep(0,length(d[,-1])) df <- rep(0,length(d[,-1])) p.value <- rep(0,length(d[,-1])) table.lm <- data.frame(predictor, estimate, df, r2, f.stat, p.value) d.var <- noquote(names(d)[1]) for(i in 2:length(d)){ i.var <- noquote(names(d)[i]) formula = paste0(d.var, " ~ ", i.var) regression <- lm(formula, data=d) j <- i-1 table.lm$estimate[j] <- summary(regression)$coefficients[2,1] table.lm$df[j] <- paste(summary(regression)$df[1], summary(regression)$df[2]) table.lm$r2[j] <- summary(regression)$r.squared table.lm$f.stat[j] <- summary(regression)$fstatistic[1] table.lm$p.value[j] <- summary(regression)$coefficients[2,4] table.lm$p.value[j] <- pf(summary(regression)$fstatistic[1], summary(regression)$fstatistic[2], summary(regression)$fstatistic[3], lower.tail=FALSE) } table.shannon <- table.lm # export table write_xlsx(as.data.frame(table.shannon),"invert_shannon_v_environment.xlsx") d <- select(msterinvert, simpson, AP:Rosgen.Index) # create data frame with dependent variable in column 1 and independent variables for linear regression in columns 2:length(d) # make data frame for model outputs predictor <- names(d[,-1]) estimate <- rep(0,length(d[,-1])) r2 <- rep(0,length(d[,-1])) f.stat <- rep(0,length(d[,-1])) df <- rep(0,length(d[,-1])) p.value <- rep(0,length(d[,-1])) table.lm <- data.frame(predictor, estimate, df, r2, f.stat, p.value) d.var <- noquote(names(d)[1]) for(i in 2:length(d)){ i.var <- noquote(names(d)[i]) formula = paste0(d.var, " ~ ", i.var) regression <- lm(formula, data=d) j <- i-1 table.lm$estimate[j] <- summary(regression)$coefficients[2,1] table.lm$df[j] <- paste(summary(regression)$df[1], summary(regression)$df[2]) table.lm$r2[j] <- summary(regression)$r.squared table.lm$f.stat[j] <- summary(regression)$fstatistic[1] table.lm$p.value[j] <- summary(regression)$coefficients[2,4] table.lm$p.value[j] <- pf(summary(regression)$fstatistic[1], summary(regression)$fstatistic[2], summary(regression)$fstatistic[3], lower.tail=FALSE) } table.simpson <- table.lm # export table write_xlsx(as.data.frame(table.simpson), "invert_simpson_v_environment.xlsx") # - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - # # Supplemental Visualizations # Plot significant invert diversity vs environment regressions plot_iap <- ggplot(msterinvert, aes(x=AP, y=shannon)) + geom_point(shape=19,aes(size=.1), ) + scale_x_continuous(name = "Precipitation") + scale_y_continuous(name = expression('Shannon Entropy')) + theme_classic() + theme(text = element_text(size = 20)) + theme(legend.position="none") plot_iLFPP <- ggplot(msterinvert, aes(x=LFPP, y=shannon)) + geom_smooth(method=lm, se=FALSE, color = 'grey', linetype = 'dashed') + geom_point(shape=19,aes(size=.1)) + scale_x_continuous(name = "Low Flow Pulse %") + scale_y_continuous(name = expression('Shannon Entropy')) + theme_classic() + theme(text = element_text(size = 20)) + theme(legend.position="none") grid.arrange(plot_iap,plot_iLFPP, ncol=2) # End # - # - # - # - # - # - # - # - # - # - # - # - # # - # - # - # - #