--- title: "Cano_Declines_PeerJ" author: "Caroline Palmer" date: "2025-03-13" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ```{r load_libraries} # Load necessary libraries library(tidyverse) library(lubridate) library(brms) library(patchwork) library(ggplot2) library(dplyr) library(vegan) # For Shannon diversity index calculation library(tidyr) library(broom) library(purrr) library(readr) library(car) library(emmeans) library(lme4) library(FSA) # For Dunn's post-hoc test # Ensure the brms package is installed if (!requireNamespace("brms", quietly = TRUE)) { install.packages("brms") } ``` ```{r preprocess data} # Read and preprocess data prevsub <- read_csv("prevsub.csv", col_types = cols( bleached = col_double(), diseased = col_double(), coral = col_double(), mean_temp = col_double(), max_temp = col_double(), min_temp = col_double(), macroalgae = col_double(), turf = col_double(), cyanobacteria = col_double(), caulerpa = col_double(), deadcoral = col_double(), cca = col_double(), other = col_double(), rubble = col_double(), sand = col_double(), rock = col_double(), proppoc = col_double(), proppor = col_double(), proppav = col_double(), proppsam = col_double(), propoth = col_double(), poc_mbb = col_double(), por_mbb = col_double(), pav_mbb = col_double(), psam_mbb = col_double(), other_mbb = col_double(), poc_bleached = col_double(), por_bleached = col_double(), pav_bleached = col_double(), psamm_bleached = col_double(), other_corals_bleaching = col_double(), totalpoc = col_double(), totalpor = col_double(), totalpav = col_double(), totalpsamm = col_double(), totalother = col_double(), mean_temp = col_double(), # Mean temperature as numeric max_temp = col_double(), # Max temperature as numeric min_temp = col_double(), # Min temperature as numeric dive_temp = col_double(), # Dive temperature as numeric ph = col_double(), # pH as numeric sal = col_double(), # Salinity as numeric vis = col_double(), # Visibility as numeric oxi = col_double(), # Oxygen as numeric nit = col_double(), # Nitrate as numeric pho = col_double(), # Phosphate as numeric mean_mon_sst = col_double(), # Monthly mean SST max_mon_sst = col_double(), # Monthly max SST min_mon_sst = col_double(), site = col_character(), date = col_character() # Treat 'date' as character initially )) # Parse 'date' into a proper Date format prevsub <- prevsub %>% mutate(date = dmy(date)) # Remove rows where all values are NA prevsub <- prevsub %>% filter(rowSums(is.na(.)) != ncol(.)) # Ensure 'date' format is consistent (redundant if using dmy, but kept as a check) prevsub <- prevsub %>% mutate(date = as.Date(date, format = "%d/%m/%Y")) # Convert site to factor prevsub <- prevsub %>% mutate(site = as.factor(site)) # Add scaled date prevsub <- prevsub %>% mutate( z_date = scale(as.numeric(date)) ) # Ensure rows with missing 'bleached' but valid 'coral' are retained prevsub <- prevsub %>% filter(!is.na(z_date) & is.finite(z_date)) # Check for invalid dates invalid_dates <- prevsub %>% filter(is.na(date)) if (nrow(invalid_dates) > 0) { print("Invalid dates found:") print(invalid_dates) # Remove invalid dates prevsub <- prevsub %>% filter(!is.na(date)) } else { print("All dates are valid!") } # Step 1: Convert NA to structural zeros for total bleaching columns bleaching_cols <- c("poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching") total_cols <- c("totalpoc", "totalpor", "totalpav", "totalpsamm", "totalother") for (i in seq_along(bleaching_cols)) { prevsub[[bleaching_cols[i]]] <- ifelse( is.na(prevsub[[bleaching_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[bleaching_cols[i]]] ) } # Step 2: Convert NA to structural zeros for mostly bleached and bleached data species_cols <- c("poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb") for (i in seq_along(species_cols)) { prevsub[[species_cols[i]]] <- ifelse( is.na(prevsub[[species_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[species_cols[i]]] ) } # Step 3: Constrain proportions for beta regression for additional columns additional_proportion_cols <- c( "coral", "bleached", "diseased", "macroalgae", "turf", "cyanobacteria", "caulerpa", "cca", "rubble", "rock", "sand", "deadcoral", "other", "proppoc", "proppor", "proppav", "proppsam", "propoth", "poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb", "poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching" ) prevsub <- prevsub %>% mutate(across(all_of(additional_proportion_cols), ~ pmax(pmin(., 0.9999), 0.0001))) # Step 4: Create structural zero indicators for all relevant columns for (i in seq_along(total_cols)) { prevsub[[paste0(species_cols[i], "_structural_zero")]] <- ifelse( prevsub[[total_cols[i]]] == 0, 1, 0 ) prevsub[[paste0(bleaching_cols[i], "_structural_zero")]] <- ifelse( prevsub[[total_cols[i]]] == 0, 1, 0 ) } # Constrain response variables for zero_inflated_beta family prevsub <- prevsub %>% mutate( macroalgae = pmax(pmin(macroalgae, 0.9999), 0.0001), caulerpa = pmax(pmin(caulerpa, 0.9999), 0.0001), turf = pmax(pmin(turf, 0.9999), 0.0001), cyanobacteria = pmax(pmin(cyanobacteria, 0.9999), 0.0001), cca = pmax(pmin(cca, 0.9999), 0.0001) ) # Filter for selected sites selected_sites <- c("Tina", "Ancla", "Chorro", "San Josecito", "Cueva", "Barco Somero", "Esquina", "Este Intermedio", "Barco Profundo") filtered_prevsub <- prevsub %>% filter(site %in% selected_sites) %>% mutate(site = droplevels(site)) filtered_prevsub_coral <- filtered_prevsub %>% filter(!is.na(coral)) # Save removed data for later use removed_data <- filtered_prevsub %>% filter(site == "Barco Somero" & format(date, "%Y-%m") == "2024-03") # Proceed with filtered data for analysis filtered_prevsub <- filtered_prevsub %>% filter(!(site == "Barco Somero" & format(date, "%Y-%m") == "2024-03")) # Save removed data for later use removed_data_ancla <- filtered_prevsub %>% filter(site == "Ancla" & format(date, "%Y-%m") == "2024-04") # Proceed with filtered data for analysis (excluding Ancla's April 2024 data) filtered_prevsub <- filtered_prevsub %>% filter(!(site == "Ancla" & format(date, "%Y-%m") == "2024-04")) ``` #Coral Diversity Analysis Full (Shannon) ```{r shannon-analysis-Tot_Counts, echo=TRUE, message=FALSE, warning=FALSE} # Load necessary libraries library(dplyr) library(ggplot2) library(tidyr) library(vegan) # For Shannon diversity index calculation library(car) library(emmeans) library(lme4) library(FSA) # For Dunn's post-hoc test # Define the species count columns species_count_columns <- c( "Tot_pocillopora", "Tot_poriteslobata", "Tot_pavonaclavus", "Tot_pavonagigantea", "Tot_pavonamaldivensis", "Tot_pavonavarians", "Tot_pavonachiriquiensis", "Tot_pavonafrondifera", "Tot_psammacorastellata", "Tot_psammacoraprofundacella", "Tot_gardineroserisplanulata", "Tot_tubastraeacoccinea", "Tot_other" ) # Create a copy of the dataset for Shannon Index analysis filtered_prevsub_shannon <- filtered_prevsub %>% drop_na(all_of(species_count_columns)) %>% mutate(Shannon_Index_Healthy = diversity(select(., all_of(species_count_columns)), index = "shannon")) %>% group_by(site) %>% filter(n() > 3) %>% # Remove sites with fewer than 3 observations ungroup() # Save computed Shannon Index data write.csv(filtered_prevsub_shannon, "Shannon_Index_Healthy_Data.csv", row.names = FALSE) cat("Shannon Index Healthy Data saved as 'Shannon_Index_Healthy_Data.csv'\n") # Summarize Shannon Index statistics by site shannon_summary_healthy <- filtered_prevsub_shannon %>% group_by(site) %>% summarise( mean_shannon = mean(Shannon_Index_Healthy, na.rm = TRUE), sd_shannon = sd(Shannon_Index_Healthy, na.rm = TRUE), n = n() ) %>% arrange(desc(mean_shannon)) print("Summary statistics for Shannon Index (Healthy) by site:") print(shannon_summary_healthy) # Violin Plot: Shannon Index by Site (Healthy Corals) p_healthy <- ggplot(filtered_prevsub_shannon, aes(x = site, y = Shannon_Index_Healthy, fill = site)) + geom_violin(trim = FALSE, scale = "width", alpha = 0.6, width = 0.9) + geom_jitter(aes(fill = site), color = "black", size = 2.5, shape = 21, width = 0.2, alpha = 0.7) + stat_summary(fun.data = "mean_cl_boot", colour = "black", fill = "white", size = 1, shape = 21) + scale_fill_brewer(palette = "Paired") + theme_minimal(base_size = 15) + labs(title = "Shannon Diversity Index by Site", x = "Site", y = "Shannon Index") + theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) ggsave("Violin_Plot_Shannon_Index_Tot.png", plot = p_healthy, width = 12, height = 6, dpi = 300) cat("Violin plot saved as 'Violin_Plot_Shannon_Index_Tot.png'\n") print(p_healthy) # ANOVA: Testing for Differences Between Sites anova_site_healthy <- aov(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon) anova_summary_healthy <- summary(anova_site_healthy) print("ANOVA Results for Shannon Index (Healthy) by Site:") print(anova_summary_healthy) # Check ANOVA assumptions shapiro_test_healthy <- shapiro.test(residuals(anova_site_healthy)) print("Shapiro-Wilk Test for Normality of Residuals:") print(shapiro_test_healthy) levene_test_healthy <- leveneTest(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon) print("Levene's Test for Homogeneity of Variance:") print(levene_test_healthy) # Post-hoc tests based on assumptions if (anova_summary_healthy[[1]]$`Pr(>F)`[1] < 0.05 & shapiro_test_healthy$p.value > 0.05 & levene_test_healthy$`Pr(>F)`[1] > 0.05) { posthoc_healthy <- emmeans(anova_site_healthy, pairwise ~ site) print("Post-hoc comparisons:") print(posthoc_healthy) } else { print("ANOVA not significant or assumptions not met, considering Kruskal-Wallis test instead.") # Kruskal-Wallis test kruskal_test_healthy <- kruskal.test(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon) print("Kruskal-Wallis Test Results for Shannon Index by Site:") print(kruskal_test_healthy) # Dunn's post-hoc if Kruskal is significant if (kruskal_test_healthy$p.value < 0.05) { posthoc_dunn_healthy <- dunnTest(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon, method = "bonferroni") print("\nPost-hoc pairwise comparisons (Dunn's Test):") print(posthoc_dunn_healthy$res) # Save results dunn_results <- posthoc_dunn_healthy$res %>% as.data.frame() %>% rename(Comparison = Comparison, Z = Z, P.adj = P.adj) write.csv(dunn_results, "Dunn_Test_Summary.csv", row.names = FALSE) cat("Dunn's test summary saved as 'Dunn_Test_Summary.csv'\n") } else { print("Kruskal-Wallis Test not significant, skipping post-hoc comparisons.") } } # Mixed-effects model lmer_model_healthy <- lmer(Shannon_Index_Healthy ~ site + z_date + (1 | site), data = filtered_prevsub_shannon) print("Mixed-Effects Model Summary:") print(summary(lmer_model_healthy)) # Scatterplot of Shannon over time ggplot(filtered_prevsub_shannon, aes(x = z_date, y = Shannon_Index_Healthy, color = site)) + geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = FALSE, linetype = "dashed") + theme_minimal() + labs( title = "Shannon Diversity Index Over Time (z_date)", x = "Scaled Date (z_date)", y = "Shannon Index" ) + theme(legend.position = "bottom") # Coral Cover Over Time (from subset) ggplot(filtered_prevsub_shannon, aes(x = z_date, y = coral)) + geom_point() + geom_smooth(method = "loess", se = FALSE) + theme_minimal() + labs( title = "Coral Cover Over Time (Healthy Corals Subset)", x = "Scaled Date (z_date)", y = "Coral Cover" ) ``` #Species Presence Table Healthy Counts ```{r Species_presence_table, echo=TRUE, message=FALSE, warning=FALSE} # Load necessary libraries library(dplyr) library(tidyr) library(stringr) library(knitr) # Define species columns explicitly using the "Tot_" data species_columns <- c("Tot_pocillopora", "Tot_poriteslobata", "Tot_pavonaclavus", "Tot_pavonagigantea", "Tot_pavonamaldivensis", "Tot_pavonavarians", "Tot_pavonachiriquiensis", "Tot_pavonafrondifera", "Tot_psammacorastellata", "Tot_psammacoraprofundacella", "Tot_gardineroserisplanulata", "Tot_tubastraeacoccinea", "Tot_other") # Remove rows with NA values in species count columns filtered_prevsub <- filtered_prevsub %>% drop_na(all_of(species_columns)) # Convert count data into presence/absence (1 = present, 0 = absent) species_presence <- filtered_prevsub %>% select(site, all_of(species_columns)) %>% # Select only site and species columns group_by(site) %>% summarise(across(everything(), ~ ifelse(sum(. > 0, na.rm = TRUE) > 0, 1, 0))) # Count > 0 = Present (1) # Rename species columns by removing "Tot_" prefix and replacing underscores colnames(species_presence) <- colnames(species_presence) %>% str_replace_all("^Tot_", "") %>% # Remove 'Tot_' prefix str_replace_all("_", " ") # Replace underscores with spaces # Print the corrected summary table print(species_presence) # Save table as CSV file write.csv(species_presence, "Species_Presence_By_Site_Formatted.csv", row.names = FALSE) # Display formatted table in R Markdown kable(species_presence, caption = "Species Presence by Site (1 = Present, 0 = Absent)") ``` # Bleaching prevalence analysis ```{r bleaching models} #Bleaching prevalence by site beta_model_no_time <- brm( formula = bleached ~ 1 + (1 | site), data = filtered_prevsub, family = Beta(), # no zero inflation chains = 4, cores = 4, iter = 4000, warmup = 2000, control = list(adapt_delta = 0.99), seed = 123 ) summary(beta_model_no_time) #### beta_model_no_time predicts an overall bleaching proportion (on average) plus site-level deviations, with no variation over time. bleaching levels vary primarily by site rather than time, with an overall average of about 24% bleaching across sites. ``` #Coral Bleaching Prevalence Plot by time ```{r filter-and-plot-bleaching, message=FALSE, warning=FALSE} library(dplyr) library(ggplot2) # 1) Identify the mean and standard deviation of the original date column original_mean_date <- mean(as.numeric(filtered_prevsub$date), na.rm = TRUE) original_sd_date <- sd(as.numeric(filtered_prevsub$date), na.rm = TRUE) # 2) Filter data: Keep only rows where bleaching data is available and within valid site-specific time ranges filtered_prevsub_small <- filtered_prevsub %>% filter(!is.na(bleached)) %>% group_by(site) %>% mutate( min_zdate = min(z_date, na.rm = TRUE), max_zdate = max(z_date, na.rm = TRUE) ) %>% ungroup() %>% filter(z_date >= min_zdate & z_date <= max_zdate) %>% # Convert z_date back to actual date mutate( actual_date = as.Date(z_date * original_sd_date + original_mean_date, origin = "1970-01-01"), month_year = format(actual_date, "%b %Y") # Extract Month-Year (e.g., "Jan 2024") ) # 3) Plot bleaching trends over time with **no legend** ggplot(filtered_prevsub_small, aes(x = actual_date, y = bleached)) + geom_smooth(method = "lm") + # Linear regression trend line geom_point(size = 5, shape = 21, aes(fill = site)) + # Points colored by site facet_wrap(~site, scales = "free_x") + # Separate panels for each site scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", minor_breaks = "1 month") + # More frequent ticks theme_bw() + labs(y = "Proportion Bleached", x = "Month-Year") + theme( axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), legend.position = "none" # **Removes the legend** ) ``` # ZIB model of bleaching prevalence by species ```{r multivariate_model_total_bleached_species_prev} # Fit the multivariate zero-inflated beta model bleachedspecies_model <- brm( mvbind(poc_bleached, por_bleached, pav_bleached, psamm_bleached, other_corals_bleaching) ~ poc_bleached_structural_zero + por_bleached_structural_zero + pav_bleached_structural_zero + psamm_bleached_structural_zero + other_corals_bleaching_structural_zero + (1 | site), data = filtered_prevsub, family = zero_inflated_beta(), iter = 4000, control = list(adapt_delta = 0.99), cores = 4, seed = 123 ) # Summarize the model summary(bleachedspecies_model) ``` #SIMPER Analysis full model ```{r preprocess_SIMPER_data, message=FALSE, warning=FALSE} # Load necessary libraries library(dplyr) library(tidyr) library(lubridate) library(readr) # Read and preprocess data prevsub <- read_csv("prevsub.csv", col_types = cols( bleached = col_double(), diseased = col_double(), coral = col_double(), mean_temp = col_double(), max_temp = col_double(), min_temp = col_double(), macroalgae = col_double(), turf = col_double(), cyanobacteria = col_double(), caulerpa = col_double(), deadcoral = col_double(), cca = col_double(), other = col_double(), rubble = col_double(), sand = col_double(), rock = col_double(), proppoc = col_double(), proppor = col_double(), proppav = col_double(), proppsam = col_double(), propoth = col_double(), poc_mbb = col_double(), por_mbb = col_double(), pav_mbb = col_double(), psam_mbb = col_double(), other_mbb = col_double(), poc_bleached = col_double(), por_bleached = col_double(), pav_bleached = col_double(), psamm_bleached = col_double(), other_corals_bleaching = col_double(), totalpoc = col_double(), totalpor = col_double(), totalpav = col_double(), totalpsamm = col_double(), totalother = col_double(), mean_temp = col_double(), max_temp = col_double(), min_temp = col_double(), dive_temp = col_double(), ph = col_double(), sal = col_double(), vis = col_double(), oxi = col_double(), nit = col_double(), pho = col_double(), mean_mon_sst = col_double(), max_mon_sst = col_double(), min_mon_sst = col_double(), site = col_character(), date = col_character() # Keep date as character initially )) # Convert 'date' column to proper Date format prevsub <- prevsub %>% mutate(date_actual = dmy(date)) # Remove rows where all values are NA prevsub <- prevsub %>% filter(rowSums(is.na(.)) != ncol(.)) # Convert 'site' to a factor prevsub <- prevsub %>% mutate(site = as.factor(site)) # Step 1: Convert NA to structural zeros for total bleaching columns bleaching_cols <- c("poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching") total_cols <- c("totalpoc", "totalpor", "totalpav", "totalpsamm", "totalother") for (i in seq_along(bleaching_cols)) { prevsub[[bleaching_cols[i]]] <- ifelse( is.na(prevsub[[bleaching_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[bleaching_cols[i]]] ) } # Step 2: Convert NA to structural zeros for mostly bleached and bleached data species_cols <- c("poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb") for (i in seq_along(species_cols)) { prevsub[[species_cols[i]]] <- ifelse( is.na(prevsub[[species_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[species_cols[i]]] ) } # Step 3: Constrain proportions for beta regression for additional columns additional_proportion_cols <- c( "coral", "bleached", "diseased", "macroalgae", "turf", "cyanobacteria", "caulerpa", "cca", "rubble", "rock", "sand", "deadcoral", "other", "proppoc", "proppor", "proppav", "proppsam", "propoth", "poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb", "poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching" ) prevsub <- prevsub %>% mutate(across(all_of(additional_proportion_cols), ~ pmax(pmin(., 0.9999), 0.0001))) # Step 4: Create structural zero indicators for all relevant columns for (i in seq_along(total_cols)) { prevsub[[paste0(species_cols[i], "_structural_zero")]] <- ifelse( prevsub[[total_cols[i]]] == 0, 1, 0 ) prevsub[[paste0(bleaching_cols[i], "_structural_zero")]] <- ifelse( prevsub[[total_cols[i]]] == 0, 1, 0 ) } # Constrain response variables for zero_inflated_beta family prevsub <- prevsub %>% mutate( macroalgae = pmax(pmin(macroalgae, 0.9999), 0.0001), caulerpa = pmax(pmin(caulerpa, 0.9999), 0.0001), turf = pmax(pmin(turf, 0.9999), 0.0001), cyanobacteria = pmax(pmin(cyanobacteria, 0.9999), 0.0001), cca = pmax(pmin(cca, 0.9999), 0.0001) ) # Filter for selected sites selected_sites <- c("Tina", "Ancla", "Chorro", "San Josecito", "Cueva", "Barco Somero", "Esquina", "Este Intermedio", "Barco Profundo") filtered_simper_data <- prevsub %>% filter(site %in% selected_sites) %>% mutate(site = droplevels(site)) # Define "Before" and "After" categories based on actual dates filtered_simper_data <- filtered_simper_data %>% mutate(merged_time = case_when( date_actual >= as.Date("2024-03-27") & date_actual <= as.Date("2024-07-18") ~ "Before", date_actual >= as.Date("2024-08-20") ~ "After", TRUE ~ NA_character_ )) %>% drop_na(merged_time) # Check if both time groups exist print(table(filtered_simper_data$merged_time)) # Remove specific dates from Barco Somero & Ancla filtered_simper_data <- filtered_simper_data %>% filter(!(site == "Barco Somero" & format(date_actual, "%Y-%m") == "2024-03")) %>% filter(!(site == "Ancla" & format(date_actual, "%Y-%m") == "2024-04")) ``` ```{r SIMPER_analysis_During_After_Correction_March_12, message=FALSE, warning=FALSE} library(vegan) library(dplyr) library(tidyr) # 1) Time-Based SIMPER (Grouping by Merged Time Period) composition_vars <- c("coral", "bleached", "macroalgae", "turf", "cyanobacteria", "cca", "deadcoral") # Subset data and ensure numeric values simper_data <- filtered_simper_data %>% select(site, merged_time, all_of(composition_vars)) %>% mutate(across(all_of(composition_vars), as.numeric)) %>% # Ensure numeric conversion drop_na() %>% # Remove rows with missing values mutate(merged_time = as.factor(merged_time)) # Convert merged_time to factor # Check if both groups exist before running SIMPER if (length(unique(simper_data$merged_time)) < 2) { stop("Error: SIMPER requires at least two unique time periods ('Before' and 'After').") } # Convert data frame to numeric matrix simper_matrix <- as.matrix(simper_data %>% select(-site, -merged_time)) # Debugging: Check structure of matrix before SIMPER print(str(simper_matrix)) # Run SIMPER analysis simper_results <- simper(simper_matrix, simper_data$merged_time, permutations = 100) # Extract SIMPER summary simper_summary_time <- summary(simper_results) # Convert SIMPER results into a structured data frame simper_df_time <- bind_rows( lapply(names(simper_summary_time), function(grp) { if (!is.null(simper_summary_time[[grp]])) { df <- as.data.frame(simper_summary_time[[grp]]) df$Group <- grp # The comparison label (e.g. "Before vs After") df$Variable <- rownames(simper_summary_time[[grp]]) df } }), .id = "Comparison_Group" ) # Rename columns and select order simper_df_time <- simper_df_time %>% rename(Average_Contribution = average, SD = sd, Consistency_Ratio = ratio, Mean_Before = ava, Mean_After = avb, Cumulative_Contribution = cumsum, P_Value = p) %>% select(Group, Comparison_Group, Variable, Average_Contribution, Consistency_Ratio, Mean_Before, Mean_After, Cumulative_Contribution, P_Value) # Save results write.csv(simper_df_time, "SIMPER_results_Before_After_by_Date.csv", row.names = FALSE) # Print first few rows cat("\n--- SIMPER RESULTS (Before vs After by Date) ---\n") print(head(simper_df_time)) # 2) Site-Level SIMPER (Loop Over Each Site for "Before" vs "After") # Store results for each site site_results_list <- list() # Identify unique sites all_sites <- unique(filtered_simper_data$site) # Loop through each site for (this_site in all_sites) { cat("\nChecking site:", this_site) # Subset for this site and ensure numeric values site_data <- filtered_simper_data %>% filter(site == this_site) %>% select(merged_time, all_of(composition_vars)) %>% mutate(across(all_of(composition_vars), as.numeric)) %>% # Convert to numeric group_by(merged_time) %>% mutate(across(all_of(composition_vars), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) %>% # Replace NAs with time-period mean ungroup() %>% mutate(merged_time = as.factor(merged_time)) # Debugging: Check if missing values remain print(anyNA(site_data)) # Should return FALSE # Debugging: Print Before/After counts for each site print(table(site_data$merged_time)) # If only one merged time period exists, skip if (length(unique(site_data$merged_time)) < 2) { cat("\nSkipping site:", this_site, "because it does not have both 'Before' and 'After' data.\n") next } # Convert to numeric matrix site_matrix <- as.matrix(site_data %>% select(-merged_time)) # Debugging: Check if numeric conversion was successful print(str(site_matrix)) if (nrow(site_matrix) != length(site_data$merged_time)) { stop(paste("Mismatch between data matrix and merged_time factor for site:", this_site)) } # Run SIMPER for this site site_simper <- simper(site_matrix, site_data$merged_time, permutations = 100) # Summarize results site_summary <- summary(site_simper) # Convert site-level SIMPER to a data frame site_simper_df <- bind_rows( lapply(names(site_summary), function(grp) { if (!is.null(site_summary[[grp]])) { df <- as.data.frame(site_summary[[grp]]) df$Comparison <- grp df$Variable <- rownames(site_summary[[grp]]) df } }), .id = "Comparison_Group" ) %>% rename(Average_Contribution = average, SD = sd, Consistency_Ratio = ratio, Mean_Before = ava, Mean_After = avb, Cumulative_Contribution = cumsum, P_Value = p) # Store results site_results_list[[this_site]] <- site_simper_df } # Combine site-level results into a single data frame site_simper_all <- bind_rows(lapply(names(site_results_list), function(s) { out_df <- site_results_list[[s]] out_df$Site <- s out_df })) cat("\nSummary of Top Contributors to Temporal Change:\n") print(head(simper_df_time)) # Save site-based results write.csv(site_simper_all, "SIMPER_results_Before_After_by_Site_Date.csv", row.names = FALSE) # Print a sample cat("\n--- SITE-BASED SIMPER RESULTS (Before vs After by Date) ---\n") if (nrow(site_simper_all) > 0) { print(head(site_simper_all)) } else { cat("No sites had both 'Before' and 'After' data available.\n") } ``` ###SIMPER Before March to Sept After Dec2024 to Feb 2025 ```{r SIMPER_Preprocessing_and_Analysis_DEC2024, message=FALSE, warning=FALSE} # --- Load required libraries --- library(dplyr) library(tidyr) library(lubridate) library(readr) library(vegan) # --- Read and preprocess data --- prevsub <- read_csv("prevsub.csv", col_types = cols( bleached = col_double(), diseased = col_double(), coral = col_double(), mean_temp = col_double(), max_temp = col_double(), min_temp = col_double(), macroalgae = col_double(), turf = col_double(), cyanobacteria = col_double(), caulerpa = col_double(), deadcoral = col_double(), cca = col_double(), other = col_double(), rubble = col_double(), sand = col_double(), rock = col_double(), proppoc = col_double(), proppor = col_double(), proppav = col_double(), proppsam = col_double(), propoth = col_double(), poc_mbb = col_double(), por_mbb = col_double(), pav_mbb = col_double(), psam_mbb = col_double(), other_mbb = col_double(), poc_bleached = col_double(), por_bleached = col_double(), pav_bleached = col_double(), psamm_bleached = col_double(), other_corals_bleaching = col_double(), totalpoc = col_double(), totalpor = col_double(), totalpav = col_double(), totalpsamm = col_double(), totalother = col_double(), dive_temp = col_double(), ph = col_double(), sal = col_double(), vis = col_double(), oxi = col_double(), nit = col_double(), pho = col_double(), mean_mon_sst = col_double(), max_mon_sst = col_double(), min_mon_sst = col_double(), site = col_character(), date = col_character() )) # Convert and clean dates prevsub <- prevsub %>% mutate(date_actual = dmy(date)) %>% filter(rowSums(is.na(.)) != ncol(.)) %>% mutate(site = as.factor(site)) # --- Structural zero imputation for bleaching and species --- bleaching_cols <- c("poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching") species_cols <- c("poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb") total_cols <- c("totalpoc", "totalpor", "totalpav", "totalpsamm", "totalother") for (i in seq_along(bleaching_cols)) { prevsub[[bleaching_cols[i]]] <- ifelse(is.na(prevsub[[bleaching_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[bleaching_cols[i]]]) prevsub[[species_cols[i]]] <- ifelse(is.na(prevsub[[species_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[species_cols[i]]]) } # --- Constrain values for beta regression --- additional_proportion_cols <- c("coral", "bleached", "diseased", "macroalgae", "turf", "cyanobacteria", "caulerpa", "cca", "rubble", "rock", "sand", "deadcoral", "other", "proppoc", "proppor", "proppav", "proppsam", "propoth", species_cols, bleaching_cols) prevsub <- prevsub %>% mutate(across(all_of(additional_proportion_cols), ~ pmax(pmin(., 0.9999), 0.0001))) # --- Add structural zero flags --- for (i in seq_along(total_cols)) { prevsub[[paste0(species_cols[i], "_structural_zero")]] <- ifelse(prevsub[[total_cols[i]]] == 0, 1, 0) prevsub[[paste0(bleaching_cols[i], "_structural_zero")]] <- ifelse(prevsub[[total_cols[i]]] == 0, 1, 0) } # --- Filter for selected sites and assign time periods --- selected_sites <- c("Tina", "Ancla", "Chorro", "San Josecito", "Cueva", "Barco Somero", "Esquina", "Este Intermedio", "Barco Profundo") filtered_simper_data_DEC <- prevsub %>% filter(site %in% selected_sites) %>% mutate(site = droplevels(site), merged_time = case_when( date_actual >= as.Date("2024-03-27") & date_actual < as.Date("2024-12-01") ~ "Before", date_actual >= as.Date("2024-12-01") ~ "After", TRUE ~ NA_character_ )) %>% drop_na(merged_time) %>% filter(!(site == "Barco Somero" & format(date_actual, "%Y-%m") == "2024-03"), !(site == "Ancla" & format(date_actual, "%Y-%m") == "2024-04")) # Optional: check balance table(filtered_simper_data_DEC$merged_time) # Export filtered data write_csv(filtered_simper_data_DEC, "filtered_SIMPER_data_DEC_after.csv") # --- SIMPER analysis for Before vs After (DEC onwards) --- composition_vars <- c("coral", "bleached", "macroalgae", "turf", "cyanobacteria", "cca", "deadcoral") simper_data_dec <- filtered_simper_data_DEC %>% select(site, merged_time, all_of(composition_vars)) %>% mutate(across(all_of(composition_vars), as.numeric)) %>% drop_na() %>% mutate(merged_time = as.factor(merged_time)) # Create matrix and run SIMPER simper_matrix <- as.matrix(simper_data_dec %>% select(-site, -merged_time)) simper_results_dec <- simper(simper_matrix, simper_data_dec$merged_time, permutations = 100) simper_summary_dec <- summary(simper_results_dec) # Format SIMPER summary simper_df_dec <- bind_rows( lapply(names(simper_summary_dec), function(grp) { if (!is.null(simper_summary_dec[[grp]])) { df <- as.data.frame(simper_summary_dec[[grp]]) df$Group <- grp df$Variable <- rownames(simper_summary_dec[[grp]]) df } }), .id = "Comparison_Group" ) %>% rename(Average_Contribution = average, SD = sd, Consistency_Ratio = ratio, Mean_Before = ava, Mean_After = avb, Cumulative_Contribution = cumsum, P_Value = p) %>% select(Group, Comparison_Group, Variable, Average_Contribution, Consistency_Ratio, Mean_Before, Mean_After, Cumulative_Contribution, P_Value) write.csv(simper_df_dec, "SIMPER_results_Dec2024_Before_After.csv", row.names = FALSE) # --- Site-level SIMPER loop --- site_results_list_dec <- list() all_sites <- unique(filtered_simper_data_DEC$site) for (this_site in all_sites) { site_data <- filtered_simper_data_DEC %>% filter(site == this_site) %>% select(merged_time, all_of(composition_vars)) %>% mutate(across(all_of(composition_vars), as.numeric)) %>% group_by(merged_time) %>% mutate(across(all_of(composition_vars), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) %>% ungroup() %>% mutate(merged_time = as.factor(merged_time)) if (length(unique(site_data$merged_time)) < 2) next site_matrix <- as.matrix(site_data %>% select(-merged_time)) site_simper <- simper(site_matrix, site_data$merged_time, permutations = 100) site_summary <- summary(site_simper) site_simper_df <- bind_rows( lapply(names(site_summary), function(grp) { if (!is.null(site_summary[[grp]])) { df <- as.data.frame(site_summary[[grp]]) df$Comparison <- grp df$Variable <- rownames(site_summary[[grp]]) df } }), .id = "Comparison_Group" ) %>% rename(Average_Contribution = average, SD = sd, Consistency_Ratio = ratio, Mean_Before = ava, Mean_After = avb, Cumulative_Contribution = cumsum, P_Value = p) site_results_list_dec[[this_site]] <- site_simper_df } # Combine and export all site-level SIMPER outputs site_simper_all_dec <- bind_rows(lapply(names(site_results_list_dec), function(s) { out_df <- site_results_list_dec[[s]] out_df$Site <- s out_df })) write.csv(site_simper_all_dec, "SIMPER_results_Dec2024_Site_Based.csv", row.names = FALSE) ``` #### Model Testing for Coral Cover through time ```{r coral_cover_linear_model filtered} # ----------------------------------------------------------- # 1) Load necessary libraries # ----------------------------------------------------------- library(dplyr) library(tidyr) library(ggplot2) library(tidybayes) library(brms) # ----------------------------------------------------------- # 2) Fit a linear zero-inflated beta model # ----------------------------------------------------------- # Make sure your dataset 'filtered_prevsub' has columns: # coral (proportion or 0-1 scale), # z_date (numeric or scaled time), # site (factor), # and that you've removed any NA rows if needed. coralcov_linear <- brm( formula = coral ~ z_date + (1 | site), data = filtered_prevsub_coral, family = zero_inflated_beta(link = "logit"), chains = 4, iter = 4000, seed = 123, control = list(adapt_delta = 0.95) ) summary(coralcov_linear) # ----------------------------------------------------------- # 3) Create a new data frame for prediction # ----------------------------------------------------------- zdate_min <- min(filtered_prevsub$z_date, na.rm = TRUE) zdate_max <- max(filtered_prevsub$z_date, na.rm = TRUE) zdate_seq <- seq(zdate_min, zdate_max, length.out = 100) newdata_plot <- expand.grid( z_date = zdate_seq, site = unique(filtered_prevsub$site) ) # ----------------------------------------------------------- # 4) Obtain fitted draws on the response scale # ----------------------------------------------------------- # Note: 'add_fitted_draws()' is deprecated; consider using 'add_epred_draws()' in new code. fitted_draws_linear <- add_fitted_draws( model = coralcov_linear, newdata = newdata_plot, re_formula = NULL, # include random intercept by site scale = "response" # get predictions on [0..1] scale ) # ----------------------------------------------------------- # 5) Summarize posterior draws # ----------------------------------------------------------- fitted_summary_linear <- fitted_draws_linear %>% group_by(z_date, site) %>% summarise( mean = mean(.value), lower = quantile(.value, 0.025), upper = quantile(.value, 0.975), .groups = "drop" ) # ----------------------------------------------------------- # 6) Plot the fitted linear trend # ----------------------------------------------------------- ggplot(fitted_summary_linear, aes(x = z_date, y = mean, color = site, fill = site)) + geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, color = NA) + geom_line(size = 1) + labs( title = "Fitted Linear Curve (Zero-Inflated Beta Model)", x = "z_date (Scaled Time)", y = "Predicted Coral Cover" ) + theme_minimal(base_size = 14) + scale_color_brewer(palette = "Paired") + scale_fill_brewer(palette = "Paired") ``` ##Coral cover ZIB model ```{r coral_cover_model filtered} # Coral cover Model 2: Coral cover through time and site effects for filtered sites coralcov2 <- brm( coral ~ poly(z_date, 2) + (1 | site), data = filtered_prevsub_coral, family = zero_inflated_beta(), control = list(adapt_delta = 0.99), cores = 4, iter = 4000, seed = 123 ) summary(coralcov2) ``` ```{r coral_cov_time} # Coral cover by time for filtered data m1 <- brm( coral ~ z_date + (1 | site), data = filtered_prevsub_coral, family = zero_inflated_beta(), prior = prior(normal(0, 1), class = b) + prior(gamma(4, 0.1), class = phi), cores = 4, seed = 9, control = list(adapt_delta = 0.95), iter = 10000, thin = 10, warmup = 2000 ) summary(m1) conditional_effects(m1) ``` ```{r polynomial_random_slopes, echo=TRUE, message=FALSE, warning=FALSE} # ---------------------------------------------------------- # 1) Load required libraries # ---------------------------------------------------------- library(dplyr) library(tidyr) library(ggplot2) library(tidybayes) # for add_*_draws library(brms) # for Bayesian regression modeling library(loo) # for leave-one-out cross-validation # ---------------------------------------------------------- # 2) Define and Fit the Polynomial + Random Slopes Model # ---------------------------------------------------------- # We assume your data frame is 'filtered_prevsub' with: # - 'coral' (proportion), # - 'z_date' (scaled time), # - 'site' (factor), # - possibly zero-inflation if needed # Example formula: polynomial in z_date + random intercept + random slopes for the polynomial # The notation (poly(z_date, 2, raw = TRUE) || site) # means random intercept plus random slopes for the polynomial terms, uncorrelated. # If you want correlated random slopes, use a single vertical bar | instead of ||. coralcov_poly_randslope <- brm( formula = coral ~ poly(z_date, 2, raw = TRUE) + (poly(z_date, 2, raw = TRUE) || site), data = filtered_prevsub_coral, family = zero_inflated_beta(link = "logit"), # or just beta(link="logit") if no zero inflation chains = 4, iter = 4000, warmup = 2000, seed = 123, control = list(adapt_delta = 0.95) ) # Print a quick summary of the new model print(summary(coralcov_poly_randslope)) # ---------------------------------------------------------- # 3) Compare the New Model with Existing Models via LOO # ---------------------------------------------------------- # Suppose you already have: # - coralcov_linear (linear model, random intercept) # - coralcov2 (polynomial model, random intercept) # - coralcov_linear_slope (linear model, random slope) # - coralcov_poly_randslope (this new polynomial + random slope model) # Compute LOO for the new model loo_poly_randslope <- loo(coralcov_poly_randslope) # Compute LOO for m1 loo_m1 <- loo(m1) # For example, compare with coralcov2: loo_poly2 <- loo(coralcov2) loo_linear <- loo(coralcov_linear) # Compare model_comp <- loo_compare(loo_m1, loo_poly_randslope, loo_linear, loo_poly2) cat("\n--- LOO Comparison: coralcov_poly_randslope vs coralcov2 ---\n") print(model_comp) ``` ```{r coral_cover_Linear Regressions} library(dplyr) library(broom) library(purrr) library(tidyr) library(ggplot2) # Run linear regression for each site and extract summary statistics lm_results <- filtered_prevsub_coral %>% group_by(site) %>% nest() %>% mutate( n = map_int(data, nrow), # Count number of observations per site model = map(data, ~ tryCatch(lm(coral ~ z_date, data = .), error = function(e) NULL)), # Fit model results = map(model, ~ if (!is.null(.)) tidy(.) else NULL) # Extract summary ) %>% unnest(results) %>% filter(term == "z_date") %>% select(site, n, term, estimate, std.error, statistic, p.value) # Keep relevant stats # Rename columns for clarity colnames(lm_results) <- c("Site", "n", "Term", "Beta", "SE", "t_value", "p_value") # Print the final results print(lm_results) # Save to CSV write.csv(lm_results, "site_specific_coral_trends_with_n.csv", row.names = FALSE) # Plot trends with confidence intervals and sample sizes ggplot(lm_results, aes(x = Site, y = Beta, ymin = Beta - SE, ymax = Beta + SE)) + geom_point(size = 3, color = "blue") + geom_errorbar(width = 0.3, color = "black") + geom_hline(yintercept = 0, linetype = "dashed", color = "red") + coord_flip() + labs(title = "Site-Specific Coral Cover Trends Over Time", subtitle = "Numbers indicate sample size (n)", x = "Site", y = "Trend (Change in Coral Cover per Unit Time)") + theme_minimal() + geom_text(aes(label = paste0("n=", n)), hjust = -0.2, size = 4) ``` ##Coral cover slopes by site ```{r coral_Site_random_slopes_Model_polynomial_regressions} library(brms) library(tidybayes) library(dplyr) # Extract fixed effects using correct parameter names fixed_effects <- fixef(coralcov_poly_randslope)[c("polyz_date2rawEQTRUE1", "polyz_date2rawEQTRUE2"), "Estimate"] # Extract site-level random effects site_effects <- as.data.frame(ranef(coralcov_poly_randslope)$site[, , 1]) # Extracting mean estimates # Rename columns correctly colnames(site_effects) <- c("Intercept", "polyz_date2rawEQTRUE1", "polyz_date2rawEQTRUE2") site_effects$Site <- rownames(site_effects) # Compute site-specific slopes by adding random effects to fixed effects site_effects$Final_Slope_Linear <- fixed_effects["polyz_date2rawEQTRUE1"] + site_effects$polyz_date2rawEQTRUE1 site_effects$Final_Slope_Quadratic <- fixed_effects["polyz_date2rawEQTRUE2"] + site_effects$polyz_date2rawEQTRUE2 # Extract only the "Estimate" values (first column in the second dimension) site_effects <- as.data.frame(ranef(coralcov_poly_randslope)$site[, "Estimate", ]) # Rename columns correctly colnames(site_effects) <- c("Intercept", "polyz_date2rawEQTRUE1", "polyz_date2rawEQTRUE2") site_effects$Site <- rownames(site_effects) # Compute site-specific slopes by adding random effects to fixed effects site_effects$Final_Slope_Linear <- fixed_effects["polyz_date2rawEQTRUE1"] + site_effects$polyz_date2rawEQTRUE1 site_effects$Final_Slope_Quadratic <- fixed_effects["polyz_date2rawEQTRUE2"] + site_effects$polyz_date2rawEQTRUE2 # Save site-specific trends to CSV write.csv(site_effects, "site_specific_trends.csv", row.names = FALSE) # Print results print(site_effects) ``` ```{r coral_Site_polynomial plots} library(ggplot2) library(scales) # Custom function to determine appropriate date breaks date_breaks_fn <- function(x) { rng <- range(x, na.rm = TRUE) if(as.numeric(rng[2] - rng[1]) > 730) { # More than 2 years of data return(seq(rng[1], rng[2], by = "6 months")) } else { return(seq(rng[1], rng[2], by = "1 month")) } } # Create a quadratic trend plot of coral cover over time, faceted by site coralcover_plot2b <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = TRUE) + # Quadratic fit geom_point(size = 4, shape = 21, aes(fill = site)) + facet_wrap(~site, scales = "free_x") + # Free x-axis, same y-axis theme_bw(base_size = 12) + labs(y = "Proportion Coral Cover", x = "Date") + guides(fill = "none") + scale_x_date( date_labels = "%b %Y", breaks = date_breaks_fn ) + scale_y_continuous( limits = c(0, 0.6), expand = expansion(mult = c(0, 0.05)) ) + theme( axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), strip.text = element_text(size = 10) ) # Display the plot print(coralcover_plot2b) # Save the plot to a file ggsave( filename = "coral_cover_plot_quadratic.png", plot = coralcover_plot2b, width = 12, height = 14, dpi = 300 ) ``` ```{r coral_cover_plot} coralcover_plot1 <- ggplot(filtered_prevsub_coral, aes(x = z_date, y = coral)) + geom_smooth(method = "lm") + geom_point(size = 5, shape = 21, aes(fill = site)) + facet_wrap(~site, scales = "free_x") + theme_bw() + labs(y = "Proportion Coral Cover", x = "Scaled Date") + guides(fill = "none") + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) coralcover_plot1 ``` ### Coral Cover: Temporal, depth and algal trends ```{r coral_cover_plot_actual date} library(tidyverse) library(lubridate) library(scales) # Custom function to determine appropriate date breaks date_breaks_fn <- function(x) { rng <- range(x, na.rm = TRUE) if(as.numeric(rng[2] - rng[1]) > 730) { # More than 2 years of data return(seq(rng[1], rng[2], by = "6 months")) } else { return(seq(rng[1], rng[2], by = "1 month")) } } # Create a plot of coral cover over time with dynamic date breaks, faceted by site coralcover_plot2 <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) + geom_smooth(method = "lm") + geom_point(size = 5, shape = 21, aes(fill = site)) + facet_wrap(~site, scales = "free_x") + theme_bw(base_size = 14) + labs(y = "Proportion Coral Cover", x = "Date") + guides(fill = "none") + scale_x_date(date_labels = "%b %Y", breaks = date_breaks_fn) + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) coralcover_plot2 # Save the plot to a file in the working directory ggsave(filename = "coral_cover_plot.png", plot = coralcover_plot2, width = 10, height = 7, dpi = 300) # Create a plot of coral cover over time with dynamic date breaks, faceted by site coralcover_plot2a <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) + geom_smooth(method = "lm") + geom_point(size = 4, shape = 21, aes(fill = site)) + facet_wrap(~site, scales = "free_y") + # Allow free y-axis scaling per site theme_bw(base_size = 12) + labs(y = "Proportion Coral Cover", x = "Date") + guides(fill = "none") + scale_x_date(date_labels = "%b %Y", breaks = date_breaks_fn) + scale_y_continuous( limits = c(0, NA), # Start y-axis from 0, upper limit auto expand = expansion(mult = c(0.02, 0.1)) # Add padding above and below data ) + theme( axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), strip.text = element_text(size = 10) ) # Display the plot coralcover_plot2 # Display the plot coralcover_plot2a # Save the plot to a file in the working directory ggsave( filename = "coral_cover_plot.png", plot = coralcover_plot2, width = 12, height = 10, dpi = 300 ) # Create a plot of coral cover over time with dynamic date breaks, faceted by site coralcover_plot2b <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) + geom_smooth(method = "lm") + geom_point(size = 4, shape = 21, aes(fill = site)) + facet_wrap(~site, scales = "free_x") + # Free x-axis, same y-axis theme_bw(base_size = 12) + labs(y = "Proportion Coral Cover", x = "Date") + guides(fill = "none") + scale_x_date(date_labels = "%b %Y", breaks = date_breaks_fn) + scale_y_continuous( limits = c(0, 0.6), # Fixed y-axis range expand = expansion(mult = c(0, 0.05)) # Slight padding above the highest value ) + theme( axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), strip.text = element_text(size = 10) # Smaller facet labels ) # Display the plot coralcover_plot2b # Save the plot to a file in the working directory with increased height ggsave( filename = "coral_cover_plot_taller.png", plot = coralcover_plot2b, width = 12, # Width remains the same height = 14, # Increased height for better y-axis readability dpi = 300 ) ``` #Linear Regression Models Coral Cover each Site through time (Though not linear trends) ```{r Coral_Cover_LINEAR_trends_site_analysis, echo=TRUE, message=FALSE, warning=FALSE} # Count the number of data points per site site_sample_sizes <- filtered_prevsub_coral %>% group_by(site) %>% summarise(n = n()) # Fit linear models for each site, handling potential errors site_models <- filtered_prevsub_coral %>% group_by(site) %>% group_split() %>% set_names(filtered_prevsub_coral %>% group_by(site) %>% group_keys() %>% pull(site)) %>% map(~ tryCatch(lm(coral ~ z_date, data = .), error = function(e) NULL)) # Remove failed models (if any) valid_models <- site_models[!sapply(site_models, is.null)] # Extract model statistics safely and merge with sample sizes site_trends <- map2_df(valid_models, names(valid_models), function(model, site_name) { if (!is.null(model)) { tidy(model) %>% filter(term == "z_date") %>% # Keep only slope estimates mutate(site = site_name) %>% # Assign correct site name select(site, term, estimate, std.error, statistic, p.value) # Keep relevant columns } }) %>% left_join(site_sample_sizes, by = "site") # Merge with sample size data # Rename columns for clarity colnames(site_trends) <- c("Site", "Term", "Beta", "SE", "t_value", "p_value", "n") # Save output to CSV file write_csv(site_trends, "site_specific_coral_trends_with_n.csv") site_trends <- site_trends %>% arrange(desc(abs(Beta))) # Optional: sort by magnitude of slope # Print the results print(site_trends) # Plot the trends with confidence intervals, including sample sizes in labels ggplot(site_trends, aes(x = Site, y = Beta, ymin = Beta - SE, ymax = Beta + SE)) + geom_point(size = 3, color = "blue") + geom_errorbar(width = 0.3, color = "black") + geom_hline(yintercept = 0, linetype = "dashed", color = "red") + coord_flip() + labs(title = "Site-Specific Coral Cover Trends Over Time", subtitle = "Numbers indicate sample size (n)", x = "Site", y = "Trend (Change in Coral Cover per Unit Time)") + theme_minimal() + geom_text(aes(label = paste0("n=", n)), hjust = -0.2, size = 4) ``` ##Coral cover and Depth polynomial model ```{r depth-nonlinear, echo=TRUE, message=FALSE, warning=FALSE} # Nonlinear (polynomial) model depth_poly_model <- lm(coral ~ poly(depth, 2), data = filtered_prevsub_coral) # Summary of the polynomial model summary(depth_poly_model) # Visualization: Coral cover by depth (nonlinear trend) ggplot(filtered_prevsub_coral, aes(x = depth, y = coral)) + geom_point(alpha = 0.6, color = "blue") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = TRUE, color = "darkgreen") + theme_minimal() + labs( title = "Nonlinear Relationship Between Coral Cover and Depth", x = "Depth (m)", y = "Coral Cover" ) ``` ## Algal change through time ```{r Algae through time, echo=TRUE, message=FALSE, warning=FALSE} # Load necessary libraries if (!requireNamespace("mgcv", quietly = TRUE)) install.packages("mgcv") if (!requireNamespace("gratia", quietly = TRUE)) install.packages("gratia") library(mgcv) library(gratia) library(dplyr) # Define benthic variables to model trends benthic_vars <- c("macroalgae", "caulerpa", "turf", "cyanobacteria", "cca") # Store models in a list algae_models <- list() # Fit GAMs for each benthic variable with time as a smooth term and site as a random effect for (var in benthic_vars) { formula_str <- paste(var, "~ s(z_date) + s(site, bs='re')") model_formula <- as.formula(formula_str) algae_models[[var]] <- gam( formula = model_formula, data = filtered_prevsub_coral, family = betar(), # Beta regression for proportional data method = "REML" # Penalized smoothing ) cat("\nGAM Summary for", var, ":\n") print(summary(algae_models[[var]])) } # Plot smooth effects of time for each benthic variable par(mfrow = c(2, 3)) # Arrange plots in a grid for (var in benthic_vars) { draw(algae_models[[var]]) } # Model diagnostics for (var in benthic_vars) { cat("\nChecking diagnostics for", var, ":\n") gam.check(algae_models[[var]]) } ``` ### coral and turf LM by site and date ```{r coral-turf-interaction, echo=TRUE, message=FALSE, warning=FALSE} # Analyze the relationship between coral cover and turf coral_turf_model <- lm(coral ~ turf + site + z_date, data = filtered_prevsub_coral) # Model summary summary(coral_turf_model) # Visualize coral cover vs. turf print( ggplot(filtered_prevsub, aes(x = turf, y = coral, color = site)) + geom_point(alpha = 0.6) + geom_smooth(method = "lm", se = FALSE, color = "blue") + theme_minimal() + labs( title = "Coral Cover vs. Turf Cover", x = "Turf Cover", y = "Coral Cover" ) ) ``` ###Local Temperature (HOBO) and coral cover among sites ```{r coral HOBO temp site fixed effect} # Load required package library(betareg) # 1) Filter dataset to rows with valid temperature measurements, coral cover, and site temp_data_coral <- filtered_prevsub_coral %>% filter(!is.na(mean_temp) & !is.na(max_temp) & !is.na(coral) & !is.na(site)) # 2) Fit a beta regression model of coral cover on mean_temp, max_temp, and site beta_coral_full <- betareg( coral ~ mean_temp + max_temp + site, data = temp_data_coral ) # Examine the model summary summary(beta_coral_full) # 3) If one temperature variable (e.g., max_temp) is not significant, refine the model beta_coral_refined <- betareg( coral ~ mean_temp + site, data = temp_data_coral ) # Compare the two models by AIC (optional) AIC(beta_coral_full, beta_coral_refined) # 4) Inspect the refined model summary(beta_coral_refined) # 5) (Optional) Plot diagnostic checks for the refined model par(mfrow = c(2, 2)) plot(beta_coral_refined) ``` #Proportion of Coral Taxa Analysis ## multivariate_beta_model shows no sign change in prop ```{r Beta_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE} library(brms) # For multiple responses in a single brms model, we specify a list of families—one for each response. # Because you have 5 responses, pass a list of 5 'beta()' families. multivariate_beta_model <- brm( mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date + (1 | site), data = filtered_prevsub, family = list(Beta(), Beta(), Beta(), Beta(), Beta()), # Capital B control = list(adapt_delta = 0.95), cores = 4, iter = 4000, seed = 123, ) summary(multivariate_beta_model) ``` ## multivariate_beta_interaction allow the effect of time (z_date) to vary by site ```{r Beta_interaction_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE} multivariate_beta_interaction <- brm( mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date * site + (1 | site), data = filtered_prevsub, family = list(Beta(), Beta(), Beta(), Beta(), Beta()), control = list(adapt_delta = 0.95), cores = 4, iter = 4000, seed = 123 ) summary(multivariate_beta_interaction) ``` ## Random Slopes proportion coral species - using this model ```{r Beta_random_slopes_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE} multivariate_beta_random_slopes <- brm( mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date + (1 + z_date | site), data = filtered_prevsub, family = list(Beta(), Beta(), Beta(), Beta(), Beta()), control = list(adapt_delta = 0.95), cores = 4, iter = 4000, seed = 123 ) summary(multivariate_beta_random_slopes) ``` ## separate beta regression models for each site (no additional fit) ```{r site_models_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE} library(dplyr) library(brms) # Fit models for each site site_models <- filtered_prevsub %>% group_by(site) %>% group_map(~ brm( mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date, data = .x, family = list(Beta(), Beta(), Beta(), Beta(), Beta()), control = list(adapt_delta = 0.95), cores = 4, iter = 4000, seed = 123 )) # Function to extract fixed effects from each model extract_fixed_effects <- function(model) { coef_summary <- as.data.frame(fixef(model)) # Extract fixed effects coef_summary$Parameter <- rownames(coef_summary) # Add parameter names return(coef_summary) } # Apply the function to all site models and store results in a list site_fixed_effects <- lapply(site_models, extract_fixed_effects) # Convert the list into a single data frame for easier visualization site_results <- bind_rows(site_fixed_effects, .id = "Site") # Rename columns appropriately based on the actual number of columns in fixef() colnames(site_results) <- c("Site", "Parameter", "Estimate", "SE", "L95_CI", "U95_CI") # Display the cleaned-up results print(site_results) # Save results to a CSV file for further analysis write.csv(site_results, "Site_Specific_Beta_Regression_Results.csv", row.names = FALSE) ``` ## If the trends appear non-linear, use smooth terms for time: (NOTE trends are approx linear) ```{r non-linear assess, echo=TRUE, message=FALSE, warning=FALSE} # Install necessary packages if not already installed if (!requireNamespace("mgcv", quietly = TRUE)) install.packages("mgcv") if (!requireNamespace("gratia", quietly = TRUE)) install.packages("gratia") # Load libraries library(mgcv) # For GAM models library(gratia) # For GAM visualization library(dplyr) # Data manipulation # Define coral proportion variables coral_taxa <- c("proppoc", "proppor", "proppav", "proppsam", "propoth") # Store GAM models in a list gam_models <- list() # Loop over each coral taxon and fit a GAM for (taxon in coral_taxa) { formula_str <- paste(taxon, "~ s(z_date, k=5) + s(site, bs='re')") # Smooth time, random site effect model_formula <- as.formula(formula_str) gam_models[[taxon]] <- gam( formula = model_formula, data = filtered_prevsub, family = betar(), # Beta regression for proportional data method = "REML" # Penalized smoothing ) cat("\nGAM Summary for", taxon, ":\n") print(summary(gam_models[[taxon]])) } # Extract significance of smooth terms for (taxon in coral_taxa) { cat("\nChecking significance of smooth term for", taxon, ":\n") print(summary(gam_models[[taxon]])$s.table) } # Fit linear models for comparison lm_models <- list() for (taxon in coral_taxa) { lm_models[[taxon]] <- lm(as.formula(paste(taxon, "~ z_date + site")), data = filtered_prevsub) } # Compare models using AIC aic_results <- data.frame( Taxon = coral_taxa, GAM_AIC = sapply(gam_models, AIC), LM_AIC = sapply(lm_models, AIC) ) print(aic_results) # Set up plotting area par(mfrow = c(2, 3)) # Loop over each coral taxon and plot the smooth function of time for (taxon in coral_taxa) { draw(gam_models[[taxon]]) # Uses gratia package for smooth function visualization } ``` ```{r GAM_models_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE} multivariate_gam <- brm( mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ s(z_date, by = site) + (1 | site), data = filtered_prevsub, family = list(Beta(), Beta(), Beta(), Beta(), Beta()), control = list(adapt_delta = 0.95), cores = 4, iter = 4000, seed = 123 ) ``` ### PCA Analysis ##PCA Full Model ```{r pca-analysis-cleaned-filtered, echo=TRUE, message=FALSE, warning=FALSE, cache=TRUE} library(dplyr) library(ggplot2) library(ggfortify) # Summarize data by site for the columns of interest pca_data <- filtered_prevsub_coral %>% group_by(site) %>% summarise( across( c(bleached, depth, coral, macroalgae, turf, cyanobacteria, caulerpa, deadcoral, cca, rubble, sand, rock, totalpoc, totalpor, totalpav, totalpsamm, totalother, proppoc, proppor, proppsam, proppav, propoth), ~ mean(.x, na.rm = TRUE) ) ) %>% ungroup() # Keep only numeric columns (excluding site) pca_data_numeric_filtered <- pca_data %>% dplyr::select(where(is.numeric)) # Remove columns with too many missing values (>50%) pca_data_numeric_filtered <- pca_data_numeric_filtered %>% dplyr::select(where(~ mean(is.na(.)) < 0.5)) # Remove rows with too many missing values (>50%) pca_data_numeric_filtered <- pca_data_numeric_filtered %>% filter(rowMeans(is.na(.)) < 0.5) # Replace remaining NAs with column means pca_data_numeric_filtered <- pca_data_numeric_filtered %>% mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) # Remove rows where all values are zero pca_data_numeric_filtered <- pca_data_numeric_filtered[rowSums(pca_data_numeric_filtered, na.rm = TRUE) > 0, ] # Remove zero-variance columns zero_var_cols <- which(sapply(pca_data_numeric_filtered, function(col) { if (all(is.na(col))) return(TRUE) # Remove all-NA columns if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns return(FALSE) })) if (length(zero_var_cols) > 0) { message("Removing zero-variance columns: ", paste(names(pca_data_numeric_filtered)[zero_var_cols], collapse = ", ")) pca_data_numeric_filtered <- pca_data_numeric_filtered[, -zero_var_cols, drop = FALSE] } # Ensure at least two columns remain for PCA if (ncol(pca_data_numeric_filtered) < 2) { stop("Error: PCA requires at least two numeric columns. Check dataset preparation.") } # Scale data pca_data_scaled_filtered <- scale(pca_data_numeric_filtered) # Remove rows with any non-finite values row_is_finite_filtered <- apply(pca_data_scaled_filtered, 1, function(x) all(is.finite(x))) pca_data_cleaned_filtered <- pca_data_scaled_filtered[row_is_finite_filtered, , drop = FALSE] pca_data_used_filtered <- pca_data_numeric_filtered[row_is_finite_filtered, , drop = FALSE] # FIXED LINE # Ensure there are enough rows for PCA if (nrow(pca_data_cleaned_filtered) < 2) { stop("Error: PCA requires at least two observations after cleaning.") } # Perform PCA pca_res_filtered <- tryCatch( prcomp(pca_data_cleaned_filtered, center = TRUE, scale. = TRUE), error = function(e) { stop("Error in PCA computation: ", e$message) } ) # Create a data frame with PCA scores and site info if ("site" %in% colnames(pca_data)) { pca_scores_filtered <- data.frame(pca_res_filtered$x, site = pca_data$site[row_is_finite_filtered]) } else { pca_scores_filtered <- data.frame(pca_res_filtered$x) } # Extract PCA loadings pca_loadings_filtered <- as.data.frame(pca_res_filtered$rotation) pca_loadings_filtered$Variable <- rownames(pca_loadings_filtered) # Print numerical outputs print(summary(pca_res_filtered)) print(head(pca_scores_filtered)) print(pca_loadings_filtered) # Save PCA outputs write.csv(summary(pca_res_filtered)$importance, "PCA_Summary.csv", row.names = TRUE) write.csv(pca_scores_filtered, "PCA_Scores.csv", row.names = TRUE) write.csv(pca_loadings_filtered, "PCA_Loadings.csv", row.names = TRUE) cat("PCA summary, scores, and loadings have been saved as CSV files.\n") # PCA plot if ("site" %in% colnames(pca_scores_filtered)) { pca_plot_filtered <- autoplot( pca_res_filtered, data = pca_scores_filtered, colour = "site", label = FALSE, loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 3) + theme(legend.position = "none") } else { pca_plot_filtered <- autoplot( pca_res_filtered, loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + theme(legend.position = "none") } # Display PCA plot print(pca_plot_filtered) ``` ##PCA During Bleaching ```{r PCA_analysis_up to July 2024 During Bleaching data, echo=TRUE, message=FALSE, warning=FALSE} # Load required libraries library(dplyr) library(ggplot2) library(ggfortify) # 1) Filter dataset to include only data up to July 2024 filtered_pca_data <- filtered_prevsub_coral %>% filter(date <= as.Date("2024-07-18")) # Ensuring only relevant timeframe # 2) Summarize data by site for the columns of interest pca_data <- filtered_pca_data %>% group_by(site) %>% summarise( across( c(bleached, diseased, depth, coral, mean_temp, max_temp, min_temp, macroalgae, turf, cyanobacteria, caulerpa, deadcoral, cca, rubble, sand, rock, totalpoc, totalpor, totalpav, totalpsamm, totalother, ph, sal, oxi, nit, pho), ~ mean(.x, na.rm = TRUE) ) ) %>% ungroup() # 3) Filter dataset: Remove selected variables pca_data_filtered <- pca_data %>% dplyr::select(-c(diseased, pho, ph, sal, min_temp, oxi, mean_temp, max_temp, nit, rubble, sand, cyanobacteria)) # 4) Keep only numeric columns (excluding site) pca_data_numeric_filtered <- pca_data_filtered %>% dplyr::select(where(is.numeric)) # 5) Remove columns with too many missing values (>50% missing) pca_data_numeric_filtered <- pca_data_numeric_filtered %>% dplyr::select(where(~ mean(is.na(.)) < 0.5)) # 6) Remove rows with too many missing values (>50% missing) pca_data_numeric_filtered <- pca_data_numeric_filtered %>% filter(rowMeans(is.na(.)) < 0.5) # 7) Replace remaining NAs with column means pca_data_numeric_filtered <- pca_data_numeric_filtered %>% mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) # 8) Remove rows where **all** values are zero pca_data_numeric_filtered <- pca_data_numeric_filtered[rowSums(pca_data_numeric_filtered, na.rm = TRUE) > 0, ] # 9) Remove zero-variance columns zero_var_cols <- which(sapply(pca_data_numeric_filtered, function(col) { if (all(is.na(col))) return(TRUE) # Remove all-NA columns if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns return(FALSE) })) if (length(zero_var_cols) > 0) { message("Removing zero-variance columns: ", paste(names(pca_data_numeric_filtered)[zero_var_cols], collapse = ", ")) pca_data_numeric_filtered <- pca_data_numeric_filtered[, -zero_var_cols, drop = FALSE] } # 10) Ensure at least two columns remain for PCA if (ncol(pca_data_numeric_filtered) < 2) { stop("Error: PCA requires at least two numeric columns. Check dataset preparation.") } # 11) Scale data (mean=0, sd=1) pca_data_scaled_filtered <- scale(pca_data_numeric_filtered) # 12) Remove rows with any non-finite values (NA/Inf) row_is_finite_filtered <- apply(pca_data_scaled_filtered, 1, function(x) all(is.finite(x))) pca_data_cleaned_filtered <- pca_data_scaled_filtered[row_is_finite_filtered, , drop = FALSE] pca_data_used_filtered <- pca_data_filtered[row_is_finite_filtered, , drop = FALSE] # 13) Perform PCA on the cleaned data pca_res_filtered <- tryCatch( prcomp(pca_data_cleaned_filtered, center = TRUE, scale. = TRUE), error = function(e) { stop("Error in PCA computation: ", e$message) } ) # 14) Create a data frame with PCA scores + site info if ("site" %in% colnames(pca_data_used_filtered)) { pca_scores_filtered <- data.frame(pca_res_filtered$x, site = pca_data_used_filtered$site) } else { pca_scores_filtered <- data.frame(pca_res_filtered$x) } # 15) Extract and organize PCA loadings pca_loadings_filtered <- as.data.frame(pca_res_filtered$rotation) pca_loadings_filtered$Variable <- rownames(pca_loadings_filtered) # Print numerical outputs: PCA Summary & Scores print(summary(pca_res_filtered)) print(head(pca_scores_filtered)) print(pca_loadings_filtered) # Save PCA Summary pca_summary <- summary(pca_res_filtered) write.csv(pca_summary$importance, "PCA_Summary_Before_tot.csv", row.names = TRUE) # Save PCA Scores write.csv(pca_scores_filtered, "PCA_Scores_Before_tot.csv", row.names = TRUE) # Save PCA Loadings write.csv(pca_loadings_filtered, "PCA_Loadings_Before_tot.csv", row.names = TRUE) # Confirm successful export cat("PCA summary, scores, and loadings have been saved as CSV files with 'Before' in filenames.\n") # Load required packages library(ggplot2) library(ggfortify) library(RColorBrewer) # Ensure enough colors for the number of sites num_sites <- length(unique(pca_scores_filtered$site)) site_colors <- colorRampPalette(brewer.pal(8, "Dark2"))(num_sites) # Dynamically generate colors # Create PCA plot with improved formatting if ("site" %in% colnames(pca_scores_filtered)) { pca_plot_filtered <- autoplot( pca_res_filtered, data = pca_scores_filtered, colour = "site", label = FALSE, # Remove default text labels from autoplot loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 5, fontface = "bold") + # Increase font size scale_color_manual(values = site_colors) + # Use dynamically generated color palette theme_minimal(base_size = 14) + # Increase overall font size theme( legend.position = "none", # Remove legend axis.title = element_text(size = 16, face = "bold"), # Increase axis title size axis.text = element_text(size = 14), # Increase axis text size plot.title = element_text(size = 18, face = "bold") # Increase title size ) + labs(title = "PCA Analysis of Reef Benthic Composition (Before)") } else { pca_plot_filtered <- autoplot( pca_res_filtered, loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + theme_minimal(base_size = 14) + theme( legend.position = "none", axis.title = element_text(size = 16, face = "bold"), axis.text = element_text(size = 14), plot.title = element_text(size = 18, face = "bold") ) + labs(title = "PCA Analysis of Reef Benthic Composition (Before)") } # Explicitly plot the PCA visualization plot(pca_plot_filtered) # Save the updated PCA plot ggsave("PCA_Plot_Before_tot.png", plot = pca_plot_filtered, width = 8, height = 6, dpi = 300) # Confirm successful export cat("Updated PCA plot has been saved as 'PCA_Plot_Before.png' with improved font sizes and site colors.\n") ``` ##PCA After Bleaching ```{r PCA_analysis_After_AugusttoDecember_2024, echo=TRUE, message=FALSE, warning=FALSE} # Load required libraries library(dplyr) library(ggplot2) library(ggfortify) # 1) Filter dataset to include only data from August 20, 2024, to December 2024 filtered_pca_data_after <- filtered_prevsub %>% filter(date >= as.Date("2024-08-20") & date <= as.Date("2024-12-31")) # 2) Summarize data by site for the columns of interest pca_data_after <- filtered_pca_data_after %>% group_by(site) %>% summarise( across( c(bleached, diseased, depth, coral, mean_temp, max_temp, min_temp, macroalgae, turf, cyanobacteria, caulerpa, deadcoral, cca, rubble, sand, rock, totalpoc, totalpor, totalpav, totalpsamm, totalother, ph, sal, oxi, nit, pho), ~ mean(.x, na.rm = TRUE) ) ) %>% ungroup() # 3) Filter dataset: Remove selected variables (same as "Before" PCA) pca_data_filtered_after <- pca_data_after %>% dplyr::select(-c(diseased, pho, ph, sal, min_temp, oxi, mean_temp, max_temp, nit, rubble, sand, cyanobacteria)) # 4) Keep only numeric columns (excluding site) pca_data_numeric_after <- pca_data_filtered_after %>% dplyr::select(where(is.numeric)) # 5) Remove columns with too many missing values (>50% missing) pca_data_numeric_after <- pca_data_numeric_after %>% dplyr::select(where(~ mean(is.na(.)) < 0.5)) # 6) Remove rows with too many missing values (>50% missing) pca_data_numeric_after <- pca_data_numeric_after %>% filter(rowMeans(is.na(.)) < 0.5) # 7) Replace remaining NAs with column means pca_data_numeric_after <- pca_data_numeric_after %>% mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) # 8) Remove rows where **all** values are zero pca_data_numeric_after <- pca_data_numeric_after[rowSums(pca_data_numeric_after, na.rm = TRUE) > 0, ] # 9) Remove zero-variance columns zero_var_cols <- which(sapply(pca_data_numeric_after, function(col) { if (all(is.na(col))) return(TRUE) # Remove all-NA columns if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns return(FALSE) })) if (length(zero_var_cols) > 0) { message("Removing zero-variance columns: ", paste(names(pca_data_numeric_after)[zero_var_cols], collapse = ", ")) pca_data_numeric_after <- pca_data_numeric_after[, -zero_var_cols, drop = FALSE] } # 10) Ensure at least two columns remain for PCA if (ncol(pca_data_numeric_after) < 2) { stop("Error: PCA requires at least two numeric columns. Check dataset preparation.") } # 11) Scale data (mean=0, sd=1) pca_data_scaled_after <- scale(pca_data_numeric_after) # 12) Remove rows with any non-finite values (NA/Inf) row_is_finite_after <- apply(pca_data_scaled_after, 1, function(x) all(is.finite(x))) pca_data_cleaned_after <- pca_data_scaled_after[row_is_finite_after, , drop = FALSE] pca_data_used_after <- pca_data_filtered_after[row_is_finite_after, , drop = FALSE] # 13) Perform PCA on the cleaned data pca_res_after <- tryCatch( prcomp(pca_data_cleaned_after, center = TRUE, scale. = TRUE), error = function(e) { stop("Error in PCA computation: ", e$message) } ) # 14) Create a data frame with PCA scores + site info if ("site" %in% colnames(pca_data_used_after)) { pca_scores_after <- data.frame(pca_res_after$x, site = pca_data_used_after$site) } else { pca_scores_after <- data.frame(pca_res_after$x) } # 15) Extract and organize PCA loadings pca_loadings_after <- as.data.frame(pca_res_after$rotation) pca_loadings_after$Variable <- rownames(pca_loadings_after) # Print numerical outputs: PCA Summary & Scores print(summary(pca_res_after)) print(head(pca_scores_after)) print(pca_loadings_after) # Save PCA Summary pca_summary_after <- summary(pca_res_after) write.csv(pca_summary_after$importance, "PCA_Summary_After_tot.csv", row.names = TRUE) # Save PCA Scores write.csv(pca_scores_after, "PCA_Scores_After_tot.csv", row.names = TRUE) # Save PCA Loadings write.csv(pca_loadings_after, "PCA_Loadings_After_tot.csv", row.names = TRUE) # Confirm successful export cat("PCA summary, scores, and loadings have been saved as CSV files with 'After' in filenames.\n") # Load required packages library(ggplot2) library(ggfortify) library(RColorBrewer) # Ensure enough colors for the number of sites num_sites <- length(unique(pca_scores_after$site)) site_colors <- colorRampPalette(brewer.pal(8, "Dark2"))(num_sites) # Dynamically generate colors # Create PCA plot with improved formatting if ("site" %in% colnames(pca_scores_after)) { pca_plot_after <- autoplot( pca_res_after, data = pca_scores_after, colour = "site", label = FALSE, # Remove default text labels from autoplot loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 5, fontface = "bold") + # Increase font size scale_color_manual(values = site_colors) + # Use dynamically generated color palette theme_minimal(base_size = 14) + # Increase overall font size theme( legend.position = "none", # Remove legend axis.title = element_text(size = 16, face = "bold"), # Increase axis title size axis.text = element_text(size = 14), # Increase axis text size plot.title = element_text(size = 18, face = "bold") # Increase title size ) + labs(title = "PCA Analysis of Reef Benthic Composition (After)") } else { pca_plot_after <- autoplot( pca_res_after, loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + theme_minimal(base_size = 14) + theme( legend.position = "none", axis.title = element_text(size = 16, face = "bold"), axis.text = element_text(size = 14), plot.title = element_text(size = 18, face = "bold") ) + labs(title = "PCA Analysis of Reef Benthic Composition (After)") } # Explicitly plot the PCA visualization plot(pca_plot_after) # Save the updated PCA plot ggsave("PCA_Plot_After_tot.png", plot = pca_plot_after, width = 8, height = 6, dpi = 300) # Confirm successful export cat("Updated PCA plot has been saved as 'PCA_Plot_After.png' with improved font sizes and site colors.\n") ``` ##PCA Latest Data ```{r PCA_analysis_Latest_AfterJanuary_2025, echo=TRUE, message=FALSE, warning=FALSE} # Load required libraries library(dplyr) library(ggplot2) library(ggfortify) library(RColorBrewer) # Ensure dataset exists if (!exists("filtered_prevsub")) { stop("Error: The dataset 'filtered_prevsub' is missing. Ensure it's loaded before running this script.") } # Step 1: Remove Chorro transects 1, 2, and 3 **before selecting latest data** if ("transect" %in% colnames(filtered_prevsub)) { filtered_prevsub <- filtered_prevsub %>% filter(!(site == "Chorro" & transect %in% c(1, 2, 3))) # Exclude these transects } else { stop("Error: 'transect' column is missing. Ensure it's included in filtered_prevsub.") } # Step 2: Confirm Chorro still exists but without transects 1, 2, 3 remaining_transects <- unique(filtered_prevsub$transect[filtered_prevsub$site == "Chorro"]) if (length(remaining_transects) == 0) { stop("Error: No Chorro transects remain after filtering. Check dataset.") } else { cat("✅ Chorro transects 1, 2, and 3 removed. Remaining transects:\n") print(remaining_transects) } # Step 3: Now filter dataset to **only include data from January 2025 onwards** filtered_pca_data_latest <- filtered_prevsub %>% filter(date >= as.Date("2025-01-01")) # Step 4: Select **latest recorded data per site** after filtering latest_pca_data <- filtered_pca_data_latest %>% group_by(site) %>% slice_max(order_by = date, n = 1, with_ties = FALSE) %>% ungroup() # Step 5: Select the key benthic variables (aligning with Before/After PCA) pca_data_latest <- latest_pca_data %>% select( site, bleached, coral, turf, cca, depth, rock, deadcoral, totalpoc, totalother, totalpor, deadcoral, totalpsamm, totalpav, macroalgae, caulerpa, ) %>% drop_na() # Remove any rows with missing values # Step 6: Keep only numeric columns (excluding site) pca_data_numeric_latest <- pca_data_latest %>% select(where(is.numeric)) # Step 7: Remove zero-variance columns zero_var_cols <- which(sapply(pca_data_numeric_latest, function(col) { if (all(is.na(col))) return(TRUE) # Remove all-NA columns if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns return(FALSE) })) if (length(zero_var_cols) > 0) { message("Removing zero-variance columns: ", paste(names(pca_data_numeric_latest)[zero_var_cols], collapse = ", ")) pca_data_numeric_latest <- pca_data_numeric_latest[, -zero_var_cols, drop = FALSE] } # Ensure at least two columns remain for PCA if (ncol(pca_data_numeric_latest) < 2) { stop("Error: PCA requires at least two numeric columns. Check dataset preparation.") } # Step 8: Scale data (mean=0, sd=1) pca_data_scaled_latest <- scale(pca_data_numeric_latest) # Step 9: Remove rows with any non-finite values (NA/Inf) row_is_finite_latest <- apply(pca_data_scaled_latest, 1, function(x) all(is.finite(x))) pca_data_cleaned_latest <- pca_data_scaled_latest[row_is_finite_latest, , drop = FALSE] pca_data_used_latest <- pca_data_latest[row_is_finite_latest, , drop = FALSE] # Step 10: Perform PCA on the cleaned data pca_res_latest <- tryCatch( prcomp(pca_data_cleaned_latest, center = TRUE, scale. = TRUE), error = function(e) { stop("Error in PCA computation: ", e$message) } ) # Step 11: Create a data frame with PCA scores + site info if ("site" %in% colnames(pca_data_used_latest)) { pca_scores_latest <- data.frame(pca_res_latest$x, site = pca_data_used_latest$site) } else { pca_scores_latest <- data.frame(pca_res_latest$x) } # Step 12: Extract and organize PCA loadings pca_loadings_latest <- as.data.frame(pca_res_latest$rotation) pca_loadings_latest$Variable <- rownames(pca_loadings_latest) # Print numerical outputs: PCA Summary & Scores print(summary(pca_res_latest)) print(head(pca_scores_latest)) print(pca_loadings_latest) # Save PCA Summary write.csv(summary(pca_res_latest)$importance, "PCA_Summary_Latest_tot.csv", row.names = TRUE) write.csv(pca_scores_latest, "PCA_Scores_Latest_tot.csv", row.names = TRUE) write.csv(pca_loadings_latest, "PCA_Loadings_Latest_tot.csv", row.names = TRUE) cat("✅ PCA results saved as CSV files with 'Latest' in filenames.\n") # Step 13: Create PCA plot num_sites <- length(unique(pca_scores_latest$site)) site_colors <- colorRampPalette(brewer.pal(8, "Dark2"))(num_sites) # Generate colors pca_plot_latest <- autoplot( pca_res_latest, data = pca_scores_latest, colour = "site", label = FALSE, loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 5, fontface = "bold") + scale_color_manual(values = site_colors) + theme_minimal(base_size = 14) + theme( legend.position = "none", axis.title = element_text(size = 16, face = "bold"), axis.text = element_text(size = 14), plot.title = element_text(size = 18, face = "bold") ) + labs(title = "PCA Analysis of Reef Benthic Composition (Latest - 2025)") # Plot the PCA visualization plot(pca_plot_latest) # Save the updated PCA plot ggsave("PCA_Plot_Latest_tot.png", plot = pca_plot_latest, width = 8, height = 6, dpi = 300) cat("✅ PCA plot saved as 'PCA_Plot_Latest.png' with improved formatting.\n") ``` ##PCA Comparisons ```{r PCA comparisons, echo=TRUE, message=FALSE, warning=FALSE} # Extract variance explained (proportion of variance per PC) variance_comparison <- data.frame( Time_Period = c("Filtered", "After", "Latest"), PC1 = c(pca_res_filtered$sdev[1]^2 / sum(pca_res_filtered$sdev^2), pca_res_after$sdev[1]^2 / sum(pca_res_after$sdev^2), pca_res_latest$sdev[1]^2 / sum(pca_res_latest$sdev^2)), PC2 = c(pca_res_filtered$sdev[2]^2 / sum(pca_res_filtered$sdev^2), pca_res_after$sdev[2]^2 / sum(pca_res_after$sdev^2), pca_res_latest$sdev[2]^2 / sum(pca_res_latest$sdev^2)) ) print(variance_comparison) # Check variables in each PCA loading matrix print(rownames(pca_loadings_filtered)) print(rownames(pca_loadings_after)) print(rownames(pca_loadings_latest)) # Find common variables across all time periods common_vars <- Reduce(intersect, list(rownames(pca_loadings_filtered), rownames(pca_loadings_after), rownames(pca_loadings_latest))) print(common_vars) # See which variables are shared across all PCAs library(ggplot2) library(reshape2) # Identify common variables across all PCA loadings common_vars <- Reduce(intersect, list(rownames(pca_loadings_filtered), rownames(pca_loadings_after), rownames(pca_loadings_latest))) print(common_vars) # Check shared variables # Subset PCA loadings to include only common variables pca_loadings_df <- data.frame( Variable = common_vars, PC1_Filtered = pca_loadings_filtered[common_vars, 1], PC1_After = pca_loadings_after[common_vars, 1], PC1_Latest = pca_loadings_latest[common_vars, 1] ) # Print to verify that all rows match print(pca_loadings_df) # Reshape for ggplot pca_loadings_melted <- melt(pca_loadings_df, id.vars = "Variable") # Plot PCA loadings across time ggplot(pca_loadings_melted, aes(x = Variable, y = value, fill = variable)) + geom_bar(stat = "identity", position = "dodge") + coord_flip() + theme_minimal() + labs(title = "PCA Loadings Comparison Across Time Periods", x = "Benthic Variable", y = "PC1 Loading", fill = "Time Period") # Scatter plot showing site movement in PCA space plot(pca_scores_filtered[,1], pca_scores_filtered[,2], col = "blue", pch = 16, xlab = "PC1", ylab = "PC2", main = "PCA Trajectories Across Time") points(pca_scores_after[,1], pca_scores_after[,2], col = "orange", pch = 16) points(pca_scores_latest[,1], pca_scores_latest[,2], col = "red", pch = 16) arrows(pca_scores_filtered[,1], pca_scores_filtered[,2], pca_scores_latest[,1], pca_scores_latest[,2], col = "black", length = 0.1) legend("topright", legend = c("Filtered", "After", "Latest"), col = c("blue", "orange", "red"), pch = 16) ``` ```{r combined PCA plots, echo=TRUE, message=FALSE, warning=FALSE} # Load required libraries library(ggfortify) library(patchwork) # For combining plots # Define a stronger color palette for better visibility site_colors <- c( "Ancla" = "#E41A1C", "Barco Profundo" = "#377EB8", "Barco Somero" = "#4DAF4A", "Chorro" = "#984EA3", "Cueva" = "#FF7F00", "Esquina" = "#F781BF", "San Josecito" = "#A65628", "Tina" = "#228833", "Este Intermedio" = "#CC79A7" ) # Function to generate PCA plots with percentage variance in axis labels create_pca_plot <- function(pca_res, pca_scores, title) { # Extract variance explained by PC1 and PC2 variance_explained <- summary(pca_res)$importance[2, 1:2] * 100 # Convert to percentage pc1_label <- paste0("PC1 (", round(variance_explained[1], 1), "%)") pc2_label <- paste0("PC2 (", round(variance_explained[2], 1), "%)") if ("site" %in% colnames(pca_scores)) { pca_plot <- autoplot( pca_res, data = pca_scores, colour = "site", label = FALSE, # Remove default text labels from autoplot loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + scale_color_manual(values = site_colors) + # Use site-specific colors geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 3.5, fontface = "bold") + # Adjusted label size theme_minimal(base_size = 14) + # Increase font size for readability theme( legend.position = "none", # Remove legend axis.title = element_text(size = 16, face = "bold"), axis.text = element_text(size = 14), plot.title = element_text(size = 18, face = "bold", hjust = 0.5) ) + labs(title = title, x = pc1_label, y = pc2_label) # Use dynamic axis labels } else { pca_plot <- autoplot( pca_res, loadings = TRUE, loadings.colour = "black", loadings.label = TRUE, loadings.label.colour = "black" ) + theme_minimal(base_size = 14) + theme( legend.position = "none", axis.title = element_text(size = 16, face = "bold"), axis.text = element_text(size = 14), plot.title = element_text(size = 18, face = "bold", hjust = 0.5) ) + labs(title = title, x = pc1_label, y = pc2_label) # Use dynamic axis labels } return(pca_plot) } # Generate PCA plots with updated titles and dynamic axis labels pca_plot_before <- create_pca_plot(pca_res_filtered, pca_scores_filtered, "During Bleaching") pca_plot_after <- create_pca_plot(pca_res_after, pca_scores_after, "After Bleaching") pca_plot_latest <- create_pca_plot(pca_res_latest, pca_scores_latest, "Latest Data") # Combine the three plots in a single row for direct comparison combined_pca_plot <- (pca_plot_before | pca_plot_after | pca_plot_latest) # Display the combined PCA plots print(combined_pca_plot) ``` ##Composite Restoration Feasibility Analysis ```{r shannon-summary-analysis-latest, echo=TRUE, message=FALSE, warning=FALSE} # Load necessary libraries library(dplyr) library(ggplot2) library(vegan) # For Shannon diversity index calculation library(FSA) # For Dunn's post-hoc test library(car) # 1) Identify the latest date per site latest_data <- filtered_prevsub_coral %>% group_by(site) %>% filter(date == max(date, na.rm = TRUE)) %>% ungroup() # 2) Define the species count columns (Tot_ dataset) species_count_columns <- c( "Tot_pocillopora", "Tot_poriteslobata", "Tot_pavonaclavus", "Tot_pavonagigantea", "Tot_pavonamaldivensis", "Tot_pavonavarians", "Tot_pavonachiriquiensis", "Tot_pavonafrondifera", "Tot_psammacorastellata", "Tot_psammacoraprofundacella", "Tot_gardineroserisplanulata", "Tot_tubastraeacoccinea", "Tot_other" ) # 3) Compute Shannon Index on the latest data using species counts latest_data <- latest_data %>% mutate(Shannon_Index_Latest = diversity(select(., all_of(species_count_columns)), index = "shannon")) # 4) Remove rows with NA values in species counts latest_data <- latest_data %>% drop_na(all_of(species_count_columns)) # 5) Inspect the new Shannon column summary(latest_data$Shannon_Index_Latest) # 6) Summarize by site for the latest data latest_shannon_summary <- latest_data %>% group_by(site) %>% summarise( mean_shannon = mean(Shannon_Index_Latest, na.rm = TRUE), sd_shannon = sd(Shannon_Index_Latest, na.rm = TRUE), n = n() ) %>% arrange(desc(mean_shannon)) print("Summary statistics for Shannon Index (Latest) by site:") print(latest_shannon_summary) # 7) Save computed Shannon Index data write.csv(latest_data, "Shannon_Index_Latest_Data.csv", row.names = FALSE) cat("Shannon Index Latest Data saved as 'Shannon_Index_Latest_Data.csv'\n") # 8) Violin Plot of Shannon Index (LATEST) by site p_latest <- ggplot(latest_data, aes(x = site, y = Shannon_Index_Latest, fill = site)) + geom_violin(trim = FALSE, scale = "width", alpha = 0.6, width = 0.9) + geom_jitter(aes(fill = site), color = "black", size = 2.5, shape = 21, width = 0.2, alpha = 0.7) + stat_summary(fun.data = "mean_cl_boot", colour = "black", fill = "white", size = 1, shape = 21) + scale_fill_brewer(palette = "Paired") + theme_minimal(base_size = 15) + labs(title = "Shannon Diversity Index (Latest) by Site", x = "Site", y = "Shannon Index (Latest)") + theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) # Save and print the plot ggsave("Violin_Plot_Shannon_Index_Latest.png", plot = p_latest, width = 12, height = 6, dpi = 300) cat("Violin plot saved as 'Violin_Plot_Shannon_Index_Latest.png'\n") print(p_latest) # 9) Kruskal-Wallis test for Shannon diversity across sites kruskal_test1 <- kruskal.test(Shannon_Index_Latest ~ site, data = latest_data) cat("Kruskal-Wallis Test Results for Shannon Index by Site:\n") print(kruskal_test1) # 10) Dunn's post-hoc test (if Kruskal-Wallis is significant) if (kruskal_test1$p.value < 0.05) { posthoc1 <- dunnTest(Shannon_Index_Latest ~ site, data = latest_data, method = "bonferroni") cat("\nPost-hoc pairwise comparisons (Dunn's Test):\n") print(posthoc1$res) } else { cat("\nKruskal-Wallis Test not significant, skipping post-hoc comparisons.\n") } # 11) Scatter plot: Shannon Index vs. Depth ggplot(latest_data, aes(x = depth, y = Shannon_Index_Latest)) + geom_point(size = 3, alpha = 0.7, color = "blue") + geom_smooth(method = "lm", se = FALSE, color = "red", linetype = "dashed") + theme_minimal() + labs(title = "Relationship between Shannon Diversity (Latest) and Depth", x = "Depth (m)", y = "Shannon Index (Latest)") # 12) Spearman correlation test for Shannon Index vs. Depth cor_test <- cor.test(latest_data$Shannon_Index_Latest, latest_data$depth, method = "spearman") cat("\nSpearman Correlation between Shannon Index (Latest) and Depth:\n") print(cor_test) ``` ```{r Alternative_composite_feasibility_NO SIMPER, echo=TRUE, message=FALSE, warning=FALSE} # Load required libraries library(dplyr) library(tidyr) library(ggplot2) library(brms) # (if needed for additional modeling) # 1) Define expert impact directions with enhanced weight for coral cover impact_direction_alt <- data.frame( Variable = c("coral", "cca", "bleached", "turf", "cyanobacteria", "shannon"), Direction = c(+4, +2, -2, -3, -2, +2) # Stronger positive weight for coral; increased penalties for turf and cyanobacteria ) # 2) Summarize PCA loadings (using pca_loadings_latest dataset) pca_summarized_alt <- pca_loadings_latest %>% mutate(LoadingSum = abs(PC1) + abs(PC2) + abs(PC3)) %>% select(Variable, LoadingSum) # 3) Merge expert impact directions with PCA loadings combined_df_alt <- impact_direction_alt %>% left_join(pca_summarized_alt, by = "Variable") %>% mutate(LoadingSum = if_else(is.na(LoadingSum), 0, LoadingSum)) %>% # Compute composite weight as the product of the loading sum and the expert impact direction mutate(CompositeWeight = LoadingSum * Direction) # 4) Merge Shannon diversity into site-level data (latest_data_with_shannon) latest_data_with_shannon <- latest_data %>% left_join(latest_shannon_summary %>% select(site, mean_shannon) %>% rename(shannon = mean_shannon), by = "site") # 5) Scale the site data for each variable included in combined_df_alt scaled_latest_alt <- latest_data_with_shannon %>% mutate(across(all_of(combined_df_alt$Variable), scale, .names = "scaled_{.col}")) # 6) Convert data to long format for weighting long_latest_alt <- scaled_latest_alt %>% pivot_longer( cols = starts_with("scaled_"), names_to = "Variable", values_to = "Scaled_Value" ) %>% mutate(Variable = sub("scaled_", "", Variable)) # Remove prefix # 7) Compute final weighted contributions weighted_latest_alt <- long_latest_alt %>% left_join(combined_df_alt %>% select(Variable, CompositeWeight), by = "Variable") %>% mutate(Weighted_Contribution = Scaled_Value * CompositeWeight) # 8) Summarize feasibility scores per site feasibility_scores_alt <- weighted_latest_alt %>% group_by(site) %>% summarise(Feasibility_Score = sum(Weighted_Contribution, na.rm = TRUE)) %>% arrange(desc(Feasibility_Score)) # Print final feasibility scores cat("\nAlternative Feasibility Scores (Excluding SIMPER):\n") print(feasibility_scores_alt) # 9) Visualize the feasibility scores feasibility_plot_alt <- ggplot(feasibility_scores_alt, aes(x = reorder(site, Feasibility_Score), y = Feasibility_Score, fill = Feasibility_Score)) + geom_bar(stat = "identity", show.legend = FALSE) + coord_flip() + labs(title = "Alternative Composite Feasibility Index by Site (Excluding SIMPER)", x = "Site", y = "Feasibility Score (Weighted)") + theme_minimal() # Save the plot ggsave("feasibility_plot_alternative.png", plot = feasibility_plot_alt, width = 8, height = 6, dpi = 300) print(feasibility_plot_alt) ``` ## Including Plots You can also embed plots, for example: ```{r pressure, echo=FALSE} plot(pressure) ``` Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.