---
title: "Cano_Declines_PeerJ"
author: "Caroline Palmer"
date: "2025-03-13"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r load_libraries}
# Load necessary libraries
library(tidyverse)
library(lubridate)
library(brms)
library(patchwork)
library(ggplot2)
library(dplyr)
library(vegan) # For Shannon diversity index calculation
library(tidyr)
library(broom)
library(purrr)
library(readr)
library(car)
library(emmeans)
library(lme4)
library(FSA)    # For Dunn's post-hoc test

# Ensure the brms package is installed
if (!requireNamespace("brms", quietly = TRUE)) {
  install.packages("brms")
}

```

```{r preprocess data}

# Read and preprocess data
prevsub <- read_csv("prevsub.csv", col_types = cols(
  bleached = col_double(),
  diseased = col_double(),
  coral = col_double(),
  mean_temp = col_double(),
  max_temp = col_double(),
  min_temp = col_double(),
  macroalgae = col_double(),
  turf = col_double(),
  cyanobacteria = col_double(),
  caulerpa = col_double(),
  deadcoral = col_double(),
  cca = col_double(),
  other = col_double(), 
  rubble = col_double(),
  sand = col_double(),
  rock = col_double(),
  proppoc = col_double(),
  proppor = col_double(),
  proppav = col_double(),
  proppsam = col_double(),
  propoth = col_double(),
  poc_mbb = col_double(),
  por_mbb = col_double(),
  pav_mbb = col_double(),
  psam_mbb = col_double(),
  other_mbb = col_double(),
  poc_bleached = col_double(),
  por_bleached = col_double(),
  pav_bleached = col_double(),
  psamm_bleached = col_double(),
  other_corals_bleaching = col_double(),
  totalpoc = col_double(),
  totalpor = col_double(),
  totalpav = col_double(),
  totalpsamm = col_double(),
  totalother = col_double(),
 mean_temp = col_double(),  # Mean temperature as numeric
  max_temp = col_double(),   # Max temperature as numeric
  min_temp = col_double(),   # Min temperature as numeric
  dive_temp = col_double(),  # Dive temperature as numeric
  ph = col_double(),         # pH as numeric
  sal = col_double(),        # Salinity as numeric
  vis = col_double(),        # Visibility as numeric
  oxi = col_double(),        # Oxygen as numeric
  nit = col_double(),        # Nitrate as numeric
  pho = col_double(),        # Phosphate as numeric
  mean_mon_sst = col_double(), # Monthly mean SST
  max_mon_sst = col_double(),  # Monthly max SST
  min_mon_sst = col_double(),
  site = col_character(),
  date = col_character()  # Treat 'date' as character initially
))

# Parse 'date' into a proper Date format
prevsub <- prevsub %>% mutate(date = dmy(date))

# Remove rows where all values are NA
prevsub <- prevsub %>%
    filter(rowSums(is.na(.)) != ncol(.))


# Ensure 'date' format is consistent (redundant if using dmy, but kept as a check)
prevsub <- prevsub %>%
  mutate(date = as.Date(date, format = "%d/%m/%Y"))

# Convert site to factor
prevsub <- prevsub %>%
  mutate(site = as.factor(site))

# Add scaled date
prevsub <- prevsub %>%
  mutate(
    z_date = scale(as.numeric(date))
  )

# Ensure rows with missing 'bleached' but valid 'coral' are retained
prevsub <- prevsub %>%
  filter(!is.na(z_date) & is.finite(z_date))


# Check for invalid dates
invalid_dates <- prevsub %>%
  filter(is.na(date))

if (nrow(invalid_dates) > 0) {
  print("Invalid dates found:")
  print(invalid_dates)
  # Remove invalid dates
  prevsub <- prevsub %>% filter(!is.na(date))
} else {
  print("All dates are valid!")
}


# Step 1: Convert NA to structural zeros for total bleaching columns
bleaching_cols <- c("poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching")
total_cols <- c("totalpoc", "totalpor", "totalpav", "totalpsamm", "totalother")

for (i in seq_along(bleaching_cols)) {
  prevsub[[bleaching_cols[i]]] <- ifelse(
    is.na(prevsub[[bleaching_cols[i]]]) & prevsub[[total_cols[i]]] == 0,
    0,
    prevsub[[bleaching_cols[i]]]
  )
}

# Step 2: Convert NA to structural zeros for mostly bleached and bleached data
species_cols <- c("poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb")

for (i in seq_along(species_cols)) {
  prevsub[[species_cols[i]]] <- ifelse(
    is.na(prevsub[[species_cols[i]]]) & prevsub[[total_cols[i]]] == 0,
    0,
    prevsub[[species_cols[i]]]
  )
}

# Step 3: Constrain proportions for beta regression for additional columns
additional_proportion_cols <- c(
  "coral", "bleached", "diseased", "macroalgae", "turf", "cyanobacteria", "caulerpa", "cca", "rubble", "rock", "sand", "deadcoral", "other",
  "proppoc", "proppor", "proppav", "proppsam", "propoth",
  "poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb",
  "poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching"
)

prevsub <- prevsub %>%
  mutate(across(all_of(additional_proportion_cols), ~ pmax(pmin(., 0.9999), 0.0001)))

# Step 4: Create structural zero indicators for all relevant columns
for (i in seq_along(total_cols)) {
  prevsub[[paste0(species_cols[i], "_structural_zero")]] <- ifelse(
    prevsub[[total_cols[i]]] == 0,
    1,
    0
  )
  prevsub[[paste0(bleaching_cols[i], "_structural_zero")]] <- ifelse(
    prevsub[[total_cols[i]]] == 0,
    1,
    0
  )
}

# Constrain response variables for zero_inflated_beta family
prevsub <- prevsub %>%
  mutate(
    macroalgae = pmax(pmin(macroalgae, 0.9999), 0.0001),
    caulerpa = pmax(pmin(caulerpa, 0.9999), 0.0001),
    turf = pmax(pmin(turf, 0.9999), 0.0001),
    cyanobacteria = pmax(pmin(cyanobacteria, 0.9999), 0.0001),
    cca = pmax(pmin(cca, 0.9999), 0.0001)
  )

# Filter for selected sites
selected_sites <- c("Tina", "Ancla", "Chorro", "San Josecito", "Cueva", "Barco Somero", "Esquina", "Este Intermedio", "Barco Profundo")
filtered_prevsub <- prevsub %>%
  filter(site %in% selected_sites) %>%
  mutate(site = droplevels(site)) 

filtered_prevsub_coral <- filtered_prevsub %>%
  filter(!is.na(coral))

# Save removed data for later use
removed_data <- filtered_prevsub %>%
    filter(site == "Barco Somero" & format(date, "%Y-%m") == "2024-03")

# Proceed with filtered data for analysis
filtered_prevsub <- filtered_prevsub %>%
    filter(!(site == "Barco Somero" & format(date, "%Y-%m") == "2024-03"))

# Save removed data for later use
removed_data_ancla <- filtered_prevsub %>%
    filter(site == "Ancla" & format(date, "%Y-%m") == "2024-04")

# Proceed with filtered data for analysis (excluding Ancla's April 2024 data)
filtered_prevsub <- filtered_prevsub %>%
    filter(!(site == "Ancla" & format(date, "%Y-%m") == "2024-04"))
```


#Coral Diversity Analysis Full (Shannon)

```{r shannon-analysis-Tot_Counts, echo=TRUE, message=FALSE, warning=FALSE}

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(vegan)  # For Shannon diversity index calculation
library(car)
library(emmeans)
library(lme4)
library(FSA)    # For Dunn's post-hoc test

# Define the species count columns
species_count_columns <- c(
  "Tot_pocillopora", "Tot_poriteslobata", "Tot_pavonaclavus", 
  "Tot_pavonagigantea", "Tot_pavonamaldivensis", "Tot_pavonavarians",
  "Tot_pavonachiriquiensis", "Tot_pavonafrondifera", "Tot_psammacorastellata",
  "Tot_psammacoraprofundacella", "Tot_gardineroserisplanulata", "Tot_tubastraeacoccinea",
  "Tot_other"
)

# Create a copy of the dataset for Shannon Index analysis
filtered_prevsub_shannon <- filtered_prevsub %>%
  drop_na(all_of(species_count_columns)) %>%
  mutate(Shannon_Index_Healthy = diversity(select(., all_of(species_count_columns)), index = "shannon")) %>%
  group_by(site) %>%
  filter(n() > 3) %>%  # Remove sites with fewer than 3 observations
  ungroup()

# Save computed Shannon Index data
write.csv(filtered_prevsub_shannon, "Shannon_Index_Healthy_Data.csv", row.names = FALSE)
cat("Shannon Index Healthy Data saved as 'Shannon_Index_Healthy_Data.csv'\n")

# Summarize Shannon Index statistics by site
shannon_summary_healthy <- filtered_prevsub_shannon %>%
  group_by(site) %>%
  summarise(
    mean_shannon = mean(Shannon_Index_Healthy, na.rm = TRUE),
    sd_shannon = sd(Shannon_Index_Healthy, na.rm = TRUE),
    n = n()
  ) %>%
  arrange(desc(mean_shannon))

print("Summary statistics for Shannon Index (Healthy) by site:")
print(shannon_summary_healthy)

# Violin Plot: Shannon Index by Site (Healthy Corals)
p_healthy <- ggplot(filtered_prevsub_shannon, aes(x = site, y = Shannon_Index_Healthy, fill = site)) +
  geom_violin(trim = FALSE, scale = "width", alpha = 0.6, width = 0.9) +  
  geom_jitter(aes(fill = site), color = "black", size = 2.5, shape = 21, width = 0.2, alpha = 0.7) +  
  stat_summary(fun.data = "mean_cl_boot", colour = "black", fill = "white", size = 1, shape = 21) +  
  scale_fill_brewer(palette = "Paired") +  
  theme_minimal(base_size = 15) +
  labs(title = "Shannon Diversity Index by Site", x = "Site", y = "Shannon Index") +
  theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

ggsave("Violin_Plot_Shannon_Index_Tot.png", plot = p_healthy, width = 12, height = 6, dpi = 300)
cat("Violin plot saved as 'Violin_Plot_Shannon_Index_Tot.png'\n")
print(p_healthy)

# ANOVA: Testing for Differences Between Sites
anova_site_healthy <- aov(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon)
anova_summary_healthy <- summary(anova_site_healthy)
print("ANOVA Results for Shannon Index (Healthy) by Site:")
print(anova_summary_healthy)

# Check ANOVA assumptions
shapiro_test_healthy <- shapiro.test(residuals(anova_site_healthy))
print("Shapiro-Wilk Test for Normality of Residuals:")
print(shapiro_test_healthy)

levene_test_healthy <- leveneTest(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon)
print("Levene's Test for Homogeneity of Variance:")
print(levene_test_healthy)

# Post-hoc tests based on assumptions
if (anova_summary_healthy[[1]]$`Pr(>F)`[1] < 0.05 & shapiro_test_healthy$p.value > 0.05 & levene_test_healthy$`Pr(>F)`[1] > 0.05) {
  posthoc_healthy <- emmeans(anova_site_healthy, pairwise ~ site)
  print("Post-hoc comparisons:")
  print(posthoc_healthy)
} else {
  print("ANOVA not significant or assumptions not met, considering Kruskal-Wallis test instead.")

  # Kruskal-Wallis test
  kruskal_test_healthy <- kruskal.test(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon)
  print("Kruskal-Wallis Test Results for Shannon Index by Site:")
  print(kruskal_test_healthy)

  # Dunn's post-hoc if Kruskal is significant
  if (kruskal_test_healthy$p.value < 0.05) {
    posthoc_dunn_healthy <- dunnTest(Shannon_Index_Healthy ~ site, data = filtered_prevsub_shannon, method = "bonferroni")
    print("\nPost-hoc pairwise comparisons (Dunn's Test):")
    print(posthoc_dunn_healthy$res)
    
    # Save results
    dunn_results <- posthoc_dunn_healthy$res %>%
      as.data.frame() %>%
      rename(Comparison = Comparison, Z = Z, P.adj = P.adj)

    write.csv(dunn_results, "Dunn_Test_Summary.csv", row.names = FALSE)
    cat("Dunn's test summary saved as 'Dunn_Test_Summary.csv'\n")
  } else {
    print("Kruskal-Wallis Test not significant, skipping post-hoc comparisons.")
  }
}

# Mixed-effects model
lmer_model_healthy <- lmer(Shannon_Index_Healthy ~ site + z_date + (1 | site), data = filtered_prevsub_shannon)
print("Mixed-Effects Model Summary:")
print(summary(lmer_model_healthy))

# Scatterplot of Shannon over time
ggplot(filtered_prevsub_shannon, aes(x = z_date, y = Shannon_Index_Healthy, color = site)) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  theme_minimal() +
  labs(
    title = "Shannon Diversity Index Over Time (z_date)",
    x = "Scaled Date (z_date)",
    y = "Shannon Index"
  ) +
  theme(legend.position = "bottom")

# Coral Cover Over Time (from subset)
ggplot(filtered_prevsub_shannon, aes(x = z_date, y = coral)) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Coral Cover Over Time (Healthy Corals Subset)",
    x = "Scaled Date (z_date)",
    y = "Coral Cover"
  )


```

#Species Presence Table Healthy Counts
```{r Species_presence_table, echo=TRUE, message=FALSE, warning=FALSE}
# Load necessary libraries
library(dplyr)
library(tidyr)
library(stringr)
library(knitr)

# Define species columns explicitly using the "Tot_" data
species_columns <- c("Tot_pocillopora", "Tot_poriteslobata", "Tot_pavonaclavus",
                     "Tot_pavonagigantea", "Tot_pavonamaldivensis", "Tot_pavonavarians",
                     "Tot_pavonachiriquiensis", "Tot_pavonafrondifera", 
                     "Tot_psammacorastellata", "Tot_psammacoraprofundacella", 
                     "Tot_gardineroserisplanulata", "Tot_tubastraeacoccinea", "Tot_other")

# Remove rows with NA values in species count columns
filtered_prevsub <- filtered_prevsub %>%
  drop_na(all_of(species_columns))

# Convert count data into presence/absence (1 = present, 0 = absent)
species_presence <- filtered_prevsub %>%
  select(site, all_of(species_columns)) %>%  # Select only site and species columns
  group_by(site) %>%
  summarise(across(everything(), ~ ifelse(sum(. > 0, na.rm = TRUE) > 0, 1, 0)))  # Count > 0 = Present (1)

# Rename species columns by removing "Tot_" prefix and replacing underscores
colnames(species_presence) <- colnames(species_presence) %>%
  str_replace_all("^Tot_", "") %>%  # Remove 'Tot_' prefix
  str_replace_all("_", " ")         # Replace underscores with spaces

# Print the corrected summary table
print(species_presence)

# Save table as CSV file
write.csv(species_presence, "Species_Presence_By_Site_Formatted.csv", row.names = FALSE)

# Display formatted table in R Markdown
kable(species_presence, caption = "Species Presence by Site (1 = Present, 0 = Absent)")

```

# Bleaching prevalence analysis

```{r bleaching models}

#Bleaching prevalence by site

beta_model_no_time <- brm(
  formula = bleached ~ 1 + (1 | site),
  data = filtered_prevsub,
  family = Beta(),                   # no zero inflation
  chains = 4,
  cores = 4,
  iter = 4000,
  warmup = 2000,
  control = list(adapt_delta = 0.99),
  seed = 123
)
summary(beta_model_no_time)

#### beta_model_no_time predicts an overall bleaching proportion (on average) plus site-level deviations, with no variation over time. bleaching levels vary primarily by site rather than time, with an overall average of about 24% bleaching across sites.

```

#Coral Bleaching Prevalence Plot by time

```{r filter-and-plot-bleaching, message=FALSE, warning=FALSE}
library(dplyr)
library(ggplot2)

# 1) Identify the mean and standard deviation of the original date column
original_mean_date <- mean(as.numeric(filtered_prevsub$date), na.rm = TRUE)  
original_sd_date <- sd(as.numeric(filtered_prevsub$date), na.rm = TRUE)  

# 2) Filter data: Keep only rows where bleaching data is available and within valid site-specific time ranges
filtered_prevsub_small <- filtered_prevsub %>%
  filter(!is.na(bleached)) %>%
  group_by(site) %>%
  mutate(
    min_zdate = min(z_date, na.rm = TRUE),
    max_zdate = max(z_date, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  filter(z_date >= min_zdate & z_date <= max_zdate) %>%
  # Convert z_date back to actual date
  mutate(
    actual_date = as.Date(z_date * original_sd_date + original_mean_date, origin = "1970-01-01"),
    month_year = format(actual_date, "%b %Y")  # Extract Month-Year (e.g., "Jan 2024")
  )

# 3) Plot bleaching trends over time with **no legend**
ggplot(filtered_prevsub_small, aes(x = actual_date, y = bleached)) +
  geom_smooth(method = "lm") +  # Linear regression trend line
  geom_point(size = 5, shape = 21, aes(fill = site)) +  # Points colored by site
  facet_wrap(~site, scales = "free_x") +  # Separate panels for each site
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month", minor_breaks = "1 month") +  # More frequent ticks
  theme_bw() +
  labs(y = "Proportion Bleached", x = "Month-Year") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = "none"  # **Removes the legend**
  )

```

# ZIB model of bleaching prevalence by species

```{r multivariate_model_total_bleached_species_prev} 
# Fit the multivariate zero-inflated beta model
bleachedspecies_model <- brm(
  mvbind(poc_bleached, por_bleached, pav_bleached, psamm_bleached, other_corals_bleaching) ~ 
    poc_bleached_structural_zero + por_bleached_structural_zero + pav_bleached_structural_zero +
    psamm_bleached_structural_zero + other_corals_bleaching_structural_zero + (1 | site),
  data = filtered_prevsub,
  family = zero_inflated_beta(),
  iter = 4000,
  control = list(adapt_delta = 0.99),
  cores = 4,
  seed = 123
)

# Summarize the model
summary(bleachedspecies_model)
```


#SIMPER Analysis full model

```{r preprocess_SIMPER_data, message=FALSE, warning=FALSE}

# Load necessary libraries
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)

# Read and preprocess data
prevsub <- read_csv("prevsub.csv", col_types = cols(
  bleached = col_double(),
  diseased = col_double(),
  coral = col_double(),
  mean_temp = col_double(),
  max_temp = col_double(),
  min_temp = col_double(),
  macroalgae = col_double(),
  turf = col_double(),
  cyanobacteria = col_double(),
  caulerpa = col_double(),
  deadcoral = col_double(),
  cca = col_double(),
  other = col_double(), 
  rubble = col_double(),
  sand = col_double(),
  rock = col_double(),
  proppoc = col_double(),
  proppor = col_double(),
  proppav = col_double(),
  proppsam = col_double(),
  propoth = col_double(),
  poc_mbb = col_double(),
  por_mbb = col_double(),
  pav_mbb = col_double(),
  psam_mbb = col_double(),
  other_mbb = col_double(),
  poc_bleached = col_double(),
  por_bleached = col_double(),
  pav_bleached = col_double(),
  psamm_bleached = col_double(),
  other_corals_bleaching = col_double(),
  totalpoc = col_double(),
  totalpor = col_double(),
  totalpav = col_double(),
  totalpsamm = col_double(),
  totalother = col_double(),
  mean_temp = col_double(),
  max_temp = col_double(),
  min_temp = col_double(),
  dive_temp = col_double(),
  ph = col_double(),
  sal = col_double(),
  vis = col_double(),
  oxi = col_double(),
  nit = col_double(),
  pho = col_double(),
  mean_mon_sst = col_double(),
  max_mon_sst = col_double(),
  min_mon_sst = col_double(),
  site = col_character(),
  date = col_character()  # Keep date as character initially
))

# Convert 'date' column to proper Date format
prevsub <- prevsub %>% mutate(date_actual = dmy(date))

# Remove rows where all values are NA
prevsub <- prevsub %>%
    filter(rowSums(is.na(.)) != ncol(.))

# Convert 'site' to a factor
prevsub <- prevsub %>%
  mutate(site = as.factor(site))

# Step 1: Convert NA to structural zeros for total bleaching columns
bleaching_cols <- c("poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching")
total_cols <- c("totalpoc", "totalpor", "totalpav", "totalpsamm", "totalother")

for (i in seq_along(bleaching_cols)) {
  prevsub[[bleaching_cols[i]]] <- ifelse(
    is.na(prevsub[[bleaching_cols[i]]]) & prevsub[[total_cols[i]]] == 0,
    0,
    prevsub[[bleaching_cols[i]]]
  )
}

# Step 2: Convert NA to structural zeros for mostly bleached and bleached data
species_cols <- c("poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb")

for (i in seq_along(species_cols)) {
  prevsub[[species_cols[i]]] <- ifelse(
    is.na(prevsub[[species_cols[i]]]) & prevsub[[total_cols[i]]] == 0,
    0,
    prevsub[[species_cols[i]]]
  )
}

# Step 3: Constrain proportions for beta regression for additional columns
additional_proportion_cols <- c(
  "coral", "bleached", "diseased", "macroalgae", "turf", "cyanobacteria", "caulerpa", "cca", "rubble", "rock", "sand", "deadcoral", "other",
  "proppoc", "proppor", "proppav", "proppsam", "propoth",
  "poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb",
  "poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching"
)

prevsub <- prevsub %>%
  mutate(across(all_of(additional_proportion_cols), ~ pmax(pmin(., 0.9999), 0.0001)))

# Step 4: Create structural zero indicators for all relevant columns
for (i in seq_along(total_cols)) {
  prevsub[[paste0(species_cols[i], "_structural_zero")]] <- ifelse(
    prevsub[[total_cols[i]]] == 0,
    1,
    0
  )
  prevsub[[paste0(bleaching_cols[i], "_structural_zero")]] <- ifelse(
    prevsub[[total_cols[i]]] == 0,
    1,
    0
  )
}

# Constrain response variables for zero_inflated_beta family
prevsub <- prevsub %>%
  mutate(
    macroalgae = pmax(pmin(macroalgae, 0.9999), 0.0001),
    caulerpa = pmax(pmin(caulerpa, 0.9999), 0.0001),
    turf = pmax(pmin(turf, 0.9999), 0.0001),
    cyanobacteria = pmax(pmin(cyanobacteria, 0.9999), 0.0001),
    cca = pmax(pmin(cca, 0.9999), 0.0001)
  )

# Filter for selected sites
selected_sites <- c("Tina", "Ancla", "Chorro", "San Josecito", "Cueva", "Barco Somero", "Esquina", "Este Intermedio", "Barco Profundo")
filtered_simper_data <- prevsub %>%
  filter(site %in% selected_sites) %>%
  mutate(site = droplevels(site)) 

# Define "Before" and "After" categories based on actual dates
filtered_simper_data <- filtered_simper_data %>%
  mutate(merged_time = case_when(
    date_actual >= as.Date("2024-03-27") & date_actual <= as.Date("2024-07-18") ~ "Before",
    date_actual >= as.Date("2024-08-20") ~ "After",
    TRUE ~ NA_character_
  )) %>%
  drop_na(merged_time)

# Check if both time groups exist
print(table(filtered_simper_data$merged_time))

# Remove specific dates from Barco Somero & Ancla
filtered_simper_data <- filtered_simper_data %>%
  filter(!(site == "Barco Somero" & format(date_actual, "%Y-%m") == "2024-03")) %>%
  filter(!(site == "Ancla" & format(date_actual, "%Y-%m") == "2024-04"))
```


```{r SIMPER_analysis_During_After_Correction_March_12, message=FALSE, warning=FALSE}
library(vegan)
library(dplyr)
library(tidyr)


# 1) Time-Based SIMPER (Grouping by Merged Time Period)

composition_vars <- c("coral", "bleached", "macroalgae", "turf", 
                      "cyanobacteria", "cca", "deadcoral")

# Subset data and ensure numeric values
simper_data <- filtered_simper_data %>%
  select(site, merged_time, all_of(composition_vars)) %>%
  mutate(across(all_of(composition_vars), as.numeric)) %>%  # Ensure numeric conversion
  drop_na() %>%  # Remove rows with missing values
  mutate(merged_time = as.factor(merged_time))  # Convert merged_time to factor

# Check if both groups exist before running SIMPER
if (length(unique(simper_data$merged_time)) < 2) {
  stop("Error: SIMPER requires at least two unique time periods ('Before' and 'After').")
}

# Convert data frame to numeric matrix
simper_matrix <- as.matrix(simper_data %>% select(-site, -merged_time))

# Debugging: Check structure of matrix before SIMPER
print(str(simper_matrix))

# Run SIMPER analysis
simper_results <- simper(simper_matrix, simper_data$merged_time, permutations = 100)

# Extract SIMPER summary
simper_summary_time <- summary(simper_results)

# Convert SIMPER results into a structured data frame
simper_df_time <- bind_rows(
  lapply(names(simper_summary_time), function(grp) {
    if (!is.null(simper_summary_time[[grp]])) {
      df <- as.data.frame(simper_summary_time[[grp]])
      df$Group <- grp               # The comparison label (e.g. "Before vs After")
      df$Variable <- rownames(simper_summary_time[[grp]])
      df
    }
  }),
  .id = "Comparison_Group"
)

# Rename columns and select order
simper_df_time <- simper_df_time %>%
  rename(Average_Contribution = average, 
         SD = sd, 
         Consistency_Ratio = ratio,
         Mean_Before = ava, 
         Mean_After = avb, 
         Cumulative_Contribution = cumsum,
         P_Value = p) %>%
  select(Group, Comparison_Group, Variable, Average_Contribution, Consistency_Ratio,
         Mean_Before, Mean_After, Cumulative_Contribution, P_Value)

# Save results
write.csv(simper_df_time, "SIMPER_results_Before_After_by_Date.csv", row.names = FALSE)

# Print first few rows
cat("\n--- SIMPER RESULTS (Before vs After by Date) ---\n")
print(head(simper_df_time))


# 2) Site-Level SIMPER (Loop Over Each Site for "Before" vs "After")


# Store results for each site
site_results_list <- list()

# Identify unique sites
all_sites <- unique(filtered_simper_data$site)

# Loop through each site
for (this_site in all_sites) {
  
  cat("\nChecking site:", this_site)

  # Subset for this site and ensure numeric values
  site_data <- filtered_simper_data %>%
    filter(site == this_site) %>%
    select(merged_time, all_of(composition_vars)) %>%
    mutate(across(all_of(composition_vars), as.numeric)) %>%  # Convert to numeric
    group_by(merged_time) %>%
    mutate(across(all_of(composition_vars), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) %>%  # Replace NAs with time-period mean
    ungroup() %>%
    mutate(merged_time = as.factor(merged_time))

# Debugging: Check if missing values remain
print(anyNA(site_data))  # Should return FALSE


# Debugging: Print Before/After counts for each site
print(table(site_data$merged_time))

  # If only one merged time period exists, skip
  if (length(unique(site_data$merged_time)) < 2) {
    cat("\nSkipping site:", this_site, "because it does not have both 'Before' and 'After' data.\n")
    next
  }

  # Convert to numeric matrix
  site_matrix <- as.matrix(site_data %>% select(-merged_time))

  # Debugging: Check if numeric conversion was successful
  print(str(site_matrix))

  if (nrow(site_matrix) != length(site_data$merged_time)) {
    stop(paste("Mismatch between data matrix and merged_time factor for site:", this_site))
  }

  # Run SIMPER for this site
  site_simper <- simper(site_matrix, site_data$merged_time, permutations = 100)

  # Summarize results
  site_summary <- summary(site_simper)

  # Convert site-level SIMPER to a data frame
  site_simper_df <- bind_rows(
    lapply(names(site_summary), function(grp) {
      if (!is.null(site_summary[[grp]])) {
        df <- as.data.frame(site_summary[[grp]])
        df$Comparison <- grp
        df$Variable <- rownames(site_summary[[grp]])
        df
      }
    }),
    .id = "Comparison_Group"
  ) %>%
    rename(Average_Contribution = average, 
           SD = sd, 
           Consistency_Ratio = ratio,
           Mean_Before = ava, 
           Mean_After = avb, 
           Cumulative_Contribution = cumsum,
           P_Value = p)

  # Store results
  site_results_list[[this_site]] <- site_simper_df
}

# Combine site-level results into a single data frame
site_simper_all <- bind_rows(lapply(names(site_results_list), function(s) {
  out_df <- site_results_list[[s]]
  out_df$Site <- s
  out_df
}))

cat("\nSummary of Top Contributors to Temporal Change:\n")
print(head(simper_df_time))

# Save site-based results
write.csv(site_simper_all, "SIMPER_results_Before_After_by_Site_Date.csv", row.names = FALSE)

# Print a sample
cat("\n--- SITE-BASED SIMPER RESULTS (Before vs After by Date) ---\n")
if (nrow(site_simper_all) > 0) {
  print(head(site_simper_all))
} else {
  cat("No sites had both 'Before' and 'After' data available.\n")
}

```

###SIMPER Before March to Sept After Dec2024 to Feb 2025

```{r SIMPER_Preprocessing_and_Analysis_DEC2024, message=FALSE, warning=FALSE}
# --- Load required libraries ---
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)
library(vegan)

# --- Read and preprocess data ---
prevsub <- read_csv("prevsub.csv", col_types = cols(
  bleached = col_double(), diseased = col_double(), coral = col_double(),
  mean_temp = col_double(), max_temp = col_double(), min_temp = col_double(),
  macroalgae = col_double(), turf = col_double(), cyanobacteria = col_double(),
  caulerpa = col_double(), deadcoral = col_double(), cca = col_double(),
  other = col_double(), rubble = col_double(), sand = col_double(),
  rock = col_double(), proppoc = col_double(), proppor = col_double(),
  proppav = col_double(), proppsam = col_double(), propoth = col_double(),
  poc_mbb = col_double(), por_mbb = col_double(), pav_mbb = col_double(),
  psam_mbb = col_double(), other_mbb = col_double(),
  poc_bleached = col_double(), por_bleached = col_double(),
  pav_bleached = col_double(), psamm_bleached = col_double(),
  other_corals_bleaching = col_double(), totalpoc = col_double(),
  totalpor = col_double(), totalpav = col_double(), totalpsamm = col_double(),
  totalother = col_double(), dive_temp = col_double(), ph = col_double(),
  sal = col_double(), vis = col_double(), oxi = col_double(),
  nit = col_double(), pho = col_double(),
  mean_mon_sst = col_double(), max_mon_sst = col_double(),
  min_mon_sst = col_double(), site = col_character(), date = col_character()
))

# Convert and clean dates
prevsub <- prevsub %>%
  mutate(date_actual = dmy(date)) %>%
  filter(rowSums(is.na(.)) != ncol(.)) %>%
  mutate(site = as.factor(site))

# --- Structural zero imputation for bleaching and species ---
bleaching_cols <- c("poc_bleached", "por_bleached", "pav_bleached", "psamm_bleached", "other_corals_bleaching")
species_cols <- c("poc_mbb", "por_mbb", "pav_mbb", "psam_mbb", "other_mbb")
total_cols <- c("totalpoc", "totalpor", "totalpav", "totalpsamm", "totalother")

for (i in seq_along(bleaching_cols)) {
  prevsub[[bleaching_cols[i]]] <- ifelse(is.na(prevsub[[bleaching_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[bleaching_cols[i]]])
  prevsub[[species_cols[i]]] <- ifelse(is.na(prevsub[[species_cols[i]]]) & prevsub[[total_cols[i]]] == 0, 0, prevsub[[species_cols[i]]])
}

# --- Constrain values for beta regression ---
additional_proportion_cols <- c("coral", "bleached", "diseased", "macroalgae", "turf", "cyanobacteria", "caulerpa", "cca",
                                "rubble", "rock", "sand", "deadcoral", "other", "proppoc", "proppor", "proppav",
                                "proppsam", "propoth", species_cols, bleaching_cols)

prevsub <- prevsub %>%
  mutate(across(all_of(additional_proportion_cols), ~ pmax(pmin(., 0.9999), 0.0001)))

# --- Add structural zero flags ---
for (i in seq_along(total_cols)) {
  prevsub[[paste0(species_cols[i], "_structural_zero")]] <- ifelse(prevsub[[total_cols[i]]] == 0, 1, 0)
  prevsub[[paste0(bleaching_cols[i], "_structural_zero")]] <- ifelse(prevsub[[total_cols[i]]] == 0, 1, 0)
}

# --- Filter for selected sites and assign time periods ---
selected_sites <- c("Tina", "Ancla", "Chorro", "San Josecito", "Cueva", "Barco Somero", "Esquina", "Este Intermedio", "Barco Profundo")

filtered_simper_data_DEC <- prevsub %>%
  filter(site %in% selected_sites) %>%
  mutate(site = droplevels(site),
         merged_time = case_when(
           date_actual >= as.Date("2024-03-27") & date_actual < as.Date("2024-12-01") ~ "Before",
           date_actual >= as.Date("2024-12-01") ~ "After",
           TRUE ~ NA_character_
         )) %>%
  drop_na(merged_time) %>%
  filter(!(site == "Barco Somero" & format(date_actual, "%Y-%m") == "2024-03"),
         !(site == "Ancla" & format(date_actual, "%Y-%m") == "2024-04"))

# Optional: check balance
table(filtered_simper_data_DEC$merged_time)

# Export filtered data
write_csv(filtered_simper_data_DEC, "filtered_SIMPER_data_DEC_after.csv")

# --- SIMPER analysis for Before vs After (DEC onwards) ---
composition_vars <- c("coral", "bleached", "macroalgae", "turf", "cyanobacteria", "cca", "deadcoral")

simper_data_dec <- filtered_simper_data_DEC %>%
  select(site, merged_time, all_of(composition_vars)) %>%
  mutate(across(all_of(composition_vars), as.numeric)) %>%
  drop_na() %>%
  mutate(merged_time = as.factor(merged_time))

# Create matrix and run SIMPER
simper_matrix <- as.matrix(simper_data_dec %>% select(-site, -merged_time))
simper_results_dec <- simper(simper_matrix, simper_data_dec$merged_time, permutations = 100)
simper_summary_dec <- summary(simper_results_dec)

# Format SIMPER summary
simper_df_dec <- bind_rows(
  lapply(names(simper_summary_dec), function(grp) {
    if (!is.null(simper_summary_dec[[grp]])) {
      df <- as.data.frame(simper_summary_dec[[grp]])
      df$Group <- grp
      df$Variable <- rownames(simper_summary_dec[[grp]])
      df
    }
  }),
  .id = "Comparison_Group"
) %>%
  rename(Average_Contribution = average, 
         SD = sd, 
         Consistency_Ratio = ratio,
         Mean_Before = ava, 
         Mean_After = avb, 
         Cumulative_Contribution = cumsum,
         P_Value = p) %>%
  select(Group, Comparison_Group, Variable, Average_Contribution, Consistency_Ratio,
         Mean_Before, Mean_After, Cumulative_Contribution, P_Value)

write.csv(simper_df_dec, "SIMPER_results_Dec2024_Before_After.csv", row.names = FALSE)

# --- Site-level SIMPER loop ---
site_results_list_dec <- list()
all_sites <- unique(filtered_simper_data_DEC$site)

for (this_site in all_sites) {
  site_data <- filtered_simper_data_DEC %>%
    filter(site == this_site) %>%
    select(merged_time, all_of(composition_vars)) %>%
    mutate(across(all_of(composition_vars), as.numeric)) %>%
    group_by(merged_time) %>%
    mutate(across(all_of(composition_vars), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) %>%
    ungroup() %>%
    mutate(merged_time = as.factor(merged_time))
  
  if (length(unique(site_data$merged_time)) < 2) next
  
  site_matrix <- as.matrix(site_data %>% select(-merged_time))
  site_simper <- simper(site_matrix, site_data$merged_time, permutations = 100)
  site_summary <- summary(site_simper)
  
  site_simper_df <- bind_rows(
    lapply(names(site_summary), function(grp) {
      if (!is.null(site_summary[[grp]])) {
        df <- as.data.frame(site_summary[[grp]])
        df$Comparison <- grp
        df$Variable <- rownames(site_summary[[grp]])
        df
      }
    }),
    .id = "Comparison_Group"
  ) %>%
    rename(Average_Contribution = average, 
           SD = sd, 
           Consistency_Ratio = ratio,
           Mean_Before = ava, 
           Mean_After = avb, 
           Cumulative_Contribution = cumsum,
           P_Value = p)
  
  site_results_list_dec[[this_site]] <- site_simper_df
}

# Combine and export all site-level SIMPER outputs
site_simper_all_dec <- bind_rows(lapply(names(site_results_list_dec), function(s) {
  out_df <- site_results_list_dec[[s]]
  out_df$Site <- s
  out_df
}))

write.csv(site_simper_all_dec, "SIMPER_results_Dec2024_Site_Based.csv", row.names = FALSE)

```


#### Model Testing for Coral Cover through time

```{r coral_cover_linear_model filtered}
# -----------------------------------------------------------
# 1) Load necessary libraries
# -----------------------------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)
library(tidybayes)
library(brms)

# -----------------------------------------------------------
# 2) Fit a linear zero-inflated beta model
# -----------------------------------------------------------
# Make sure your dataset 'filtered_prevsub' has columns:
#    coral (proportion or 0-1 scale),
#    z_date (numeric or scaled time),
#    site (factor),
#    and that you've removed any NA rows if needed.

coralcov_linear <- brm(
  formula = coral ~ z_date + (1 | site),
  data    = filtered_prevsub_coral,
  family  = zero_inflated_beta(link = "logit"),
  chains  = 4,
  iter    = 4000,
  seed    = 123,
  control = list(adapt_delta = 0.95)
)
summary(coralcov_linear)

# -----------------------------------------------------------
# 3) Create a new data frame for prediction
# -----------------------------------------------------------
zdate_min <- min(filtered_prevsub$z_date, na.rm = TRUE)
zdate_max <- max(filtered_prevsub$z_date, na.rm = TRUE)
zdate_seq <- seq(zdate_min, zdate_max, length.out = 100)

newdata_plot <- expand.grid(
  z_date = zdate_seq,
  site   = unique(filtered_prevsub$site)
)

# -----------------------------------------------------------
# 4) Obtain fitted draws on the response scale
# -----------------------------------------------------------
# Note: 'add_fitted_draws()' is deprecated; consider using 'add_epred_draws()' in new code.
fitted_draws_linear <- add_fitted_draws(
  model      = coralcov_linear,
  newdata    = newdata_plot,
  re_formula = NULL,       # include random intercept by site
  scale      = "response"  # get predictions on [0..1] scale
)

# -----------------------------------------------------------
# 5) Summarize posterior draws
# -----------------------------------------------------------
fitted_summary_linear <- fitted_draws_linear %>%
  group_by(z_date, site) %>%
  summarise(
    mean  = mean(.value),
    lower = quantile(.value, 0.025),
    upper = quantile(.value, 0.975),
    .groups = "drop"
  )

# -----------------------------------------------------------
# 6) Plot the fitted linear trend
# -----------------------------------------------------------
ggplot(fitted_summary_linear, aes(x = z_date, y = mean, color = site, fill = site)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, color = NA) +
  geom_line(size = 1) +
  labs(
    title = "Fitted Linear Curve (Zero-Inflated Beta Model)",
    x = "z_date (Scaled Time)",
    y = "Predicted Coral Cover"
  ) +
  theme_minimal(base_size = 14) +
  scale_color_brewer(palette = "Paired") +
  scale_fill_brewer(palette = "Paired")
```

##Coral cover ZIB model

```{r coral_cover_model filtered}
# Coral cover Model 2: Coral cover through time and site effects for filtered sites
coralcov2 <- brm(
  coral ~ poly(z_date, 2) + (1 | site), 
  data = filtered_prevsub_coral,
  family = zero_inflated_beta(),
  control = list(adapt_delta = 0.99),
  cores = 4,
  iter = 4000,
  seed = 123
)
summary(coralcov2)
```

```{r coral_cov_time}
# Coral cover by time for filtered data
m1 <- brm(
  coral ~ z_date + (1 | site),
  data = filtered_prevsub_coral,
  family = zero_inflated_beta(),
  prior = prior(normal(0, 1), class = b) + prior(gamma(4, 0.1), class = phi),
  cores = 4,
  seed = 9,
  control = list(adapt_delta = 0.95),
  iter = 10000,
  thin = 10,
  warmup = 2000
)
summary(m1)
conditional_effects(m1)

```

```{r polynomial_random_slopes, echo=TRUE, message=FALSE, warning=FALSE}
# ----------------------------------------------------------
# 1) Load required libraries
# ----------------------------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)
library(tidybayes)   # for add_*_draws
library(brms)        # for Bayesian regression modeling
library(loo)         # for leave-one-out cross-validation

# ----------------------------------------------------------
# 2) Define and Fit the Polynomial + Random Slopes Model
# ----------------------------------------------------------
# We assume your data frame is 'filtered_prevsub' with:
#   - 'coral' (proportion),
#   - 'z_date' (scaled time),
#   - 'site' (factor),
#   - possibly zero-inflation if needed

# Example formula: polynomial in z_date + random intercept + random slopes for the polynomial
# The notation (poly(z_date, 2, raw = TRUE) || site) 
# means random intercept plus random slopes for the polynomial terms, uncorrelated.
# If you want correlated random slopes, use a single vertical bar | instead of ||.
coralcov_poly_randslope <- brm(
  formula = coral ~ poly(z_date, 2, raw = TRUE) + (poly(z_date, 2, raw = TRUE) || site),
  data    = filtered_prevsub_coral,
  family  = zero_inflated_beta(link = "logit"),  # or just beta(link="logit") if no zero inflation
  chains  = 4,
  iter    = 4000,
  warmup  = 2000,
  seed    = 123,
  control = list(adapt_delta = 0.95)
)

# Print a quick summary of the new model
print(summary(coralcov_poly_randslope))

# ----------------------------------------------------------
# 3) Compare the New Model with Existing Models via LOO
# ----------------------------------------------------------
# Suppose you already have:
#   - coralcov_linear (linear model, random intercept)
#   - coralcov2       (polynomial model, random intercept)
#   - coralcov_linear_slope (linear model, random slope)
#   - coralcov_poly_randslope (this new polynomial + random slope model)

# Compute LOO for the new model
loo_poly_randslope <- loo(coralcov_poly_randslope)
# Compute LOO for m1
loo_m1 <- loo(m1)

# For example, compare with coralcov2:
loo_poly2 <- loo(coralcov2)
loo_linear       <- loo(coralcov_linear)


# Compare
model_comp <- loo_compare(loo_m1, loo_poly_randslope, loo_linear, loo_poly2)
cat("\n--- LOO Comparison: coralcov_poly_randslope vs coralcov2 ---\n")
print(model_comp)

```

```{r coral_cover_Linear Regressions}

library(dplyr)
library(broom)
library(purrr)
library(tidyr)
library(ggplot2)

# Run linear regression for each site and extract summary statistics
lm_results <- filtered_prevsub_coral %>%
  group_by(site) %>%
  nest() %>%
  mutate(
    n = map_int(data, nrow),  # Count number of observations per site
    model = map(data, ~ tryCatch(lm(coral ~ z_date, data = .), error = function(e) NULL)),  # Fit model
    results = map(model, ~ if (!is.null(.)) tidy(.) else NULL)  # Extract summary
  ) %>%
  unnest(results) %>%
  filter(term == "z_date") %>%
  select(site, n, term, estimate, std.error, statistic, p.value)  # Keep relevant stats

# Rename columns for clarity
colnames(lm_results) <- c("Site", "n", "Term", "Beta", "SE", "t_value", "p_value")

# Print the final results
print(lm_results)

# Save to CSV
write.csv(lm_results, "site_specific_coral_trends_with_n.csv", row.names = FALSE)

# Plot trends with confidence intervals and sample sizes
ggplot(lm_results, aes(x = Site, y = Beta, ymin = Beta - SE, ymax = Beta + SE)) +
  geom_point(size = 3, color = "blue") +
  geom_errorbar(width = 0.3, color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  coord_flip() +
  labs(title = "Site-Specific Coral Cover Trends Over Time",
       subtitle = "Numbers indicate sample size (n)",
       x = "Site",
       y = "Trend (Change in Coral Cover per Unit Time)") +
  theme_minimal() +
  geom_text(aes(label = paste0("n=", n)), hjust = -0.2, size = 4)


```


##Coral cover slopes by site
```{r coral_Site_random_slopes_Model_polynomial_regressions}
library(brms)
library(tidybayes)
library(dplyr)

# Extract fixed effects using correct parameter names
fixed_effects <- fixef(coralcov_poly_randslope)[c("polyz_date2rawEQTRUE1", "polyz_date2rawEQTRUE2"), "Estimate"]

# Extract site-level random effects
site_effects <- as.data.frame(ranef(coralcov_poly_randslope)$site[, , 1])  # Extracting mean estimates

# Rename columns correctly
colnames(site_effects) <- c("Intercept", "polyz_date2rawEQTRUE1", "polyz_date2rawEQTRUE2")
site_effects$Site <- rownames(site_effects)

# Compute site-specific slopes by adding random effects to fixed effects
site_effects$Final_Slope_Linear <- fixed_effects["polyz_date2rawEQTRUE1"] + site_effects$polyz_date2rawEQTRUE1
site_effects$Final_Slope_Quadratic <- fixed_effects["polyz_date2rawEQTRUE2"] + site_effects$polyz_date2rawEQTRUE2

# Extract only the "Estimate" values (first column in the second dimension)
site_effects <- as.data.frame(ranef(coralcov_poly_randslope)$site[, "Estimate", ])

# Rename columns correctly
colnames(site_effects) <- c("Intercept", "polyz_date2rawEQTRUE1", "polyz_date2rawEQTRUE2")
site_effects$Site <- rownames(site_effects)

# Compute site-specific slopes by adding random effects to fixed effects
site_effects$Final_Slope_Linear <- fixed_effects["polyz_date2rawEQTRUE1"] + site_effects$polyz_date2rawEQTRUE1
site_effects$Final_Slope_Quadratic <- fixed_effects["polyz_date2rawEQTRUE2"] + site_effects$polyz_date2rawEQTRUE2

# Save site-specific trends to CSV
write.csv(site_effects, "site_specific_trends.csv", row.names = FALSE)

# Print results
print(site_effects)

```

```{r coral_Site_polynomial plots}

library(ggplot2)
library(scales)

# Custom function to determine appropriate date breaks
date_breaks_fn <- function(x) {
  rng <- range(x, na.rm = TRUE)
  if(as.numeric(rng[2] - rng[1]) > 730) {  # More than 2 years of data
    return(seq(rng[1], rng[2], by = "6 months"))
  } else {
    return(seq(rng[1], rng[2], by = "1 month"))
  }
}

# Create a quadratic trend plot of coral cover over time, faceted by site
coralcover_plot2b <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = TRUE) +  # Quadratic fit
  geom_point(size = 4, shape = 21, aes(fill = site)) +
  facet_wrap(~site, scales = "free_x") +  # Free x-axis, same y-axis
  theme_bw(base_size = 12) +
  labs(y = "Proportion Coral Cover", x = "Date") +
  guides(fill = "none") +
  scale_x_date(
    date_labels = "%b %Y",
    breaks = date_breaks_fn
  ) +
  scale_y_continuous(
    limits = c(0, 0.6),
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    strip.text = element_text(size = 10)
  )

# Display the plot
print(coralcover_plot2b)

# Save the plot to a file
ggsave(
  filename = "coral_cover_plot_quadratic.png",
  plot = coralcover_plot2b,
  width = 12,
  height = 14,
  dpi = 300
)

```

```{r coral_cover_plot}
coralcover_plot1 <- ggplot(filtered_prevsub_coral, aes(x = z_date, y = coral)) +
  geom_smooth(method = "lm") +
  geom_point(size = 5, shape = 21, aes(fill = site)) +
  facet_wrap(~site, scales = "free_x") +
  theme_bw() +
  labs(y = "Proportion Coral Cover", x = "Scaled Date") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

coralcover_plot1
```

### Coral Cover: Temporal, depth and algal trends

```{r coral_cover_plot_actual date}
library(tidyverse)
library(lubridate)
library(scales)

# Custom function to determine appropriate date breaks
date_breaks_fn <- function(x) {
  rng <- range(x, na.rm = TRUE)
  if(as.numeric(rng[2] - rng[1]) > 730) {  # More than 2 years of data
    return(seq(rng[1], rng[2], by = "6 months"))
  } else {
    return(seq(rng[1], rng[2], by = "1 month"))
  }
}

# Create a plot of coral cover over time with dynamic date breaks, faceted by site
coralcover_plot2 <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) +
  geom_smooth(method = "lm") +
  geom_point(size = 5, shape = 21, aes(fill = site)) +
  facet_wrap(~site, scales = "free_x") +
  theme_bw(base_size = 14) +
  labs(y = "Proportion Coral Cover", x = "Date") +
  guides(fill = "none") +
  scale_x_date(date_labels = "%b %Y", breaks = date_breaks_fn) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

coralcover_plot2

# Save the plot to a file in the working directory
ggsave(filename = "coral_cover_plot.png",
       plot = coralcover_plot2,
       width = 10,
       height = 7,
       dpi = 300)

# Create a plot of coral cover over time with dynamic date breaks, faceted by site
coralcover_plot2a <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) +
  geom_smooth(method = "lm") +
  geom_point(size = 4, shape = 21, aes(fill = site)) +
  facet_wrap(~site, scales = "free_y") +  # Allow free y-axis scaling per site
  theme_bw(base_size = 12) +
  labs(y = "Proportion Coral Cover", x = "Date") +
  guides(fill = "none") +
  scale_x_date(date_labels = "%b %Y", breaks = date_breaks_fn) +
  scale_y_continuous(
    limits = c(0, NA),           # Start y-axis from 0, upper limit auto
    expand = expansion(mult = c(0.02, 0.1)) # Add padding above and below data
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    strip.text = element_text(size = 10)
  )

# Display the plot
coralcover_plot2
# Display the plot
coralcover_plot2a

# Save the plot to a file in the working directory
ggsave(
  filename = "coral_cover_plot.png",
  plot = coralcover_plot2,
  width = 12,
  height = 10,
  dpi = 300
)


# Create a plot of coral cover over time with dynamic date breaks, faceted by site
coralcover_plot2b <- ggplot(filtered_prevsub_coral, aes(x = date, y = coral)) +
  geom_smooth(method = "lm") +
  geom_point(size = 4, shape = 21, aes(fill = site)) +
  facet_wrap(~site, scales = "free_x") +  # Free x-axis, same y-axis
  theme_bw(base_size = 12) +
  labs(y = "Proportion Coral Cover", x = "Date") +
  guides(fill = "none") +
  scale_x_date(date_labels = "%b %Y", breaks = date_breaks_fn) +
  scale_y_continuous(
    limits = c(0, 0.6),                  # Fixed y-axis range
    expand = expansion(mult = c(0, 0.05)) # Slight padding above the highest value
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    strip.text = element_text(size = 10)   # Smaller facet labels
  )

# Display the plot
coralcover_plot2b

# Save the plot to a file in the working directory with increased height
ggsave(
  filename = "coral_cover_plot_taller.png",
  plot = coralcover_plot2b,
  width = 12,    # Width remains the same
  height = 14,   # Increased height for better y-axis readability
  dpi = 300
)


```


#Linear Regression Models Coral Cover each Site through time (Though not linear trends)
```{r Coral_Cover_LINEAR_trends_site_analysis, echo=TRUE, message=FALSE, warning=FALSE}

# Count the number of data points per site
site_sample_sizes <- filtered_prevsub_coral %>%
  group_by(site) %>%
  summarise(n = n())

# Fit linear models for each site, handling potential errors
site_models <- filtered_prevsub_coral %>%
  group_by(site) %>%
  group_split() %>%
  set_names(filtered_prevsub_coral %>% group_by(site) %>% group_keys() %>% pull(site)) %>%
  map(~ tryCatch(lm(coral ~ z_date, data = .), error = function(e) NULL))

# Remove failed models (if any)
valid_models <- site_models[!sapply(site_models, is.null)]

# Extract model statistics safely and merge with sample sizes
site_trends <- map2_df(valid_models, names(valid_models), function(model, site_name) {
  if (!is.null(model)) {
    tidy(model) %>%
      filter(term == "z_date") %>% # Keep only slope estimates
      mutate(site = site_name) %>% # Assign correct site name
      select(site, term, estimate, std.error, statistic, p.value) # Keep relevant columns
  }
}) %>%
  left_join(site_sample_sizes, by = "site") # Merge with sample size data

# Rename columns for clarity
colnames(site_trends) <- c("Site", "Term", "Beta", "SE", "t_value", "p_value", "n")

# Save output to CSV file
write_csv(site_trends, "site_specific_coral_trends_with_n.csv")

site_trends <- site_trends %>%
  arrange(desc(abs(Beta)))  # Optional: sort by magnitude of slope

# Print the results
print(site_trends)

# Plot the trends with confidence intervals, including sample sizes in labels
ggplot(site_trends, aes(x = Site, y = Beta, ymin = Beta - SE, ymax = Beta + SE)) +
  geom_point(size = 3, color = "blue") +
  geom_errorbar(width = 0.3, color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  coord_flip() +
  labs(title = "Site-Specific Coral Cover Trends Over Time",
       subtitle = "Numbers indicate sample size (n)",
       x = "Site",
       y = "Trend (Change in Coral Cover per Unit Time)") +
  theme_minimal() +
  geom_text(aes(label = paste0("n=", n)), hjust = -0.2, size = 4)
```


##Coral cover and Depth polynomial model

```{r depth-nonlinear, echo=TRUE, message=FALSE, warning=FALSE}
# Nonlinear (polynomial) model
depth_poly_model <- lm(coral ~ poly(depth, 2), data = filtered_prevsub_coral)

# Summary of the polynomial model
summary(depth_poly_model)

# Visualization: Coral cover by depth (nonlinear trend)
ggplot(filtered_prevsub_coral, aes(x = depth, y = coral)) +
    geom_point(alpha = 0.6, color = "blue") +
    geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = TRUE, color = "darkgreen") +
    theme_minimal() +
    labs(
        title = "Nonlinear Relationship Between Coral Cover and Depth",
        x = "Depth (m)",
        y = "Coral Cover"
    )
```


## Algal change through time

```{r Algae through time, echo=TRUE, message=FALSE, warning=FALSE}
# Load necessary libraries
if (!requireNamespace("mgcv", quietly = TRUE)) install.packages("mgcv")
if (!requireNamespace("gratia", quietly = TRUE)) install.packages("gratia")

library(mgcv)
library(gratia)
library(dplyr)

# Define benthic variables to model trends
benthic_vars <- c("macroalgae", "caulerpa", "turf", "cyanobacteria", "cca")

# Store models in a list
algae_models <- list()

# Fit GAMs for each benthic variable with time as a smooth term and site as a random effect
for (var in benthic_vars) {
  formula_str <- paste(var, "~ s(z_date) + s(site, bs='re')")
  model_formula <- as.formula(formula_str)
  
  algae_models[[var]] <- gam(
    formula = model_formula,
    data = filtered_prevsub_coral,
    family = betar(),  # Beta regression for proportional data
    method = "REML"    # Penalized smoothing
  )
  
  cat("\nGAM Summary for", var, ":\n")
  print(summary(algae_models[[var]]))
}

# Plot smooth effects of time for each benthic variable
par(mfrow = c(2, 3))  # Arrange plots in a grid
for (var in benthic_vars) {
  draw(algae_models[[var]])
}

# Model diagnostics
for (var in benthic_vars) {
  cat("\nChecking diagnostics for", var, ":\n")
  gam.check(algae_models[[var]])
}

```

### coral and turf LM by site and date

```{r coral-turf-interaction, echo=TRUE, message=FALSE, warning=FALSE}
# Analyze the relationship between coral cover and turf
coral_turf_model <- lm(coral ~ turf + site + z_date, data = filtered_prevsub_coral)

# Model summary
summary(coral_turf_model)

# Visualize coral cover vs. turf
print(
  ggplot(filtered_prevsub, aes(x = turf, y = coral, color = site)) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "lm", se = FALSE, color = "blue") +
    theme_minimal() +
    labs(
        title = "Coral Cover vs. Turf Cover",
        x = "Turf Cover",
        y = "Coral Cover"
    )
)

```

###Local Temperature (HOBO) and coral cover among sites

```{r coral HOBO temp site fixed effect}
# Load required package
library(betareg)

# 1) Filter dataset to rows with valid temperature measurements, coral cover, and site
temp_data_coral <- filtered_prevsub_coral %>%
  filter(!is.na(mean_temp) & !is.na(max_temp) & !is.na(coral) & !is.na(site))

# 2) Fit a beta regression model of coral cover on mean_temp, max_temp, and site
beta_coral_full <- betareg(
  coral ~ mean_temp + max_temp + site,
  data = temp_data_coral
)

# Examine the model summary
summary(beta_coral_full)

# 3) If one temperature variable (e.g., max_temp) is not significant, refine the model
beta_coral_refined <- betareg(
  coral ~ mean_temp + site,
  data = temp_data_coral
)

# Compare the two models by AIC (optional)
AIC(beta_coral_full, beta_coral_refined)

# 4) Inspect the refined model
summary(beta_coral_refined)

# 5) (Optional) Plot diagnostic checks for the refined model
par(mfrow = c(2, 2))
plot(beta_coral_refined)
```


#Proportion of Coral Taxa Analysis
## multivariate_beta_model shows no sign change in prop
```{r Beta_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE}
library(brms)

# For multiple responses in a single brms model, we specify a list of families—one for each response.
# Because you have 5 responses, pass a list of 5 'beta()' families.

multivariate_beta_model <- brm(
  mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date + (1 | site),
  data = filtered_prevsub,
  family = list(Beta(), Beta(), Beta(), Beta(), Beta()),  # Capital B
  control = list(adapt_delta = 0.95),
  cores = 4,
  iter = 4000,
  seed = 123,
)

summary(multivariate_beta_model)


```

## multivariate_beta_interaction allow the effect of time (z_date) to vary by site
```{r Beta_interaction_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE}

multivariate_beta_interaction <- brm(
  mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date * site + (1 | site),
  data = filtered_prevsub,
  family = list(Beta(), Beta(), Beta(), Beta(), Beta()),
  control = list(adapt_delta = 0.95),
  cores = 4,
  iter = 4000,
  seed = 123
)

summary(multivariate_beta_interaction)

```

## Random Slopes proportion coral species - using this model
```{r Beta_random_slopes_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE}

multivariate_beta_random_slopes <- brm(
  mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date + (1 + z_date | site),
  data = filtered_prevsub,
  family = list(Beta(), Beta(), Beta(), Beta(), Beta()),
  control = list(adapt_delta = 0.95),
  cores = 4,
  iter = 4000,
  seed = 123
)
summary(multivariate_beta_random_slopes)

```

## separate beta regression models for each site (no additional fit)
```{r site_models_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE}

library(dplyr)
library(brms)

# Fit models for each site
site_models <- filtered_prevsub %>%
  group_by(site) %>%
  group_map(~ brm(
    mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ z_date,
    data = .x,
    family = list(Beta(), Beta(), Beta(), Beta(), Beta()),
    control = list(adapt_delta = 0.95),
    cores = 4,
    iter = 4000,
    seed = 123
  ))

# Function to extract fixed effects from each model
extract_fixed_effects <- function(model) {
  coef_summary <- as.data.frame(fixef(model))  # Extract fixed effects
  coef_summary$Parameter <- rownames(coef_summary)  # Add parameter names
  return(coef_summary)
}

# Apply the function to all site models and store results in a list
site_fixed_effects <- lapply(site_models, extract_fixed_effects)

# Convert the list into a single data frame for easier visualization
site_results <- bind_rows(site_fixed_effects, .id = "Site")

# Rename columns appropriately based on the actual number of columns in fixef()
colnames(site_results) <- c("Site", "Parameter", "Estimate", "SE", "L95_CI", "U95_CI")

# Display the cleaned-up results
print(site_results)

# Save results to a CSV file for further analysis
write.csv(site_results, "Site_Specific_Beta_Regression_Results.csv", row.names = FALSE)


```

## If the trends appear non-linear, use smooth terms for time: (NOTE trends are approx linear)

```{r non-linear assess, echo=TRUE, message=FALSE, warning=FALSE}
# Install necessary packages if not already installed
if (!requireNamespace("mgcv", quietly = TRUE)) install.packages("mgcv")
if (!requireNamespace("gratia", quietly = TRUE)) install.packages("gratia")

# Load libraries
library(mgcv)   # For GAM models
library(gratia) # For GAM visualization
library(dplyr)  # Data manipulation

# Define coral proportion variables
coral_taxa <- c("proppoc", "proppor", "proppav", "proppsam", "propoth")

# Store GAM models in a list
gam_models <- list()

# Loop over each coral taxon and fit a GAM
for (taxon in coral_taxa) {
  formula_str <- paste(taxon, "~ s(z_date, k=5) + s(site, bs='re')")  # Smooth time, random site effect
  model_formula <- as.formula(formula_str)
  
  gam_models[[taxon]] <- gam(
    formula = model_formula,
    data = filtered_prevsub,
    family = betar(),  # Beta regression for proportional data
    method = "REML"    # Penalized smoothing
  )
  
  cat("\nGAM Summary for", taxon, ":\n")
  print(summary(gam_models[[taxon]]))
}
# Extract significance of smooth terms
for (taxon in coral_taxa) {
  cat("\nChecking significance of smooth term for", taxon, ":\n")
  print(summary(gam_models[[taxon]])$s.table)
}
# Fit linear models for comparison
lm_models <- list()
for (taxon in coral_taxa) {
  lm_models[[taxon]] <- lm(as.formula(paste(taxon, "~ z_date + site")), data = filtered_prevsub)
}

# Compare models using AIC
aic_results <- data.frame(
  Taxon = coral_taxa,
  GAM_AIC = sapply(gam_models, AIC),
  LM_AIC = sapply(lm_models, AIC)
)

print(aic_results)
# Set up plotting area
par(mfrow = c(2, 3))

# Loop over each coral taxon and plot the smooth function of time
for (taxon in coral_taxa) {
  draw(gam_models[[taxon]])  # Uses gratia package for smooth function visualization
}

```


```{r GAM_models_prop_coral_species, echo=TRUE, message=FALSE, warning=FALSE}
multivariate_gam <- brm(
  mvbind(proppoc, proppor, proppav, proppsam, propoth) ~ s(z_date, by = site) + (1 | site),
  data = filtered_prevsub,
  family = list(Beta(), Beta(), Beta(), Beta(), Beta()),
  control = list(adapt_delta = 0.95),
  cores = 4,
  iter = 4000,
  seed = 123
)
```


### PCA Analysis 

##PCA Full Model

```{r pca-analysis-cleaned-filtered, echo=TRUE, message=FALSE, warning=FALSE, cache=TRUE}

library(dplyr)
library(ggplot2)
library(ggfortify)

# Summarize data by site for the columns of interest
pca_data <- filtered_prevsub_coral %>%
  group_by(site) %>%
  summarise(
    across(
      c(bleached, depth, coral, macroalgae, 
        turf, cyanobacteria, caulerpa, deadcoral, cca, rubble, sand, rock, 
        totalpoc, totalpor, totalpav, totalpsamm, totalother, proppoc, proppor, proppsam, proppav, propoth),
      ~ mean(.x, na.rm = TRUE)
    )
  ) %>%
  ungroup()

# Keep only numeric columns (excluding site)
pca_data_numeric_filtered <- pca_data %>%
  dplyr::select(where(is.numeric))

# Remove columns with too many missing values (>50%)
pca_data_numeric_filtered <- pca_data_numeric_filtered %>%
  dplyr::select(where(~ mean(is.na(.)) < 0.5))

# Remove rows with too many missing values (>50%)
pca_data_numeric_filtered <- pca_data_numeric_filtered %>%
  filter(rowMeans(is.na(.)) < 0.5)

# Replace remaining NAs with column means
pca_data_numeric_filtered <- pca_data_numeric_filtered %>%
  mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

# Remove rows where all values are zero
pca_data_numeric_filtered <- pca_data_numeric_filtered[rowSums(pca_data_numeric_filtered, na.rm = TRUE) > 0, ]

# Remove zero-variance columns
zero_var_cols <- which(sapply(pca_data_numeric_filtered, function(col) {
  if (all(is.na(col))) return(TRUE) # Remove all-NA columns
  if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns
  return(FALSE)
}))

if (length(zero_var_cols) > 0) {
  message("Removing zero-variance columns: ", paste(names(pca_data_numeric_filtered)[zero_var_cols], collapse = ", "))
  pca_data_numeric_filtered <- pca_data_numeric_filtered[, -zero_var_cols, drop = FALSE]
}

# Ensure at least two columns remain for PCA
if (ncol(pca_data_numeric_filtered) < 2) {
  stop("Error: PCA requires at least two numeric columns. Check dataset preparation.")
}

# Scale data
pca_data_scaled_filtered <- scale(pca_data_numeric_filtered)

# Remove rows with any non-finite values
row_is_finite_filtered <- apply(pca_data_scaled_filtered, 1, function(x) all(is.finite(x)))
pca_data_cleaned_filtered <- pca_data_scaled_filtered[row_is_finite_filtered, , drop = FALSE]
pca_data_used_filtered <- pca_data_numeric_filtered[row_is_finite_filtered, , drop = FALSE]  # FIXED LINE

# Ensure there are enough rows for PCA
if (nrow(pca_data_cleaned_filtered) < 2) {
  stop("Error: PCA requires at least two observations after cleaning.")
}

# Perform PCA
pca_res_filtered <- tryCatch(
  prcomp(pca_data_cleaned_filtered, center = TRUE, scale. = TRUE),
  error = function(e) {
    stop("Error in PCA computation: ", e$message)
  }
)

# Create a data frame with PCA scores and site info
if ("site" %in% colnames(pca_data)) {
  pca_scores_filtered <- data.frame(pca_res_filtered$x, site = pca_data$site[row_is_finite_filtered])
} else {
  pca_scores_filtered <- data.frame(pca_res_filtered$x)
}

# Extract PCA loadings
pca_loadings_filtered <- as.data.frame(pca_res_filtered$rotation)
pca_loadings_filtered$Variable <- rownames(pca_loadings_filtered)

# Print numerical outputs
print(summary(pca_res_filtered))
print(head(pca_scores_filtered))
print(pca_loadings_filtered)

# Save PCA outputs
write.csv(summary(pca_res_filtered)$importance, "PCA_Summary.csv", row.names = TRUE)
write.csv(pca_scores_filtered, "PCA_Scores.csv", row.names = TRUE)
write.csv(pca_loadings_filtered, "PCA_Loadings.csv", row.names = TRUE)

cat("PCA summary, scores, and loadings have been saved as CSV files.\n")

# PCA plot
if ("site" %in% colnames(pca_scores_filtered)) {
  pca_plot_filtered <- autoplot(
    pca_res_filtered,
    data = pca_scores_filtered,
    colour = "site",
    label = FALSE,
    loadings = TRUE,
    loadings.colour = "black",
    loadings.label = TRUE,
    loadings.label.colour = "black"
  ) +
    geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 3) +
    theme(legend.position = "none")
} else {
  pca_plot_filtered <- autoplot(
    pca_res_filtered,
    loadings = TRUE,
    loadings.colour = "black",
    loadings.label = TRUE,
    loadings.label.colour = "black"
  ) +
    theme(legend.position = "none")
}

# Display PCA plot
print(pca_plot_filtered)

```

##PCA During Bleaching
```{r PCA_analysis_up to July 2024 During Bleaching data, echo=TRUE, message=FALSE, warning=FALSE}

# Load required libraries
library(dplyr)
library(ggplot2)
library(ggfortify)

# 1) Filter dataset to include only data up to July 2024
filtered_pca_data <- filtered_prevsub_coral %>%
  filter(date <= as.Date("2024-07-18"))  # Ensuring only relevant timeframe

# 2) Summarize data by site for the columns of interest
pca_data <- filtered_pca_data %>%
  group_by(site) %>%
  summarise(
    across(
      c(bleached, diseased, depth, coral, mean_temp, max_temp, min_temp, macroalgae, 
        turf, cyanobacteria, caulerpa, deadcoral, cca, rubble, sand, rock, 
        totalpoc, totalpor, totalpav, totalpsamm, totalother, ph, sal, oxi, 
        nit, pho),
      ~ mean(.x, na.rm = TRUE)
    )
  ) %>%
  ungroup()

# 3) Filter dataset: Remove selected variables
pca_data_filtered <- pca_data %>%
  dplyr::select(-c(diseased, pho, ph, sal, min_temp, oxi, mean_temp, max_temp, nit, rubble, sand, cyanobacteria))

# 4) Keep only numeric columns (excluding site)
pca_data_numeric_filtered <- pca_data_filtered %>%
  dplyr::select(where(is.numeric))

# 5) Remove columns with too many missing values (>50% missing)
pca_data_numeric_filtered <- pca_data_numeric_filtered %>%
  dplyr::select(where(~ mean(is.na(.)) < 0.5))

# 6) Remove rows with too many missing values (>50% missing)
pca_data_numeric_filtered <- pca_data_numeric_filtered %>%
  filter(rowMeans(is.na(.)) < 0.5)

# 7) Replace remaining NAs with column means
pca_data_numeric_filtered <- pca_data_numeric_filtered %>%
  mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

# 8) Remove rows where **all** values are zero
pca_data_numeric_filtered <- pca_data_numeric_filtered[rowSums(pca_data_numeric_filtered, na.rm = TRUE) > 0, ]

# 9) Remove zero-variance columns
zero_var_cols <- which(sapply(pca_data_numeric_filtered, function(col) {
  if (all(is.na(col))) return(TRUE) # Remove all-NA columns
  if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns
  return(FALSE)
}))

if (length(zero_var_cols) > 0) {
  message("Removing zero-variance columns: ", paste(names(pca_data_numeric_filtered)[zero_var_cols], collapse = ", "))
  pca_data_numeric_filtered <- pca_data_numeric_filtered[, -zero_var_cols, drop = FALSE]
}

# 10) Ensure at least two columns remain for PCA
if (ncol(pca_data_numeric_filtered) < 2) {
    stop("Error: PCA requires at least two numeric columns. Check dataset preparation.")
}

# 11) Scale data (mean=0, sd=1)
pca_data_scaled_filtered <- scale(pca_data_numeric_filtered)

# 12) Remove rows with any non-finite values (NA/Inf)
row_is_finite_filtered <- apply(pca_data_scaled_filtered, 1, function(x) all(is.finite(x)))
pca_data_cleaned_filtered <- pca_data_scaled_filtered[row_is_finite_filtered, , drop = FALSE]
pca_data_used_filtered <- pca_data_filtered[row_is_finite_filtered, , drop = FALSE]

# 13) Perform PCA on the cleaned data
pca_res_filtered <- tryCatch(
    prcomp(pca_data_cleaned_filtered, center = TRUE, scale. = TRUE),
    error = function(e) {
        stop("Error in PCA computation: ", e$message)
    }
)

# 14) Create a data frame with PCA scores + site info
if ("site" %in% colnames(pca_data_used_filtered)) {
    pca_scores_filtered <- data.frame(pca_res_filtered$x, site = pca_data_used_filtered$site)
} else {
    pca_scores_filtered <- data.frame(pca_res_filtered$x)
}

# 15) Extract and organize PCA loadings
pca_loadings_filtered <- as.data.frame(pca_res_filtered$rotation)
pca_loadings_filtered$Variable <- rownames(pca_loadings_filtered)

# Print numerical outputs: PCA Summary & Scores
print(summary(pca_res_filtered))
print(head(pca_scores_filtered))
print(pca_loadings_filtered)

# Save PCA Summary
pca_summary <- summary(pca_res_filtered)
write.csv(pca_summary$importance, "PCA_Summary_Before_tot.csv", row.names = TRUE)

# Save PCA Scores
write.csv(pca_scores_filtered, "PCA_Scores_Before_tot.csv", row.names = TRUE)

# Save PCA Loadings
write.csv(pca_loadings_filtered, "PCA_Loadings_Before_tot.csv", row.names = TRUE)

# Confirm successful export
cat("PCA summary, scores, and loadings have been saved as CSV files with 'Before' in filenames.\n")


# Load required packages
library(ggplot2)
library(ggfortify)
library(RColorBrewer)

# Ensure enough colors for the number of sites
num_sites <- length(unique(pca_scores_filtered$site))
site_colors <- colorRampPalette(brewer.pal(8, "Dark2"))(num_sites)  # Dynamically generate colors

# Create PCA plot with improved formatting
if ("site" %in% colnames(pca_scores_filtered)) {
    pca_plot_filtered <- autoplot(
        pca_res_filtered,
        data = pca_scores_filtered,
        colour = "site",
        label = FALSE,              # Remove default text labels from autoplot
        loadings = TRUE,
        loadings.colour = "black",   
        loadings.label = TRUE,
        loadings.label.colour = "black"
    ) +
    geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 5, fontface = "bold") +  # Increase font size
    scale_color_manual(values = site_colors) +  # Use dynamically generated color palette
    theme_minimal(base_size = 14) +  # Increase overall font size
    theme(
        legend.position = "none",  # Remove legend
        axis.title = element_text(size = 16, face = "bold"),  # Increase axis title size
        axis.text = element_text(size = 14),  # Increase axis text size
        plot.title = element_text(size = 18, face = "bold")  # Increase title size
    ) +
    labs(title = "PCA Analysis of Reef Benthic Composition (Before)")
} else {
    pca_plot_filtered <- autoplot(
        pca_res_filtered,
        loadings = TRUE,
        loadings.colour = "black",   
        loadings.label = TRUE,
        loadings.label.colour = "black"
    ) +
    theme_minimal(base_size = 14) +
    theme(
        legend.position = "none",
        axis.title = element_text(size = 16, face = "bold"),
        axis.text = element_text(size = 14),
        plot.title = element_text(size = 18, face = "bold")
    ) +
    labs(title = "PCA Analysis of Reef Benthic Composition (Before)")
}

# Explicitly plot the PCA visualization
plot(pca_plot_filtered)

# Save the updated PCA plot
ggsave("PCA_Plot_Before_tot.png", plot = pca_plot_filtered, width = 8, height = 6, dpi = 300)

# Confirm successful export
cat("Updated PCA plot has been saved as 'PCA_Plot_Before.png' with improved font sizes and site colors.\n")

```

##PCA After Bleaching
```{r PCA_analysis_After_AugusttoDecember_2024, echo=TRUE, message=FALSE, warning=FALSE}

# Load required libraries
library(dplyr)
library(ggplot2)
library(ggfortify)

# 1) Filter dataset to include only data from August 20, 2024, to December 2024
filtered_pca_data_after <- filtered_prevsub %>%
  filter(date >= as.Date("2024-08-20") & date <= as.Date("2024-12-31"))  

# 2) Summarize data by site for the columns of interest
pca_data_after <- filtered_pca_data_after %>%
  group_by(site) %>%
  summarise(
    across(
      c(bleached, diseased, depth, coral, mean_temp, max_temp, min_temp, macroalgae, 
        turf, cyanobacteria, caulerpa, deadcoral, cca, rubble, sand, rock, 
        totalpoc, totalpor, totalpav, totalpsamm, totalother, ph, sal, oxi, 
        nit, pho),
      ~ mean(.x, na.rm = TRUE)
    )
  ) %>%
  ungroup()

# 3) Filter dataset: Remove selected variables (same as "Before" PCA)
pca_data_filtered_after <- pca_data_after %>%
  dplyr::select(-c(diseased, pho, ph, sal, min_temp, oxi, mean_temp, max_temp, nit, rubble, sand, cyanobacteria))

# 4) Keep only numeric columns (excluding site)
pca_data_numeric_after <- pca_data_filtered_after %>%
  dplyr::select(where(is.numeric))

# 5) Remove columns with too many missing values (>50% missing)
pca_data_numeric_after <- pca_data_numeric_after %>%
  dplyr::select(where(~ mean(is.na(.)) < 0.5))

# 6) Remove rows with too many missing values (>50% missing)
pca_data_numeric_after <- pca_data_numeric_after %>%
  filter(rowMeans(is.na(.)) < 0.5)

# 7) Replace remaining NAs with column means
pca_data_numeric_after <- pca_data_numeric_after %>%
  mutate(across(everything(), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

# 8) Remove rows where **all** values are zero
pca_data_numeric_after <- pca_data_numeric_after[rowSums(pca_data_numeric_after, na.rm = TRUE) > 0, ]

# 9) Remove zero-variance columns
zero_var_cols <- which(sapply(pca_data_numeric_after, function(col) {
  if (all(is.na(col))) return(TRUE) # Remove all-NA columns
  if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns
  return(FALSE)
}))

if (length(zero_var_cols) > 0) {
  message("Removing zero-variance columns: ", paste(names(pca_data_numeric_after)[zero_var_cols], collapse = ", "))
  pca_data_numeric_after <- pca_data_numeric_after[, -zero_var_cols, drop = FALSE]
}

# 10) Ensure at least two columns remain for PCA
if (ncol(pca_data_numeric_after) < 2) {
    stop("Error: PCA requires at least two numeric columns. Check dataset preparation.")
}

# 11) Scale data (mean=0, sd=1)
pca_data_scaled_after <- scale(pca_data_numeric_after)

# 12) Remove rows with any non-finite values (NA/Inf)
row_is_finite_after <- apply(pca_data_scaled_after, 1, function(x) all(is.finite(x)))
pca_data_cleaned_after <- pca_data_scaled_after[row_is_finite_after, , drop = FALSE]
pca_data_used_after <- pca_data_filtered_after[row_is_finite_after, , drop = FALSE]

# 13) Perform PCA on the cleaned data
pca_res_after <- tryCatch(
    prcomp(pca_data_cleaned_after, center = TRUE, scale. = TRUE),
    error = function(e) {
        stop("Error in PCA computation: ", e$message)
    }
)

# 14) Create a data frame with PCA scores + site info
if ("site" %in% colnames(pca_data_used_after)) {
    pca_scores_after <- data.frame(pca_res_after$x, site = pca_data_used_after$site)
} else {
    pca_scores_after <- data.frame(pca_res_after$x)
}

# 15) Extract and organize PCA loadings
pca_loadings_after <- as.data.frame(pca_res_after$rotation)
pca_loadings_after$Variable <- rownames(pca_loadings_after)

# Print numerical outputs: PCA Summary & Scores
print(summary(pca_res_after))
print(head(pca_scores_after))
print(pca_loadings_after)

# Save PCA Summary
pca_summary_after <- summary(pca_res_after)
write.csv(pca_summary_after$importance, "PCA_Summary_After_tot.csv", row.names = TRUE)

# Save PCA Scores
write.csv(pca_scores_after, "PCA_Scores_After_tot.csv", row.names = TRUE)

# Save PCA Loadings
write.csv(pca_loadings_after, "PCA_Loadings_After_tot.csv", row.names = TRUE)

# Confirm successful export
cat("PCA summary, scores, and loadings have been saved as CSV files with 'After' in filenames.\n")

# Load required packages
library(ggplot2)
library(ggfortify)
library(RColorBrewer)

# Ensure enough colors for the number of sites
num_sites <- length(unique(pca_scores_after$site))
site_colors <- colorRampPalette(brewer.pal(8, "Dark2"))(num_sites)  # Dynamically generate colors

# Create PCA plot with improved formatting
if ("site" %in% colnames(pca_scores_after)) {
    pca_plot_after <- autoplot(
        pca_res_after,
        data = pca_scores_after,
        colour = "site",
        label = FALSE,              # Remove default text labels from autoplot
        loadings = TRUE,
        loadings.colour = "black",   
        loadings.label = TRUE,
        loadings.label.colour = "black"
    ) +
    geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 5, fontface = "bold") +  # Increase font size
    scale_color_manual(values = site_colors) +  # Use dynamically generated color palette
    theme_minimal(base_size = 14) +  # Increase overall font size
    theme(
        legend.position = "none",  # Remove legend
        axis.title = element_text(size = 16, face = "bold"),  # Increase axis title size
        axis.text = element_text(size = 14),  # Increase axis text size
        plot.title = element_text(size = 18, face = "bold")  # Increase title size
    ) +
    labs(title = "PCA Analysis of Reef Benthic Composition (After)")
} else {
    pca_plot_after <- autoplot(
        pca_res_after,
        loadings = TRUE,
        loadings.colour = "black",   
        loadings.label = TRUE,
        loadings.label.colour = "black"
    ) +
    theme_minimal(base_size = 14) +
    theme(
        legend.position = "none",
        axis.title = element_text(size = 16, face = "bold"),
        axis.text = element_text(size = 14),
        plot.title = element_text(size = 18, face = "bold")
    ) +
    labs(title = "PCA Analysis of Reef Benthic Composition (After)")
}

# Explicitly plot the PCA visualization
plot(pca_plot_after)

# Save the updated PCA plot
ggsave("PCA_Plot_After_tot.png", plot = pca_plot_after, width = 8, height = 6, dpi = 300)

# Confirm successful export
cat("Updated PCA plot has been saved as 'PCA_Plot_After.png' with improved font sizes and site colors.\n")


```

##PCA Latest Data
```{r PCA_analysis_Latest_AfterJanuary_2025, echo=TRUE, message=FALSE, warning=FALSE}

# Load required libraries
library(dplyr)
library(ggplot2)
library(ggfortify)
library(RColorBrewer)

# Ensure dataset exists
if (!exists("filtered_prevsub")) {
  stop("Error: The dataset 'filtered_prevsub' is missing. Ensure it's loaded before running this script.")
}

# Step 1: Remove Chorro transects 1, 2, and 3 **before selecting latest data**
if ("transect" %in% colnames(filtered_prevsub)) {
  filtered_prevsub <- filtered_prevsub %>%
    filter(!(site == "Chorro" & transect %in% c(1, 2, 3)))  # Exclude these transects
} else {
  stop("Error: 'transect' column is missing. Ensure it's included in filtered_prevsub.")
}

# Step 2: Confirm Chorro still exists but without transects 1, 2, 3
remaining_transects <- unique(filtered_prevsub$transect[filtered_prevsub$site == "Chorro"])
if (length(remaining_transects) == 0) {
  stop("Error: No Chorro transects remain after filtering. Check dataset.")
} else {
  cat("✅ Chorro transects 1, 2, and 3 removed. Remaining transects:\n")
  print(remaining_transects)
}

# Step 3: Now filter dataset to **only include data from January 2025 onwards**
filtered_pca_data_latest <- filtered_prevsub %>%
  filter(date >= as.Date("2025-01-01"))  

# Step 4: Select **latest recorded data per site** after filtering
latest_pca_data <- filtered_pca_data_latest %>%
  group_by(site) %>%
  slice_max(order_by = date, n = 1, with_ties = FALSE) %>%
  ungroup()

# Step 5: Select the key benthic variables (aligning with Before/After PCA)
pca_data_latest <- latest_pca_data %>%
  select(
    site, bleached, coral, turf, cca, depth, rock,
    deadcoral, totalpoc, totalother, totalpor, deadcoral, totalpsamm, totalpav, macroalgae, caulerpa,
  ) %>%
  drop_na()  # Remove any rows with missing values

# Step 6: Keep only numeric columns (excluding site)
pca_data_numeric_latest <- pca_data_latest %>%
  select(where(is.numeric))

# Step 7: Remove zero-variance columns
zero_var_cols <- which(sapply(pca_data_numeric_latest, function(col) {
  if (all(is.na(col))) return(TRUE) # Remove all-NA columns
  if (var(col, na.rm = TRUE) == 0) return(TRUE) # Remove zero-variance columns
  return(FALSE)
}))

if (length(zero_var_cols) > 0) {
  message("Removing zero-variance columns: ", paste(names(pca_data_numeric_latest)[zero_var_cols], collapse = ", "))
  pca_data_numeric_latest <- pca_data_numeric_latest[, -zero_var_cols, drop = FALSE]
}

# Ensure at least two columns remain for PCA
if (ncol(pca_data_numeric_latest) < 2) {
    stop("Error: PCA requires at least two numeric columns. Check dataset preparation.")
}

# Step 8: Scale data (mean=0, sd=1)
pca_data_scaled_latest <- scale(pca_data_numeric_latest)

# Step 9: Remove rows with any non-finite values (NA/Inf)
row_is_finite_latest <- apply(pca_data_scaled_latest, 1, function(x) all(is.finite(x)))
pca_data_cleaned_latest <- pca_data_scaled_latest[row_is_finite_latest, , drop = FALSE]
pca_data_used_latest <- pca_data_latest[row_is_finite_latest, , drop = FALSE]

# Step 10: Perform PCA on the cleaned data
pca_res_latest <- tryCatch(
    prcomp(pca_data_cleaned_latest, center = TRUE, scale. = TRUE),
    error = function(e) {
        stop("Error in PCA computation: ", e$message)
    }
)

# Step 11: Create a data frame with PCA scores + site info
if ("site" %in% colnames(pca_data_used_latest)) {
    pca_scores_latest <- data.frame(pca_res_latest$x, site = pca_data_used_latest$site)
} else {
    pca_scores_latest <- data.frame(pca_res_latest$x)
}

# Step 12: Extract and organize PCA loadings
pca_loadings_latest <- as.data.frame(pca_res_latest$rotation)
pca_loadings_latest$Variable <- rownames(pca_loadings_latest)

# Print numerical outputs: PCA Summary & Scores
print(summary(pca_res_latest))
print(head(pca_scores_latest))
print(pca_loadings_latest)

# Save PCA Summary
write.csv(summary(pca_res_latest)$importance, "PCA_Summary_Latest_tot.csv", row.names = TRUE)
write.csv(pca_scores_latest, "PCA_Scores_Latest_tot.csv", row.names = TRUE)
write.csv(pca_loadings_latest, "PCA_Loadings_Latest_tot.csv", row.names = TRUE)

cat("✅ PCA results saved as CSV files with 'Latest' in filenames.\n")

# Step 13: Create PCA plot
num_sites <- length(unique(pca_scores_latest$site))
site_colors <- colorRampPalette(brewer.pal(8, "Dark2"))(num_sites)  # Generate colors

pca_plot_latest <- autoplot(
    pca_res_latest,
    data = pca_scores_latest,
    colour = "site",
    label = FALSE,
    loadings = TRUE,
    loadings.colour = "black",
    loadings.label = TRUE,
    loadings.label.colour = "black"
) +
geom_text(aes(label = site, color = site), vjust = -1, hjust = 0.5, size = 5, fontface = "bold") +
scale_color_manual(values = site_colors) +
theme_minimal(base_size = 14) +
theme(
    legend.position = "none",
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 14),
    plot.title = element_text(size = 18, face = "bold")
) +
labs(title = "PCA Analysis of Reef Benthic Composition (Latest - 2025)")

# Plot the PCA visualization
plot(pca_plot_latest)

# Save the updated PCA plot
ggsave("PCA_Plot_Latest_tot.png", plot = pca_plot_latest, width = 8, height = 6, dpi = 300)

cat("✅ PCA plot saved as 'PCA_Plot_Latest.png' with improved formatting.\n")

```

##PCA Comparisons

```{r PCA comparisons, echo=TRUE, message=FALSE, warning=FALSE}

# Extract variance explained (proportion of variance per PC)
variance_comparison <- data.frame(
  Time_Period = c("Filtered", "After", "Latest"),
  PC1 = c(pca_res_filtered$sdev[1]^2 / sum(pca_res_filtered$sdev^2),
          pca_res_after$sdev[1]^2 / sum(pca_res_after$sdev^2),
          pca_res_latest$sdev[1]^2 / sum(pca_res_latest$sdev^2)),
  PC2 = c(pca_res_filtered$sdev[2]^2 / sum(pca_res_filtered$sdev^2),
          pca_res_after$sdev[2]^2 / sum(pca_res_after$sdev^2),
          pca_res_latest$sdev[2]^2 / sum(pca_res_latest$sdev^2))
)

print(variance_comparison)

# Check variables in each PCA loading matrix
print(rownames(pca_loadings_filtered))
print(rownames(pca_loadings_after))
print(rownames(pca_loadings_latest))

# Find common variables across all time periods
common_vars <- Reduce(intersect, list(rownames(pca_loadings_filtered), 
                                      rownames(pca_loadings_after), 
                                      rownames(pca_loadings_latest)))

print(common_vars)  # See which variables are shared across all PCAs


library(ggplot2)
library(reshape2)

# Identify common variables across all PCA loadings
common_vars <- Reduce(intersect, list(rownames(pca_loadings_filtered), 
                                      rownames(pca_loadings_after), 
                                      rownames(pca_loadings_latest)))

print(common_vars)  # Check shared variables


# Subset PCA loadings to include only common variables
pca_loadings_df <- data.frame(
  Variable = common_vars,
  PC1_Filtered = pca_loadings_filtered[common_vars, 1],  
  PC1_After = pca_loadings_after[common_vars, 1],  
  PC1_Latest = pca_loadings_latest[common_vars, 1]  
)

# Print to verify that all rows match
print(pca_loadings_df)


# Reshape for ggplot
pca_loadings_melted <- melt(pca_loadings_df, id.vars = "Variable")

# Plot PCA loadings across time
ggplot(pca_loadings_melted, aes(x = Variable, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  theme_minimal() +
  labs(title = "PCA Loadings Comparison Across Time Periods",
       x = "Benthic Variable", y = "PC1 Loading", fill = "Time Period")


# Scatter plot showing site movement in PCA space
plot(pca_scores_filtered[,1], pca_scores_filtered[,2], col = "blue", pch = 16, 
     xlab = "PC1", ylab = "PC2", main = "PCA Trajectories Across Time")
points(pca_scores_after[,1], pca_scores_after[,2], col = "orange", pch = 16)
points(pca_scores_latest[,1], pca_scores_latest[,2], col = "red", pch = 16)
arrows(pca_scores_filtered[,1], pca_scores_filtered[,2], pca_scores_latest[,1], pca_scores_latest[,2], col = "black", length = 0.1)

legend("topright", legend = c("Filtered", "After", "Latest"), col = c("blue", "orange", "red"), pch = 16)


```

```{r combined PCA plots, echo=TRUE, message=FALSE, warning=FALSE}
# Load required libraries
library(ggfortify)
library(patchwork)  # For combining plots

# Define a stronger color palette for better visibility
site_colors <- c(
    "Ancla" = "#E41A1C", "Barco Profundo" = "#377EB8", "Barco Somero" = "#4DAF4A",
    "Chorro" = "#984EA3", "Cueva" = "#FF7F00", "Esquina" = "#F781BF",
    "San Josecito" = "#A65628", "Tina" = "#228833", "Este Intermedio" = "#CC79A7"
)

# Function to generate PCA plots with percentage variance in axis labels
create_pca_plot <- function(pca_res, pca_scores, title) {
    # Extract variance explained by PC1 and PC2
    variance_explained <- summary(pca_res)$importance[2, 1:2] * 100  # Convert to percentage
    pc1_label <- paste0("PC1 (", round(variance_explained[1], 1), "%)")
    pc2_label <- paste0("PC2 (", round(variance_explained[2], 1), "%)")
    
    if ("site" %in% colnames(pca_scores)) {
        pca_plot <- autoplot(
            pca_res,
            data = pca_scores,
            colour = "site",
            label = FALSE,  # Remove default text labels from autoplot
            loadings = TRUE,
            loadings.colour = "black",
            loadings.label = TRUE,
            loadings.label.colour = "black"
        ) +
        scale_color_manual(values = site_colors) +  # Use site-specific colors
        geom_text(aes(label = site, color = site), 
                  vjust = -1, hjust = 0.5, size = 3.5, fontface = "bold") +  # Adjusted label size
        theme_minimal(base_size = 14) +  # Increase font size for readability
        theme(
            legend.position = "none",  # Remove legend
            axis.title = element_text(size = 16, face = "bold"),
            axis.text = element_text(size = 14),
            plot.title = element_text(size = 18, face = "bold", hjust = 0.5)
        ) +
        labs(title = title, x = pc1_label, y = pc2_label)  # Use dynamic axis labels
    } else {
        pca_plot <- autoplot(
            pca_res,
            loadings = TRUE,
            loadings.colour = "black",
            loadings.label = TRUE,
            loadings.label.colour = "black"
        ) +
        theme_minimal(base_size = 14) +
        theme(
            legend.position = "none",
            axis.title = element_text(size = 16, face = "bold"),
            axis.text = element_text(size = 14),
            plot.title = element_text(size = 18, face = "bold", hjust = 0.5)
        ) +
        labs(title = title, x = pc1_label, y = pc2_label)  # Use dynamic axis labels
    }
    
    return(pca_plot)
}

# Generate PCA plots with updated titles and dynamic axis labels
pca_plot_before <- create_pca_plot(pca_res_filtered, pca_scores_filtered, "During Bleaching")
pca_plot_after <- create_pca_plot(pca_res_after, pca_scores_after, "After Bleaching")
pca_plot_latest <- create_pca_plot(pca_res_latest, pca_scores_latest, "Latest Data")

# Combine the three plots in a single row for direct comparison
combined_pca_plot <- (pca_plot_before | pca_plot_after | pca_plot_latest)

# Display the combined PCA plots
print(combined_pca_plot)
```

##Composite Restoration Feasibility Analysis

```{r shannon-summary-analysis-latest, echo=TRUE, message=FALSE, warning=FALSE}

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(vegan)  # For Shannon diversity index calculation
library(FSA)    # For Dunn's post-hoc test
library(car)

# 1) Identify the latest date per site
latest_data <- filtered_prevsub_coral %>%
  group_by(site) %>%
  filter(date == max(date, na.rm = TRUE)) %>%
  ungroup()

# 2) Define the species count columns (Tot_ dataset)
species_count_columns <- c(
  "Tot_pocillopora", "Tot_poriteslobata", "Tot_pavonaclavus", 
  "Tot_pavonagigantea", "Tot_pavonamaldivensis", "Tot_pavonavarians",
  "Tot_pavonachiriquiensis", "Tot_pavonafrondifera", "Tot_psammacorastellata",
  "Tot_psammacoraprofundacella", "Tot_gardineroserisplanulata", "Tot_tubastraeacoccinea",
  "Tot_other"
)

# 3) Compute Shannon Index on the latest data using species counts
latest_data <- latest_data %>%
  mutate(Shannon_Index_Latest = diversity(select(., all_of(species_count_columns)), index = "shannon"))

# 4) Remove rows with NA values in species counts
latest_data <- latest_data %>%
  drop_na(all_of(species_count_columns))

# 5) Inspect the new Shannon column
summary(latest_data$Shannon_Index_Latest)

# 6) Summarize by site for the latest data
latest_shannon_summary <- latest_data %>%
  group_by(site) %>%
  summarise(
    mean_shannon = mean(Shannon_Index_Latest, na.rm = TRUE),
    sd_shannon   = sd(Shannon_Index_Latest, na.rm = TRUE),
    n            = n()
  ) %>%
  arrange(desc(mean_shannon))

print("Summary statistics for Shannon Index (Latest) by site:")
print(latest_shannon_summary)

# 7) Save computed Shannon Index data
write.csv(latest_data, "Shannon_Index_Latest_Data.csv", row.names = FALSE)
cat("Shannon Index Latest Data saved as 'Shannon_Index_Latest_Data.csv'\n")

# 8) Violin Plot of Shannon Index (LATEST) by site
p_latest <- ggplot(latest_data, aes(x = site, y = Shannon_Index_Latest, fill = site)) +
  geom_violin(trim = FALSE, scale = "width", alpha = 0.6, width = 0.9) +  
  geom_jitter(aes(fill = site), color = "black", size = 2.5, shape = 21, width = 0.2, alpha = 0.7) +  
  stat_summary(fun.data = "mean_cl_boot", colour = "black", fill = "white", size = 1, shape = 21) +  
  scale_fill_brewer(palette = "Paired") +  
  theme_minimal(base_size = 15) +
  labs(title = "Shannon Diversity Index (Latest) by Site", x = "Site", y = "Shannon Index (Latest)") +
  theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

# Save and print the plot
ggsave("Violin_Plot_Shannon_Index_Latest.png", plot = p_latest, width = 12, height = 6, dpi = 300)
cat("Violin plot saved as 'Violin_Plot_Shannon_Index_Latest.png'\n")
print(p_latest)

# 9) Kruskal-Wallis test for Shannon diversity across sites
kruskal_test1 <- kruskal.test(Shannon_Index_Latest ~ site, data = latest_data)
cat("Kruskal-Wallis Test Results for Shannon Index by Site:\n")
print(kruskal_test1)

# 10) Dunn's post-hoc test (if Kruskal-Wallis is significant)
if (kruskal_test1$p.value < 0.05) {
  posthoc1 <- dunnTest(Shannon_Index_Latest ~ site, data = latest_data, method = "bonferroni")
  cat("\nPost-hoc pairwise comparisons (Dunn's Test):\n")
  print(posthoc1$res)
} else {
  cat("\nKruskal-Wallis Test not significant, skipping post-hoc comparisons.\n")
}

# 11) Scatter plot: Shannon Index vs. Depth
ggplot(latest_data, aes(x = depth, y = Shannon_Index_Latest)) +
  geom_point(size = 3, alpha = 0.7, color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red", linetype = "dashed") +
  theme_minimal() +
  labs(title = "Relationship between Shannon Diversity (Latest) and Depth",
       x = "Depth (m)", y = "Shannon Index (Latest)")

# 12) Spearman correlation test for Shannon Index vs. Depth
cor_test <- cor.test(latest_data$Shannon_Index_Latest, latest_data$depth, method = "spearman")
cat("\nSpearman Correlation between Shannon Index (Latest) and Depth:\n")
print(cor_test)

```


```{r Alternative_composite_feasibility_NO SIMPER, echo=TRUE, message=FALSE, warning=FALSE}
# Load required libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(brms)   # (if needed for additional modeling)

# 1) Define expert impact directions with enhanced weight for coral cover
impact_direction_alt <- data.frame(
  Variable = c("coral", "cca", "bleached", "turf", "cyanobacteria", "shannon"),
  Direction = c(+4, +2, -2, -3, -2, +2)  # Stronger positive weight for coral; increased penalties for turf and cyanobacteria
)

# 2) Summarize PCA loadings (using pca_loadings_latest dataset)
pca_summarized_alt <- pca_loadings_latest %>%
  mutate(LoadingSum = abs(PC1) + abs(PC2) + abs(PC3)) %>%
  select(Variable, LoadingSum)

# 3) Merge expert impact directions with PCA loadings
combined_df_alt <- impact_direction_alt %>%
  left_join(pca_summarized_alt, by = "Variable") %>%
  mutate(LoadingSum = if_else(is.na(LoadingSum), 0, LoadingSum)) %>%
  # Compute composite weight as the product of the loading sum and the expert impact direction
  mutate(CompositeWeight = LoadingSum * Direction)

# 4) Merge Shannon diversity into site-level data (latest_data_with_shannon)
latest_data_with_shannon <- latest_data %>%
  left_join(latest_shannon_summary %>% select(site, mean_shannon) %>% rename(shannon = mean_shannon), by = "site")

# 5) Scale the site data for each variable included in combined_df_alt
scaled_latest_alt <- latest_data_with_shannon %>%
  mutate(across(all_of(combined_df_alt$Variable), scale, .names = "scaled_{.col}"))

# 6) Convert data to long format for weighting
long_latest_alt <- scaled_latest_alt %>%
  pivot_longer(
    cols = starts_with("scaled_"),
    names_to = "Variable",
    values_to = "Scaled_Value"
  ) %>%
  mutate(Variable = sub("scaled_", "", Variable))  # Remove prefix

# 7) Compute final weighted contributions
weighted_latest_alt <- long_latest_alt %>%
  left_join(combined_df_alt %>% select(Variable, CompositeWeight), by = "Variable") %>%
  mutate(Weighted_Contribution = Scaled_Value * CompositeWeight)

# 8) Summarize feasibility scores per site
feasibility_scores_alt <- weighted_latest_alt %>%
  group_by(site) %>%
  summarise(Feasibility_Score = sum(Weighted_Contribution, na.rm = TRUE)) %>%
  arrange(desc(Feasibility_Score))

# Print final feasibility scores
cat("\nAlternative Feasibility Scores (Excluding SIMPER):\n")
print(feasibility_scores_alt)

# 9) Visualize the feasibility scores
feasibility_plot_alt <- ggplot(feasibility_scores_alt, aes(x = reorder(site, Feasibility_Score), y = Feasibility_Score, fill = Feasibility_Score)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  coord_flip() +
  labs(title = "Alternative Composite Feasibility Index by Site (Excluding SIMPER)",
       x = "Site",
       y = "Feasibility Score (Weighted)") +
  theme_minimal()

# Save the plot
ggsave("feasibility_plot_alternative.png", plot = feasibility_plot_alt, width = 8, height = 6, dpi = 300)
print(feasibility_plot_alt)
```

## Including Plots

You can also embed plots, for example:

```{r pressure, echo=FALSE}
plot(pressure)
```

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.