---
title: "5kmSSTNOAA"
author: "Caroline Palmer"
date: "2025-03-02"
output: html_document
---

```{r 1985_2012_calculate_MMM_DHW_alternative, echo=TRUE, message=FALSE, warning=FALSE}
# Load necessary libraries
library(dplyr)
library(lubridate)
library(zoo)
library(ggplot2)

# Read the dataset (ensure your CSV has a column named "date")
df <- read.csv("SSTraw.csv")

# Convert the 'date' column to Date format using dd/mm/yyyy
# Ensure 'date' is treated as a character before conversion
df$date <- as.Date(as.character(df$date), format = "%Y%m%d")


# Check for missing SST values and remove them
df <- df %>% filter(!is.na(SST))

# Ensure SST column exists
if (!"SST" %in% names(df)) {
  stop("Error: SST column not found in the dataset.")
}

# Filter data for MMM calculation (1985-2012)
df_filtered <- df %>% filter(year(date) >= 1985 & year(date) <= 2012)

# Check that data exists for the filtering range
if (nrow(df_filtered) == 0) {
  warning("No data available for the period 1985-2012. Please verify the date format and range in your dataset.")
  # Optionally, use the full dataset if that is acceptable:
  df_filtered <- df
}

# Calculate monthly mean SSTs across the filtered period
df_monthly <- df_filtered %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(mean_SST = mean(SST, na.rm = TRUE))

# Print monthly means for debugging
print(df_monthly)

# Ensure there are no missing values before running regression
if (all(is.na(df_monthly$mean_SST))) {
  stop("Error: All mean_SST values are NA. Regression cannot proceed.")
} else {
  # Perform linear regression to identify trends in monthly SSTs
  trend_model <- lm(mean_SST ~ month, data = df_monthly)
  print(summary(trend_model))
}

# Adjust climatology to baseline period (1985-1990 + 1993)
df_baseline <- df_filtered %>%
  filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993)

# Ensure baseline data exists
if (nrow(df_baseline) == 0) {
  stop("Error: No baseline data available for 1985-1990 and 1993.")
}

df_baseline_monthly <- df_baseline %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(baseline_SST = mean(SST, na.rm = TRUE))

# Determine MMM1 as the maximum of the monthly baseline means
MMM1 <- max(df_baseline_monthly$baseline_SST, na.rm = TRUE)

# Check if MMM1 is valid
if (!is.finite(MMM1)) {
  stop("Error: MMM1 calculation resulted in a non-finite value.")
}

print(paste("Calculated MMM1:", MMM1))

# Save MMM1 value to file
write.csv(data.frame(MMM1 = MMM1), "MMM1_calculated.csv", row.names = FALSE)

# Compute DHW for the full dataset based on MMM1.
# NOAA's protocol accumulates only the excess above 1°C over MMM.
df <- df %>%
  arrange(date) %>%
  mutate(
    SST_anomaly = SST - MMM1,
    # Only count the amount above 1°C (i.e. if anomaly > 1, subtract 1; otherwise, 0)
    exceedance = ifelse(SST_anomaly > 1, SST_anomaly - 1, 0)
  )

# Compute a 12-week (84-day) rolling sum for DHW and convert to °C-weeks (divide by 7)
df <- df %>%
  mutate(DHW = rollapply(exceedance, width = 84, FUN = sum, align = "right", fill = NA) / 7)

# Replace any infinite or missing DHW values with 0
df$DHW[is.infinite(df$DHW) | is.na(df$DHW)] <- 0

# Save DHW results to file
write.csv(df, "DHW_calculated.csv", row.names = FALSE)

# Pre-calculate the maximum SST for setting plot limits
sst_max <- max(df$SST, na.rm = TRUE)

library(ggplot2)

# Plot SST, MMM1, and DHW over time
p <- ggplot(df, aes(x = date)) +
  geom_line(aes(y = SST, color = "SST (°C)"), size = 1) +
  # Scale DHW to overlay on the SST scale.
  geom_line(aes(y = DHW * (sst_max - 25) / 7 + 25, color = "DHW (°C-weeks)"), size = 1) +
  geom_hline(aes(yintercept = MMM1, color = "MMM"), linetype = "dashed", size = 1) +
  geom_smooth(aes(y = SST, color = "SST Trend"), 
              method = "lm", linetype = "solid", size = 1, se = FALSE) +
  scale_y_continuous(
    name = "SST (°C)",
    limits = c(25, sst_max),
    sec.axis = sec_axis(~ (. - 25) * 7 / (sst_max - 25),
                        name = "DHW (°C-weeks)",
                        breaks = seq(0, 7, by = 1))
  ) +
  scale_x_date(
    limits = as.Date(c("1981-01-01", "2025-12-31")),
    date_breaks = "1 year", date_labels = "%Y",
    expand = c(0.01, 0) # Add slight padding for clarity
  ) +
  scale_color_manual(
    values = c("SST (°C)" = "steelblue",
               "DHW (°C-weeks)" = "darkred",
               "MMM" = "black",
               "SST Trend" = "white")
  ) +
  labs(title = "SST, MMM1, and Degree Heating Weeks Over Time",
       x = "Year",
       color = "Legend") +
  theme_minimal(base_size = 16) +  # Increase global font size
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 14),  # Rotate x-axis labels for clarity
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 18, face = "bold"),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5)
  )

# Print the plot to the R console
print(p)

# Save the plot to file with increased width
ggsave("SST_DHW12_plot.png", plot = p, width = 14, height = 7)


```
```{r 1985_2012_calculate_MMM0_5_12_DHW_corrected, echo=TRUE, message=FALSE, warning=FALSE}
# Load necessary libraries
library(dplyr)
library(lubridate)
library(zoo)
library(tidyr)  
library(ggplot2)

# Read the dataset (ensure your CSV has a column named "date")
df <- read.csv("SSTraw.csv")

# Convert the 'date' column to Date format using dd/mm/yyyy
# Ensure 'date' is treated as a character before conversion
df$date <- as.Date(as.character(df$date), format = "%Y%m%d")


# Check for missing SST values and remove them
df <- df %>% filter(!is.na(SST))

# Ensure SST column exists
if (!"SST" %in% names(df)) {
  stop("Error: SST column not found in the dataset.")
}

# Filter data for MMM calculation (1985-2012)
df_filtered <- df %>% filter(year(date) >= 1985 & year(date) <= 2012)

# Check that data exists for the filtering range
if (nrow(df_filtered) == 0) {
  warning("No data available for the period 1985-2012. Please verify the date format and range in your dataset.")
  # Optionally, use the full dataset if that is acceptable:
  df_filtered <- df
}

# Calculate monthly mean SSTs across the filtered period
df_monthly <- df_filtered %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(mean_SST = mean(SST, na.rm = TRUE))

# Print monthly means for debugging
print(df_monthly)

# Ensure there are no missing values before running regression
if (all(is.na(df_monthly$mean_SST))) {
  stop("Error: All mean_SST values are NA. Regression cannot proceed.")
} else {
  # Perform linear regression to identify trends in monthly SSTs
  trend_model <- lm(mean_SST ~ month, data = df_monthly)
  print(summary(trend_model))
}

# Adjust climatology to baseline period (1985-1990 + 1993)
df_baseline <- df_filtered %>%
  filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993)

# Ensure baseline data exists
if (nrow(df_baseline) == 0) {
  stop("Error: No baseline data available for 1985-1990 and 1993.")
}

df_baseline_monthly <- df_baseline %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(baseline_SST = mean(SST, na.rm = TRUE))

# Determine MMM1 as the maximum of the monthly baseline means
MMM <- max(df_baseline_monthly$baseline_SST, na.rm = TRUE)

# Check if MMM1 is valid
if (!is.finite(MMM)) {
  stop("Error: MMM calculation resulted in a non-finite value.")
}

print(paste("Calculated MMM:", MMM))


# Save MMM1 value to file
write.csv(data.frame(MMM = MMM), "MMM_calculated.csv", row.names = FALSE)

# Compute DHW for the full dataset based on MMM.
# NOAA's protocol accumulates only the excess above 1°C over MMM.
library(dplyr)
library(zoo)  # For rolling sum

# Arrange by date
df <- df %>%
  arrange(date) %>%
  mutate(
    SST_anomaly = SST - MMM,  # Calculate anomaly as SST - MMM
    # Keep all positive SST anomalies (i.e., any value above MMM)
    exceedance = ifelse(SST_anomaly > 0.5, SST_anomaly -0.5, 0)  
  )

# Compute 12-week (84-day) rolling sum for DHW and convert to °C-weeks
df <- df %>%
  mutate(DHW = rollapply(exceedance, width = 84, FUN = sum, align = "right", fill = NA) / 7) %>%
  mutate(DHW = replace_na(DHW, 0))  # Replace missing DHW values with 0

# Save DHW results to file
write.csv(df, "DHW0_5_12_calculated.csv", row.names = FALSE)

# Pre-calculate the maximum SST for setting plot limits
sst_max <- max(df$SST, na.rm = TRUE)

library(ggplot2)
library(ggplot2)

# Define max DHW for scaling
dhw_max <- max(df$DHW, na.rm = TRUE)

# Plot SST, MMM, and DHW over time
plot2 <- ggplot(df, aes(x = date)) +
  geom_line(aes(y = SST, color = "SST (°C)"), size = 1) +

  # Correct DHW scaling and plotting
  geom_line(aes(y = (DHW / dhw_max) * (sst_max - 25) + 25, color = "DHW (°C-weeks)"), size = 1) +

  geom_hline(aes(yintercept = MMM, color = "MMM"), linetype = "dashed", size = 1) +
  geom_smooth(aes(y = SST, color = "SST Trend"), method = "lm", size = 1, se = FALSE) +

  # Correct placement of sec.axis inside scale_y_continuous()
  scale_y_continuous(
    name = "SST (°C)",
    limits = c(25, sst_max),
    sec.axis = sec_axis(~ (. - 25) * dhw_max / (sst_max - 25),
                        name = "DHW (°C-weeks)",
                        breaks = seq(0, round(dhw_max, 1), by = 1))
  ) +

  scale_x_date(
    limits = as.Date(c("1985-01-01", "2025-12-31")),
    date_breaks = "1 year", date_labels = "%Y",
    expand = c(0.01, 0)
  ) +

  scale_color_manual(
    values = c("SST (°C)" = "steelblue",
               "DHW (°C-weeks)" = "darkred",
               "MMM" = "black",
               "SST Trend" = "white")
  ) +

  labs(title = "SST, MMM, and DHW Over Time",
       x = "Year",
       color = "Legend") +

  theme_minimal(base_size = 16) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 14),
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 18, face = "bold"),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5)
  )

# Print the plot
print(plot2)

# Save to file
ggsave("SST_DHW0_5_12_plot.png", plot = plot2, width = 14, height = 7)


```

```{r USING THIS ONEcombined_bleachingMMM_coolingclimMean0.5_frequency, echo=TRUE, message=FALSE, warning=FALSE}

# Load necessary libraries
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)

# Read the dataset and convert the 'date' column to Date format
df <- read.csv("SSTraw.csv")
df$date <- as.Date(as.character(df$date), format = "%Y%m%d")
df <- df %>% filter(!is.na(SST))

### Part 1: Calculate Bleaching Frequency ###
# Define the baseline period for bleaching threshold: 1985-2012
df_filtered <- df %>% filter(year(date) >= 1985 & year(date) <= 2012)

# Compute monthly mean SSTs over the filtered period (for each calendar month)
df_monthly <- df_filtered %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(mean_SST = mean(SST, na.rm = TRUE))

# Refine the climatology using selected years (1985–1990 and 1993) to reduce noise
df_baseline <- df_filtered %>% 
  filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993)

df_baseline_monthly <- df_baseline %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(baseline_SST = mean(SST, na.rm = TRUE))

# Determine MMM as the maximum of the refined monthly baseline means
MMM <- max(df_baseline_monthly$baseline_SST, na.rm = TRUE)
print(paste("Calculated MMM:", MMM))

# Define the bleaching threshold as MMM + 0.5
bleaching_threshold <- MMM +0.5

# Calculate the number of days per year with SST > bleaching_threshold
yearly_bleaches <- df %>%
  mutate(year = year(date)) %>%
  group_by(year) %>%
  summarise(bleaching_days = sum(SST > bleaching_threshold, na.rm = TRUE))

# Write bleaching frequency to a CSV file
write.csv(yearly_bleaches, "yearly_bleaching_frequency.csv", row.names = FALSE)

### Part 2: Calculate Cooling Frequency ###
# For the cooling threshold, compute the climatological monthly mean for each month (1985–2012)
df_cooling_baseline <- df_filtered %>%
  filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993)

baseline_monthly_mean <- df_cooling_baseline %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(clim_mean = mean(SST, na.rm = TRUE))

baseline_monthly_mean <- baseline_monthly_mean %>%
  mutate(cooling_threshold = clim_mean - 0.5)


# Join the monthly cooling thresholds back to the full dataset
df_cooling <- df %>%
  mutate(month = month(date), year = year(date)) %>%
  left_join(baseline_monthly_mean, by = "month") %>%
  mutate(cooling_event = if_else(SST < cooling_threshold, 1, 0))

# Calculate the number of cooling days per year
yearly_cooling <- df_cooling %>%
  group_by(year) %>%
  summarise(cooling_days = sum(cooling_event, na.rm = TRUE))

# Write cooling frequency to a CSV file
write.csv(yearly_cooling, "yearly_cooling_frequency.csv", row.names = FALSE)

### Part 3: Combine and Plot the Frequencies ###
# Combine bleaching and cooling frequencies into one data frame.
# For plotting, cooling frequencies will be shown as negative values.
combined_freq <- yearly_bleaches %>%
  rename(bleaching = bleaching_days) %>%
  left_join(yearly_cooling %>% rename(cooling = cooling_days), by = "year") %>%
  mutate(cooling = -cooling) %>%
  pivot_longer(cols = c(bleaching, cooling), names_to = "Event", values_to = "Days")

# Recode the Event variable to "Warm" and "Cool"
combined_freq <- combined_freq %>%
  mutate(Event = dplyr::recode(Event, bleaching = "Warm", cooling = "Cool"))

# Create a bar plot showing warm events (positive, in firebrick) and cool events (negative, in steelblue)
combined <- ggplot(combined_freq, aes(x = factor(year), y = Days, fill = Event)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("Warm" = "firebrick", "Cool" = "steelblue")) +
  labs(title = "Annual Frequency of Bleaching and Cooling Events",
       x = "Year", y = "Frequency (Days)",
       fill = "Event") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

plot(combined)

ggsave("combine_freq.png", plot = combined, width = 14, height = 7)

# Linear regression for bleaching frequency over time
lm_bleaching <- lm(bleaching_days ~ year, data = yearly_bleaches)
summary(lm_bleaching)

# Plot bleaching frequency with regression line
ggplot(yearly_bleaches, aes(x = year, y = bleaching_days)) +
  geom_point(color = "firebrick", size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "firebrick") +
  labs(title = "Trend in Annual Bleaching Frequency",
       x = "Year", y = "Bleaching Frequency (Days)") +
  theme_minimal()

# Linear regression for cooling frequency over time
lm_cooling <- lm(cooling_days ~ year, data = yearly_cooling)
summary(lm_cooling)

# Plot cooling frequency with regression line
ggplot(yearly_cooling, aes(x = year, y = cooling_days)) +
  geom_point(color = "steelblue", size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "steelblue") +
  labs(title = "Trend in Annual Cooling Frequency",
       x = "Year", y = "Cooling Frequency (Days)") +
  theme_minimal()

```


```{r regression_SST_analysis_ONI, echo=TRUE, message=FALSE, warning=FALSE, fig.width=12, fig.height=5}
# Load necessary libraries
library(tidyverse)
library(lubridate)
library(MASS)

# Load the data
SSTONI <- read_csv("SSTraw.csv")

# Ensure column names are properly formatted (remove any extra spaces)
SSTONI <- SSTONI %>%
  rename_with(~ str_trim(.), everything())

colnames(SSTONI)


# Convert 'date' to a character (if necessary) and then to 'z_date' using the "DD/MM/YYYY" format.
# Using .data$date forces dplyr to use the column instead of a function named 'date'.

library(dplyr)
library(lubridate)
SSTONI <- SSTONI %>%
  mutate(
    date = as.Date(as.character(date), format = "%Y%m%d"),  # Convert YYYYMMDD to Date
    year   = as.numeric(year),  # Ensure 'year' is numeric
    month  = as.numeric(month)  # Ensure 'month' is numeric
  ) %>%
  filter(!is.na(SST))  # Remove missing SST values


# Verify that the date conversion worked correctly
summary(SSTONI$date)


lm_model <- lm(SST ~ date, data = SSTONI)
plot(lm_model$residuals ~ SSTONI$date, main = "Residuals vs. Time", ylab = "Residuals", xlab = "Date")
abline(h = 0, col = "red", lty = 2)

qqnorm(lm_model$residuals)
qqline(lm_model$residuals, col = "red")

summary(lm_model)

# Ensure slope and intercept are correctly extracted from the linear model
lm_model <- lm(SST ~ date, data = SSTONI)
slope <- coef(lm_model)[2]  # Extract slope (date coefficient)
intercept <- coef(lm_model)[1]  # Extract intercept
r_squared <- summary(lm_model)$r.squared  # Extract R² value

# Format regression results for plot annotation
regression_text <- paste0("Slope: ", round(slope, 5),
                          "\nIntercept: ", round(intercept, 2),
                          "\nR²: ", round(r_squared, 3))

# Dynamically adjust annotation placement
annotation_x <- min(SSTONI$date) + (max(SSTONI$date) - min(SSTONI$date)) * 0.05  # Position 5% from start
annotation_y <- max(SSTONI$SST) - (max(SSTONI$SST) - min(SSTONI$SST)) * 0.1  # Position 10% below max SST

# Plot SST over time with the regression trend line
ggplot(SSTONI, aes(x = date, y = SST)) +
  geom_point(color = "blue", alpha = 0.5) +  # Data points
  geom_smooth(method = "lm", color = "red", linetype = "dashed", se = FALSE) +  # Linear regression line
  annotate("text", x = annotation_x, y = annotation_y, label = regression_text, hjust = 0, size = 4) +
  labs(
    title = "Linear Regression of SST Over Time",
    subtitle = paste("Slope =", round(slope, 5), ", R² =", round(r_squared, 3)),
    x = "Date",
    y = "Sea Surface Temperature (°C)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Save the plot with a corrected filename
ggsave("SST_Regression_Plot.png", width = 12, height = 5, dpi = 300)

SST_model <- lm(SST ~ date + ONI, data = SSTONI)
summary(SST_model)

```