--- title: "5kmSSTNOAA" author: "Caroline Palmer" date: "2025-03-02" output: html_document --- ```{r 1985_2012_calculate_MMM_DHW_alternative, echo=TRUE, message=FALSE, warning=FALSE} # Load necessary libraries library(dplyr) library(lubridate) library(zoo) library(ggplot2) # Read the dataset (ensure your CSV has a column named "date") df <- read.csv("SSTraw.csv") # Convert the 'date' column to Date format using dd/mm/yyyy # Ensure 'date' is treated as a character before conversion df$date <- as.Date(as.character(df$date), format = "%Y%m%d") # Check for missing SST values and remove them df <- df %>% filter(!is.na(SST)) # Ensure SST column exists if (!"SST" %in% names(df)) { stop("Error: SST column not found in the dataset.") } # Filter data for MMM calculation (1985-2012) df_filtered <- df %>% filter(year(date) >= 1985 & year(date) <= 2012) # Check that data exists for the filtering range if (nrow(df_filtered) == 0) { warning("No data available for the period 1985-2012. Please verify the date format and range in your dataset.") # Optionally, use the full dataset if that is acceptable: df_filtered <- df } # Calculate monthly mean SSTs across the filtered period df_monthly <- df_filtered %>% mutate(month = month(date)) %>% group_by(month) %>% summarise(mean_SST = mean(SST, na.rm = TRUE)) # Print monthly means for debugging print(df_monthly) # Ensure there are no missing values before running regression if (all(is.na(df_monthly$mean_SST))) { stop("Error: All mean_SST values are NA. Regression cannot proceed.") } else { # Perform linear regression to identify trends in monthly SSTs trend_model <- lm(mean_SST ~ month, data = df_monthly) print(summary(trend_model)) } # Adjust climatology to baseline period (1985-1990 + 1993) df_baseline <- df_filtered %>% filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993) # Ensure baseline data exists if (nrow(df_baseline) == 0) { stop("Error: No baseline data available for 1985-1990 and 1993.") } df_baseline_monthly <- df_baseline %>% mutate(month = month(date)) %>% group_by(month) %>% summarise(baseline_SST = mean(SST, na.rm = TRUE)) # Determine MMM1 as the maximum of the monthly baseline means MMM1 <- max(df_baseline_monthly$baseline_SST, na.rm = TRUE) # Check if MMM1 is valid if (!is.finite(MMM1)) { stop("Error: MMM1 calculation resulted in a non-finite value.") } print(paste("Calculated MMM1:", MMM1)) # Save MMM1 value to file write.csv(data.frame(MMM1 = MMM1), "MMM1_calculated.csv", row.names = FALSE) # Compute DHW for the full dataset based on MMM1. # NOAA's protocol accumulates only the excess above 1°C over MMM. df <- df %>% arrange(date) %>% mutate( SST_anomaly = SST - MMM1, # Only count the amount above 1°C (i.e. if anomaly > 1, subtract 1; otherwise, 0) exceedance = ifelse(SST_anomaly > 1, SST_anomaly - 1, 0) ) # Compute a 12-week (84-day) rolling sum for DHW and convert to °C-weeks (divide by 7) df <- df %>% mutate(DHW = rollapply(exceedance, width = 84, FUN = sum, align = "right", fill = NA) / 7) # Replace any infinite or missing DHW values with 0 df$DHW[is.infinite(df$DHW) | is.na(df$DHW)] <- 0 # Save DHW results to file write.csv(df, "DHW_calculated.csv", row.names = FALSE) # Pre-calculate the maximum SST for setting plot limits sst_max <- max(df$SST, na.rm = TRUE) library(ggplot2) # Plot SST, MMM1, and DHW over time p <- ggplot(df, aes(x = date)) + geom_line(aes(y = SST, color = "SST (°C)"), size = 1) + # Scale DHW to overlay on the SST scale. geom_line(aes(y = DHW * (sst_max - 25) / 7 + 25, color = "DHW (°C-weeks)"), size = 1) + geom_hline(aes(yintercept = MMM1, color = "MMM"), linetype = "dashed", size = 1) + geom_smooth(aes(y = SST, color = "SST Trend"), method = "lm", linetype = "solid", size = 1, se = FALSE) + scale_y_continuous( name = "SST (°C)", limits = c(25, sst_max), sec.axis = sec_axis(~ (. - 25) * 7 / (sst_max - 25), name = "DHW (°C-weeks)", breaks = seq(0, 7, by = 1)) ) + scale_x_date( limits = as.Date(c("1981-01-01", "2025-12-31")), date_breaks = "1 year", date_labels = "%Y", expand = c(0.01, 0) # Add slight padding for clarity ) + scale_color_manual( values = c("SST (°C)" = "steelblue", "DHW (°C-weeks)" = "darkred", "MMM" = "black", "SST Trend" = "white") ) + labs(title = "SST, MMM1, and Degree Heating Weeks Over Time", x = "Year", color = "Legend") + theme_minimal(base_size = 16) + # Increase global font size theme( axis.text.x = element_text(angle = 45, hjust = 1, size = 14), # Rotate x-axis labels for clarity axis.text.y = element_text(size = 14), axis.title = element_text(size = 18, face = "bold"), legend.text = element_text(size = 14), legend.title = element_text(size = 16, face = "bold"), plot.title = element_text(size = 20, face = "bold", hjust = 0.5) ) # Print the plot to the R console print(p) # Save the plot to file with increased width ggsave("SST_DHW12_plot.png", plot = p, width = 14, height = 7) ``` ```{r 1985_2012_calculate_MMM0_5_12_DHW_corrected, echo=TRUE, message=FALSE, warning=FALSE} # Load necessary libraries library(dplyr) library(lubridate) library(zoo) library(tidyr) library(ggplot2) # Read the dataset (ensure your CSV has a column named "date") df <- read.csv("SSTraw.csv") # Convert the 'date' column to Date format using dd/mm/yyyy # Ensure 'date' is treated as a character before conversion df$date <- as.Date(as.character(df$date), format = "%Y%m%d") # Check for missing SST values and remove them df <- df %>% filter(!is.na(SST)) # Ensure SST column exists if (!"SST" %in% names(df)) { stop("Error: SST column not found in the dataset.") } # Filter data for MMM calculation (1985-2012) df_filtered <- df %>% filter(year(date) >= 1985 & year(date) <= 2012) # Check that data exists for the filtering range if (nrow(df_filtered) == 0) { warning("No data available for the period 1985-2012. Please verify the date format and range in your dataset.") # Optionally, use the full dataset if that is acceptable: df_filtered <- df } # Calculate monthly mean SSTs across the filtered period df_monthly <- df_filtered %>% mutate(month = month(date)) %>% group_by(month) %>% summarise(mean_SST = mean(SST, na.rm = TRUE)) # Print monthly means for debugging print(df_monthly) # Ensure there are no missing values before running regression if (all(is.na(df_monthly$mean_SST))) { stop("Error: All mean_SST values are NA. Regression cannot proceed.") } else { # Perform linear regression to identify trends in monthly SSTs trend_model <- lm(mean_SST ~ month, data = df_monthly) print(summary(trend_model)) } # Adjust climatology to baseline period (1985-1990 + 1993) df_baseline <- df_filtered %>% filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993) # Ensure baseline data exists if (nrow(df_baseline) == 0) { stop("Error: No baseline data available for 1985-1990 and 1993.") } df_baseline_monthly <- df_baseline %>% mutate(month = month(date)) %>% group_by(month) %>% summarise(baseline_SST = mean(SST, na.rm = TRUE)) # Determine MMM1 as the maximum of the monthly baseline means MMM <- max(df_baseline_monthly$baseline_SST, na.rm = TRUE) # Check if MMM1 is valid if (!is.finite(MMM)) { stop("Error: MMM calculation resulted in a non-finite value.") } print(paste("Calculated MMM:", MMM)) # Save MMM1 value to file write.csv(data.frame(MMM = MMM), "MMM_calculated.csv", row.names = FALSE) # Compute DHW for the full dataset based on MMM. # NOAA's protocol accumulates only the excess above 1°C over MMM. library(dplyr) library(zoo) # For rolling sum # Arrange by date df <- df %>% arrange(date) %>% mutate( SST_anomaly = SST - MMM, # Calculate anomaly as SST - MMM # Keep all positive SST anomalies (i.e., any value above MMM) exceedance = ifelse(SST_anomaly > 0.5, SST_anomaly -0.5, 0) ) # Compute 12-week (84-day) rolling sum for DHW and convert to °C-weeks df <- df %>% mutate(DHW = rollapply(exceedance, width = 84, FUN = sum, align = "right", fill = NA) / 7) %>% mutate(DHW = replace_na(DHW, 0)) # Replace missing DHW values with 0 # Save DHW results to file write.csv(df, "DHW0_5_12_calculated.csv", row.names = FALSE) # Pre-calculate the maximum SST for setting plot limits sst_max <- max(df$SST, na.rm = TRUE) library(ggplot2) library(ggplot2) # Define max DHW for scaling dhw_max <- max(df$DHW, na.rm = TRUE) # Plot SST, MMM, and DHW over time plot2 <- ggplot(df, aes(x = date)) + geom_line(aes(y = SST, color = "SST (°C)"), size = 1) + # Correct DHW scaling and plotting geom_line(aes(y = (DHW / dhw_max) * (sst_max - 25) + 25, color = "DHW (°C-weeks)"), size = 1) + geom_hline(aes(yintercept = MMM, color = "MMM"), linetype = "dashed", size = 1) + geom_smooth(aes(y = SST, color = "SST Trend"), method = "lm", size = 1, se = FALSE) + # Correct placement of sec.axis inside scale_y_continuous() scale_y_continuous( name = "SST (°C)", limits = c(25, sst_max), sec.axis = sec_axis(~ (. - 25) * dhw_max / (sst_max - 25), name = "DHW (°C-weeks)", breaks = seq(0, round(dhw_max, 1), by = 1)) ) + scale_x_date( limits = as.Date(c("1985-01-01", "2025-12-31")), date_breaks = "1 year", date_labels = "%Y", expand = c(0.01, 0) ) + scale_color_manual( values = c("SST (°C)" = "steelblue", "DHW (°C-weeks)" = "darkred", "MMM" = "black", "SST Trend" = "white") ) + labs(title = "SST, MMM, and DHW Over Time", x = "Year", color = "Legend") + theme_minimal(base_size = 16) + theme( axis.text.x = element_text(angle = 45, hjust = 1, size = 14), axis.text.y = element_text(size = 14), axis.title = element_text(size = 18, face = "bold"), legend.text = element_text(size = 14), legend.title = element_text(size = 16, face = "bold"), plot.title = element_text(size = 20, face = "bold", hjust = 0.5) ) # Print the plot print(plot2) # Save to file ggsave("SST_DHW0_5_12_plot.png", plot = plot2, width = 14, height = 7) ``` ```{r USING THIS ONEcombined_bleachingMMM_coolingclimMean0.5_frequency, echo=TRUE, message=FALSE, warning=FALSE} # Load necessary libraries library(dplyr) library(lubridate) library(ggplot2) library(tidyr) # Read the dataset and convert the 'date' column to Date format df <- read.csv("SSTraw.csv") df$date <- as.Date(as.character(df$date), format = "%Y%m%d") df <- df %>% filter(!is.na(SST)) ### Part 1: Calculate Bleaching Frequency ### # Define the baseline period for bleaching threshold: 1985-2012 df_filtered <- df %>% filter(year(date) >= 1985 & year(date) <= 2012) # Compute monthly mean SSTs over the filtered period (for each calendar month) df_monthly <- df_filtered %>% mutate(month = month(date)) %>% group_by(month) %>% summarise(mean_SST = mean(SST, na.rm = TRUE)) # Refine the climatology using selected years (1985–1990 and 1993) to reduce noise df_baseline <- df_filtered %>% filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993) df_baseline_monthly <- df_baseline %>% mutate(month = month(date)) %>% group_by(month) %>% summarise(baseline_SST = mean(SST, na.rm = TRUE)) # Determine MMM as the maximum of the refined monthly baseline means MMM <- max(df_baseline_monthly$baseline_SST, na.rm = TRUE) print(paste("Calculated MMM:", MMM)) # Define the bleaching threshold as MMM + 0.5 bleaching_threshold <- MMM +0.5 # Calculate the number of days per year with SST > bleaching_threshold yearly_bleaches <- df %>% mutate(year = year(date)) %>% group_by(year) %>% summarise(bleaching_days = sum(SST > bleaching_threshold, na.rm = TRUE)) # Write bleaching frequency to a CSV file write.csv(yearly_bleaches, "yearly_bleaching_frequency.csv", row.names = FALSE) ### Part 2: Calculate Cooling Frequency ### # For the cooling threshold, compute the climatological monthly mean for each month (1985–2012) df_cooling_baseline <- df_filtered %>% filter((year(date) >= 1985 & year(date) <= 1990) | year(date) == 1993) baseline_monthly_mean <- df_cooling_baseline %>% mutate(month = month(date)) %>% group_by(month) %>% summarise(clim_mean = mean(SST, na.rm = TRUE)) baseline_monthly_mean <- baseline_monthly_mean %>% mutate(cooling_threshold = clim_mean - 0.5) # Join the monthly cooling thresholds back to the full dataset df_cooling <- df %>% mutate(month = month(date), year = year(date)) %>% left_join(baseline_monthly_mean, by = "month") %>% mutate(cooling_event = if_else(SST < cooling_threshold, 1, 0)) # Calculate the number of cooling days per year yearly_cooling <- df_cooling %>% group_by(year) %>% summarise(cooling_days = sum(cooling_event, na.rm = TRUE)) # Write cooling frequency to a CSV file write.csv(yearly_cooling, "yearly_cooling_frequency.csv", row.names = FALSE) ### Part 3: Combine and Plot the Frequencies ### # Combine bleaching and cooling frequencies into one data frame. # For plotting, cooling frequencies will be shown as negative values. combined_freq <- yearly_bleaches %>% rename(bleaching = bleaching_days) %>% left_join(yearly_cooling %>% rename(cooling = cooling_days), by = "year") %>% mutate(cooling = -cooling) %>% pivot_longer(cols = c(bleaching, cooling), names_to = "Event", values_to = "Days") # Recode the Event variable to "Warm" and "Cool" combined_freq <- combined_freq %>% mutate(Event = dplyr::recode(Event, bleaching = "Warm", cooling = "Cool")) # Create a bar plot showing warm events (positive, in firebrick) and cool events (negative, in steelblue) combined <- ggplot(combined_freq, aes(x = factor(year), y = Days, fill = Event)) + geom_bar(stat = "identity", position = "dodge") + scale_fill_manual(values = c("Warm" = "firebrick", "Cool" = "steelblue")) + labs(title = "Annual Frequency of Bleaching and Cooling Events", x = "Year", y = "Frequency (Days)", fill = "Event") + theme_minimal(base_size = 14) + theme(axis.text.x = element_text(angle = 60, hjust = 1)) plot(combined) ggsave("combine_freq.png", plot = combined, width = 14, height = 7) # Linear regression for bleaching frequency over time lm_bleaching <- lm(bleaching_days ~ year, data = yearly_bleaches) summary(lm_bleaching) # Plot bleaching frequency with regression line ggplot(yearly_bleaches, aes(x = year, y = bleaching_days)) + geom_point(color = "firebrick", size = 3) + geom_smooth(method = "lm", se = FALSE, color = "firebrick") + labs(title = "Trend in Annual Bleaching Frequency", x = "Year", y = "Bleaching Frequency (Days)") + theme_minimal() # Linear regression for cooling frequency over time lm_cooling <- lm(cooling_days ~ year, data = yearly_cooling) summary(lm_cooling) # Plot cooling frequency with regression line ggplot(yearly_cooling, aes(x = year, y = cooling_days)) + geom_point(color = "steelblue", size = 3) + geom_smooth(method = "lm", se = FALSE, color = "steelblue") + labs(title = "Trend in Annual Cooling Frequency", x = "Year", y = "Cooling Frequency (Days)") + theme_minimal() ``` ```{r regression_SST_analysis_ONI, echo=TRUE, message=FALSE, warning=FALSE, fig.width=12, fig.height=5} # Load necessary libraries library(tidyverse) library(lubridate) library(MASS) # Load the data SSTONI <- read_csv("SSTraw.csv") # Ensure column names are properly formatted (remove any extra spaces) SSTONI <- SSTONI %>% rename_with(~ str_trim(.), everything()) colnames(SSTONI) # Convert 'date' to a character (if necessary) and then to 'z_date' using the "DD/MM/YYYY" format. # Using .data$date forces dplyr to use the column instead of a function named 'date'. library(dplyr) library(lubridate) SSTONI <- SSTONI %>% mutate( date = as.Date(as.character(date), format = "%Y%m%d"), # Convert YYYYMMDD to Date year = as.numeric(year), # Ensure 'year' is numeric month = as.numeric(month) # Ensure 'month' is numeric ) %>% filter(!is.na(SST)) # Remove missing SST values # Verify that the date conversion worked correctly summary(SSTONI$date) lm_model <- lm(SST ~ date, data = SSTONI) plot(lm_model$residuals ~ SSTONI$date, main = "Residuals vs. Time", ylab = "Residuals", xlab = "Date") abline(h = 0, col = "red", lty = 2) qqnorm(lm_model$residuals) qqline(lm_model$residuals, col = "red") summary(lm_model) # Ensure slope and intercept are correctly extracted from the linear model lm_model <- lm(SST ~ date, data = SSTONI) slope <- coef(lm_model)[2] # Extract slope (date coefficient) intercept <- coef(lm_model)[1] # Extract intercept r_squared <- summary(lm_model)$r.squared # Extract R² value # Format regression results for plot annotation regression_text <- paste0("Slope: ", round(slope, 5), "\nIntercept: ", round(intercept, 2), "\nR²: ", round(r_squared, 3)) # Dynamically adjust annotation placement annotation_x <- min(SSTONI$date) + (max(SSTONI$date) - min(SSTONI$date)) * 0.05 # Position 5% from start annotation_y <- max(SSTONI$SST) - (max(SSTONI$SST) - min(SSTONI$SST)) * 0.1 # Position 10% below max SST # Plot SST over time with the regression trend line ggplot(SSTONI, aes(x = date, y = SST)) + geom_point(color = "blue", alpha = 0.5) + # Data points geom_smooth(method = "lm", color = "red", linetype = "dashed", se = FALSE) + # Linear regression line annotate("text", x = annotation_x, y = annotation_y, label = regression_text, hjust = 0, size = 4) + labs( title = "Linear Regression of SST Over Time", subtitle = paste("Slope =", round(slope, 5), ", R² =", round(r_squared, 3)), x = "Date", y = "Sea Surface Temperature (°C)" ) + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Save the plot with a corrected filename ggsave("SST_Regression_Plot.png", width = 12, height = 5, dpi = 300) SST_model <- lm(SST ~ date + ONI, data = SSTONI) summary(SST_model) ```