# Purpose of script: To model bird species richness using
# GPP derived productivity measures

# Libraries ---------------------------
library(tidyverse)
library(inlabru)
library(INLA)
library(sf)
library(gstat)
library(terra)
library(ggspatial)
library(tidyterra)
library(ade4)
library(nlme)
library(scales)
library(styler)
library(lintr)
library(sp)
library(fmesher)
library(ggpubr)

# Settings ---------------------------
# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)
# PROJ string of the Albers equal-area CRS that the spatial layers in this
# script are transformed to. paste() joins the pieces with single spaces.
# NOTE(review): the name ends in "km" but the string sets +units=m - confirm.
epsg5070km <- paste(
  "+proj=aea", "+lat_0=23", "+lon_0=-96", "+lat_1=29.5", "+lat_2=45.5",
  "+x_0=0", "+y_0=0", "+datum=NAD83", "+units=m", "+no_defs"
)

# Functions ---------------------------
# Build plotmath labels of the form `a %*% 10^b` from numeric values.
#
# x: numeric vector of values to label.
# Returns the result of ifelse(): "0" where x equals zero, otherwise the
# parsed plotmath expression for that value.
scientific_10 <- function(x) {
  # Turn the "e" / "e+" of the scientific-notation text into " %*% 10^";
  # a minus sign after the "e" is left in place as part of the exponent.
  plotmath_text <- sub("e[+]?", " %*% 10^", scientific_format()(x))
  ifelse(x == 0, "0", parse(text = plotmath_text))
}

# Input ---------------------------
# Tabular inputs; all three are joined on `rteno` further down.
richness <- read.csv(file = "C:\\Comdyn\\ComdynOutput\\CompiledComdynOutput.csv")
gpp <- read.csv(file = "C:\\GEE\\dhiData.csv")
landcover <- read.csv(file = "C:\\GEE\\lcData.csv")

# Vector layers stored in the GISLayers folder.
startpoints <- st_read(dsn = "C:\\GISLayers", layer = "BBSStartPoints")
# The CONUS layer is reprojected to the working CRS as soon as it is read.
us_bcrs <- st_transform(
  st_read(dsn = "C:\\GISLayers", layer = "CONUS"),
  epsg5070km
)

# Transforming and integrating data ---------------------------
# Keep only usable productivity rows and the columns needed downstream.
gpp <- gpp |>
  filter(
    # All three productivity measures must be present. These are separate
    # filter() conditions so that a missing value can never be rescued by the
    # "or" test below. The previous single expression parsed as
    # `(... & sum) | cov | (min > 0)`, which coerced sum/cov to logical and
    # let rows with NA values through.
    !is.na(sum),
    !is.na(cov),
    !is.na(min),
    # Keep rows where at least one measure is positive.
    # NOTE(review): if every measure has to be positive, use `&` instead of
    # `|` here - confirm the intended rule.
    sum > 0 | cov > 0 | min > 0
  ) |>
  select(rteno, sum, min, cov)

# Getting estimates for all years
# Every input row holds two (year, richness) pairs: (iYear, N1) and
# (jYear, N2). Pull each pair out under common column names, stack them and
# drop duplicated rows.
richness_i <- select(richness, rteno, Year = iYear, richness = N1)
richness_j <- select(richness, rteno, Year = jYear, richness = N2)
richness <- unique(rbind(richness_i, richness_j))
# Reproject the start points to the working CRS, derive the route key used
# for joins (state number * 1000 + route number) and keep only rows with
# Border40kmBuffer == 0 and CountryNum == 840.
startpoints <- startpoints |>
  st_transform(crs = epsg5070km) |>
  mutate(rteno = Route + 1000 * StateNum) |>
  filter(Border40kmBuffer == 0 & CountryNum == 840)
# Assemble the modelling table: one row per route with the mean richness and
# mean productivity measures, joined to the start-point record of that route.
model_data <- richness |>
  inner_join(gpp, by = "rteno") |>
  inner_join(landcover, by = "rteno") |>
  # `impacted` is in neither the richness nor the gpp columns kept above, so
  # it has to come from landcover.
  filter(impacted < 50) |>
  group_by(rteno) |>
  summarise(
    Richness = mean(richness),
    sum = mean(sum),
    min = mean(min),
    cov = mean(cov)
  ) |>
  inner_join(
    select(startpoints, rteno, Latitude, Longitude),
    by = "rteno"
  )

# Exploration of independent and dependent variable assumptions
# Histogram of the response (mean richness per route).
hist(model_data$Richness)
# Pairwise scatterplots of the three productivity measures against each other.
plot(model_data$sum ~ model_data$cov)
plot(model_data$sum ~ model_data$min)
plot(model_data$cov ~ model_data$min)

# Scaling independent variables
# scale() returns a one-column matrix; as.vector() turns it back into a plain
# numeric vector before it is stored as a new column.
model_data <- model_data |>
  mutate(
    sumSC = as.vector(scale(sum)),
    minSC = as.vector(scale(min)),
    covSC = as.vector(scale(cov))
  )

# Create data set with read_sf so that SVC functions run
# Round-trip the table through a shapefile (overwriting any existing layer),
# then stamp the working CRS on the object that is read back.
st_write(model_data, "C:\\ModelData\\modelData.shp", delete_layer = TRUE)
model_data <- "C:\\ModelData\\modelData.shp" |>
  read_sf() |>
  st_set_crs(epsg5070km)

# Projected X/Y coordinates of the start points, one row per row of
# model_data. model_data is already an sf object here (it was read with
# read_sf()), so the coordinates are taken straight from its geometry.
# The earlier st_as_sf(coords = c("Longitude", "Latitude"), crs = ...) wrapper
# returned an sf input unchanged, so its arguments were silently ignored; had
# they been honoured, degree values would have been treated as projected
# coordinates.
startpoints_coordinates <- st_coordinates(model_data)

# Build understanding of distance distribution
# Pairwise distances between start points, in the units of the coordinates.
Loc <- startpoints_coordinates[, c("X", "Y")] |>
  as.matrix()
D <- dist(Loc)
hist(D)

# Creating mesh to model spatially structured intercepts and covariates ---------------------------
# The largest pairwise distance sets the scale of both the mesh and the prior.
max_distance <- max(D)
# Both max.edge entries are capped at 10% of that distance.
mesh <- inla.mesh.2d(
  loc = startpoints_coordinates,
  max.edge = rep(max_distance * 0.1, 2),
  cutoff = 1
)
# Number of mesh nodes and a visual check of the triangulation.
mesh$n
plot(mesh)
# Matern SPDE with PC priors: prior.range = (half the largest distance, 0.5)
# and prior.sigma = (1, 0.5).
spde <- inla.spde2.pcmatern(
  mesh = mesh,
  prior.range = c(0.5 * max_distance, 0.5),
  prior.sigma = c(1, 0.5)
)

# Formula for all models
# The same response/predictor formula is passed to like() by every model
# fitted in this script.
model_formula <- Richness ~ .

# Modeling richness data - sum productivity measure ---------------------------
# Components: a spatial field for the intercept (betaZero) and a spatial field
# weighted by the scaled sum measure (betaCovariate); `-1` removes the
# default intercept.
equation_sum <- ~ -1 +
  betaZero(geometry, model = spde) +
  betaCovariate(geometry, weights = sumSC, model = spde)
Start <- Sys.time()
model_sum <- bru(
  equation_sum,
  like(
    formula = model_formula,
    family = "gaussian",
    data = model_data
  ),
  options = list(
    control.compute = list(
      dic = TRUE,
      waic = TRUE,
      cpo = TRUE,
      return.marginals.predictor = TRUE,
      config = TRUE
    ),
    control.predictor = list(compute = TRUE),
    control.inla = list(int.strategy = "eb"),
    verbose = FALSE
  )
)
# format() keeps the units of the elapsed time (secs, mins, ...); handing the
# raw difftime to cat() printed a bare number with no unit.
cat("Length of time: ", format(Sys.time() - Start), "\n")

# Residual diagnostics ----
# Posterior mean of the fitted values in the rows whose names contain
# "APredictor" (presumably one per observation, in data order - confirm).
fitted_sum <- model_sum$summary.fitted.values[
  grepl("APredictor", row.names(model_sum$summary.fitted.values)),
  "mean"
]
resid_sum <- model_data$Richness - fitted_sum
plot(resid_sum ~ fitted_sum)
plot(resid_sum ~ model_data$sumSC)

# Residuals with their coordinates as an sp object for gstat.
mydata <- data.frame(
  Merror = resid_sum,
  X = startpoints_coordinates[, 1],
  Y = startpoints_coordinates[, 2]
)
coordinates(mydata) <- c("X", "Y")
# The formula uses the Merror column of mydata (the same values as resid_sum)
# so that it does not depend on a global variable of another name.
variogram_sum <- variogram(
  Merror ~ 1,
  mydata,
  cutoff = max(D) * (1 / 2),
  cressie = TRUE
)
sac_sum <- plot(
  variogram_sum,
  xlab = list(label = "Distance (m)", cex = 1.5),
  ylab = list(label = "Semi-variogram", cex = 1.5),
  pch = 16, col = 1, cex = 1.4
)
print(sac_sum)

# Lagged scatterplots, with distance classes at the deciles of all pairwise
# distances shorter than half the largest distance.
distances <- as.vector(D)
hscat_breaks <- quantile(
  distances[which(distances < 0.5 * max(distances))],
  seq(0, 1, 0.1)
)
h1 <- hscat(Merror ~ 1, mydata, hscat_breaks)
plot(h1)

# Modeling richness data - minimum productivity measure ---------------------------
# Components: a spatial field for the intercept (betaZero) and a spatial field
# weighted by the scaled minimum measure (betaCovariate); `-1` removes the
# default intercept.
equation_min <- ~ -1 +
  betaZero(geometry, model = spde) +
  betaCovariate(geometry, weights = minSC, model = spde)
Start <- Sys.time()
model_min <- bru(
  equation_min,
  like(
    formula = model_formula,
    family = "gaussian",
    data = model_data
  ),
  options = list(
    control.compute = list(
      dic = TRUE,
      waic = TRUE,
      cpo = TRUE,
      return.marginals.predictor = TRUE,
      config = TRUE
    ),
    control.predictor = list(compute = TRUE),
    control.inla = list(int.strategy = "eb"),
    verbose = FALSE
  )
)
# format() keeps the units of the elapsed time (secs, mins, ...); handing the
# raw difftime to cat() printed a bare number with no unit.
cat("Length of time: ", format(Sys.time() - Start), "\n")

# Residual diagnostics ----
# Posterior mean of the fitted values in the rows whose names contain
# "APredictor" (presumably one per observation, in data order - confirm).
fitted_min <- model_min$summary.fitted.values[
  grepl("APredictor", row.names(model_min$summary.fitted.values)),
  "mean"
]
resid_min <- model_data$Richness - fitted_min
plot(resid_min ~ fitted_min)
plot(resid_min ~ model_data$minSC)

# Residuals with their coordinates as an sp object for gstat.
mydata <- data.frame(
  Merror = resid_min,
  X = startpoints_coordinates[, 1],
  Y = startpoints_coordinates[, 2]
)
coordinates(mydata) <- c("X", "Y")
# The formula uses the Merror column of mydata (the same values as resid_min)
# so that it does not depend on a global variable of another name.
variogram_min <- variogram(
  Merror ~ 1,
  mydata,
  cutoff = max(D) * (1 / 2),
  cressie = TRUE
)
sac_min <- plot(
  variogram_min,
  xlab = list(label = "Distance (m)", cex = 1.5),
  ylab = list(label = "Semi-variogram", cex = 1.5),
  pch = 16, col = 1, cex = 1.4
)
print(sac_min)

# Lagged scatterplots, with distance classes at the deciles of all pairwise
# distances shorter than half the largest distance.
distances <- as.vector(D)
hscat_breaks <- quantile(
  distances[which(distances < 0.5 * max(distances))],
  seq(0, 1, 0.1)
)
h1 <- hscat(Merror ~ 1, mydata, hscat_breaks)
plot(h1)

# Modeling richness data - seasonality productivity measure ---------------------------
# Components: a spatial field for the intercept (betaZero) and a spatial field
# weighted by the scaled cov measure (betaCovariate); `-1` removes the
# default intercept.
equation_cov <- ~ -1 +
  betaZero(geometry, model = spde) +
  betaCovariate(geometry, weights = covSC, model = spde)
Start <- Sys.time()
model_cov <- bru(
  equation_cov,
  like(
    formula = model_formula,
    family = "gaussian",
    data = model_data
  ),
  options = list(
    control.compute = list(
      dic = TRUE,
      waic = TRUE,
      cpo = TRUE,
      return.marginals.predictor = TRUE,
      config = TRUE
    ),
    control.predictor = list(compute = TRUE),
    control.inla = list(int.strategy = "eb"),
    verbose = FALSE
  )
)
# format() keeps the units of the elapsed time (secs, mins, ...); handing the
# raw difftime to cat() printed a bare number with no unit.
cat("Length of time: ", format(Sys.time() - Start), "\n")

# Residual diagnostics ----
# Posterior mean of the fitted values in the rows whose names contain
# "APredictor" (presumably one per observation, in data order - confirm).
fitted_cov <- model_cov$summary.fitted.values[
  grepl("APredictor", row.names(model_cov$summary.fitted.values)),
  "mean"
]
resid_cov <- model_data$Richness - fitted_cov
plot(resid_cov ~ fitted_cov)
plot(resid_cov ~ model_data$covSC)

# Residuals with their coordinates as an sp object for gstat.
mydata <- data.frame(
  Merror = resid_cov,
  X = startpoints_coordinates[, 1],
  Y = startpoints_coordinates[, 2]
)
coordinates(mydata) <- c("X", "Y")
# The formula uses the Merror column of mydata (the same values as resid_cov)
# so that it does not depend on a global variable of another name.
variogram_cov <- variogram(
  Merror ~ 1,
  mydata,
  cutoff = max(D) * (1 / 2),
  cressie = TRUE
)
sac_cov <- plot(
  variogram_cov,
  xlab = list(label = "Distance (m)", cex = 1.5),
  ylab = list(label = "Semi-variogram", cex = 1.5),
  pch = 16, col = 1, cex = 1.4
)
print(sac_cov)

# Lagged scatterplots, with distance classes at the deciles of all pairwise
# distances shorter than half the largest distance.
distances <- as.vector(D)
hscat_breaks <- quantile(
  distances[which(distances < 0.5 * max(distances))],
  seq(0, 1, 0.1)
)
h1 <- hscat(Merror ~ 1, mydata, hscat_breaks)
plot(h1)