# Purpose of script: To produce data for estimating
# species richness and community dynamics via COMDYN

# Libraries ####

library(tidyverse)
library(readxl)
library(sf)

# Settings ####

# Heavily penalise scientific notation so numbers are printed and written in
# fixed notation
options(scipen = 999)

# Input ####

# Processed BBS data, read from a hard-coded local path. Kept in full because
# the per-segment columns (i1-i5) are needed later for the richness summaries.
iTest <- read.csv("C:\\Input\\BBSProcessedData.csv")

# Narrow copy holding only the columns used for the frequency tallies
iCount <- select(iTest, rteno, Year, AOU, fCount)

# Processing ####

# Species list - a copy of the SpeciesList.csv file provided by the USGS with
# the header block removed so that it can be read straight into R.
iSpecies <- read_csv("C:\\2023Release_Nor\\SpeciesListR2023.csv") %>%
  mutate(
    # Zero-pad AOU codes to five characters. str_pad() is used instead of
    # sprintf("%05s", ...) because the "0" flag is platform-dependent for
    # strings (some systems pad with spaces), which would stop these codes
    # from matching the "%05d"-formatted AOU codes in the count data.
    AOU = str_pad(as.character(AOU), width = 5, side = "left", pad = "0"),
    ScientificName = paste(Genus, Species, sep = " ")
  ) %>%
  rename(CommonName = English_Common_Name) %>%
  # Drop AOU codes associated with unidentified individuals
  # ("unid." / "Unid." anywhere in the common name)
  filter(!grepl("[Uu]nid.", CommonName)) %>%
  # Rename variants (any common name containing the text below) to the main
  # species name; every other name is left untouched
  mutate(CommonName = case_when(
    grepl("Yellow-rumped Warbler", CommonName) ~ "Yellow-rumped Warbler",
    grepl("Dark-eyed Junco", CommonName) ~ "Dark-eyed Junco",
    grepl("Northern Flicker", CommonName) ~ "Northern Flicker",
    grepl("American Crow", CommonName) ~ "American Crow",
    grepl("Red-tailed Hawk", CommonName) ~ "Red-tailed Hawk",
    grepl("Great Blue Heron", CommonName) ~ "Great Blue Heron",
    grepl("Brant", CommonName) ~ "Brant",
    grepl("Snow Goose", CommonName) ~ "Snow Goose",
    TRUE ~ CommonName
  ))

# Bringing in avian conservation assessment database

# Partners in Flight. (2024). Avian Conservation Assessment Database, version 2024. 
# Available at http://pif.birdconservancy.org/ACAD 

# Matching criteria from Phillips et al. 2010 - excluding
# aquatic, nocturnal, raptorial, and exotic species
# Nocturnal species - removing owls and caprimulgids based on targeted monitoring
# for species in Knight et al. 2021
# Raptorial species - using definition in McClure et al. 2019
# Exotic - using ACAD designation
# Aquatic - excluding families based on the North American Waterbird Conservation Plan;
# remaining species were then reviewed to also remove Anseriformes, Charadriiformes, and Oceanitidae
acad <- read_xlsx("C:\\ACAD\\ACAD Global 2024.05.23.xlsx") %>%
  filter(
    # Keep only rows with no entry in the Introduced column
    is.na(Introduced),
    # Caprimulgids
    !str_detect(family, "Caprimulgidae"),
    # Orders excluded outright: raptors and owls, then waterfowl and shorebirds
    !(order %in% c(
      "Accipitriformes", "Cathartiformes", "Falconiformes", "Strigiformes",
      "Anseriformes", "Charadriiformes"
    )),
    # Families excluded outright
    !(family %in% c(
      "Gaviidae", "Podicipedidae", "Diomedeidae", "Procellariidae",
      "Hydrobatidae", "Phaethontidae", "Sulidae", "Pelecanidae",
      "Phalacrocoracidae", "Anhingidae", "Fregatidae", "Ardeidae",
      "Threskiornithidae", "Ciconiidae", "Phoenicopteridae", "Accipitridae",
      "Rallidae", "Heliornithidae", "Eurypygidae", "Aramidae", "Gruidae",
      "Laridae", "Alcidae", "Oceanitidae"
    ))
  ) %>%
  # Keep the columns of interest under shorter, syntactic names
  select(
    CommonName = `Common Name`,
    ScientificName = `Scientific Name`,
    order,
    family,
    pHab = `Primary Breeding Habitat`,
    sHab = `Secondary Breeding Habitat`,
    MigStatus = `Mig Status`
  )

# Restrict the species list to common names that survived the ACAD filtering
iSpecies <- iSpecies[iSpecies$CommonName %in% acad$CommonName, ]

# Give both input data sets the same five-digit, zero-padded AOU code as the
# species list, then keep only the rows for retained species
iCount <- mutate(iCount, AOU = sprintf("%05d", as.numeric(AOU)))
iCount <- filter(iCount, AOU %in% iSpecies[["AOU"]])

iTest <- mutate(iTest, AOU = sprintf("%05d", as.numeric(AOU)))
iTest <- filter(iTest, AOU %in% iSpecies[["AOU"]])

# Frequency data: for each route and year, the number of species falling in
# each frequency class (fCount), spread into one column per class (i1-i5).
#
# complete() adds an explicit zero for every class from i1 to i5 that is
# absent for a route/year. This guarantees that all five columns exist even if
# a class never occurs anywhere in the data (previously the NA -> 0 step failed
# with "object not found" in that case) and leaves no NA to patch afterwards.
dFreq1 <- iCount %>%
  count(rteno, Year, fCount) %>%
  mutate(fCount = paste0("i", fCount)) %>%
  complete(
    nesting(rteno, Year),
    fCount = paste0("i", 1:5),
    fill = list(n = 0L)
  ) %>%
  # names_sort keeps the class columns in i1..i5 order, as spread() did
  pivot_wider(names_from = fCount, values_from = n, names_sort = TRUE) %>%
  as.data.frame() %>%
  arrange(rteno, Year)

# Richness data: for each route and year, the column totals of i1-i5 across
# all retained species records
# NOTE(review): i1-i5 look like per-segment detection indicators, which would
# make each total the number of species seen on that segment - confirm
dRich1 <- iTest %>%
  group_by(rteno, Year) %>%
  summarise(across(c(i1, i2, i3, i4, i5), sum)) %>%
  ungroup() %>%
  arrange(rteno, Year) %>%
  as.data.frame()

# Start the clock so the total run time can be reported after the loop
tStart <- Sys.time()

# Remove any result left over from an earlier run in this session so stale
# rows can never be written out if no route qualifies this time
if (exists("final.df")) {
  rm(final.df)
}

# First year (i) of each consecutive-year comparison; the second year is i + 1
startYears <- 2000:2003
routes <- unique(dFreq1$rteno)
segCols <- c("i1", "i2", "i3", "i4", "i5")

# Zero-count template used to fill in frequency classes with no species.
# It never changes, so it is built once rather than on every iteration.
mFill <- data.frame(nLabel = segCols, v = rep(0, 5))

# Results are collected in a pre-allocated list and bound together once at the
# end; growing a data frame with rbind() inside the loop copies every
# accumulated row on each iteration.
pairResults <- vector("list", length(startYears) * length(routes))
nPairs <- 0L

for (i in startYears) {
  # Two years to be compared, i and j
  j <- i + 1

  for (rte in routes) {
    # Printing progress information
    cat(rte, "", i, "", j, "\n")

    # Frequency data for years i and j (one row per year when both exist)
    dFreq <- dFreq1 %>%
      filter(rteno == rte, Year %in% c(i, j)) %>%
      arrange(Year) %>%
      mutate(
        iYear = i,
        jYear = j,
        metric = "Freq"
      )

    colCheck <- sum(segCols %in% colnames(dFreq))
    rowCheck <- nrow(dFreq)

    # Only routes with all five class columns and a row for each of the two
    # years are processed. && is used because this is a scalar condition.
    if (colCheck == 5 && rowCheck == 2) {
      # Species richness data for years i and j
      dRich <- dRich1 %>%
        filter(rteno == rte, Year %in% c(i, j)) %>%
        arrange(Year) %>%
        mutate(
          iYear = i,
          jYear = j,
          metric = "Rich"
        )

      # Records for this route in each year, extracted once and reused below
      cntI <- filter(iCount, rteno == rte, Year == i)
      cntJ <- filter(iCount, rteno == rte, Year == j)
      testI <- filter(iTest, rteno == rte, Year == i)
      testJ <- filter(iTest, rteno == rte, Year == j)

      # Species (AOU codes) seen in year i and in year j. These are named
      # sppI / sppJ so that the species table iSpecies is not overwritten.
      sppI <- unique(cntI$AOU)
      sppJ <- unique(cntJ$AOU)

      # Route identifier written into rows created from the zero template
      rFill <- unique(dFreq$rteno)

      # Frequency data frames used to estimate extinction and colonization
      # rates

      # Frequency for species observed during period i
      # that were also seen during period j
      iFreq <- cntI %>%
        filter(AOU %in% sppJ) %>%
        count(rteno, Year, fCount) %>%
        mutate(fCount = paste0("i", fCount)) %>%
        # Add a row for every class i1-i5 so empty classes are not dropped
        full_join(mFill, by = c("fCount" = "nLabel")) %>%
        mutate(
          n = ifelse(is.na(n), v, n),
          rteno = rFill,
          Year = i
        ) %>%
        select(-v) %>%
        pivot_wider(names_from = fCount, values_from = n, names_sort = TRUE) %>%
        as.data.frame() %>%
        mutate(
          iYear = i,
          jYear = j,
          metric = "Freq"
        )

      # Frequency for species observed during period j
      # that were also seen during period i
      jFreq <- cntJ %>%
        filter(AOU %in% sppI) %>%
        count(rteno, Year, fCount) %>%
        mutate(fCount = paste0("i", fCount)) %>%
        full_join(mFill, by = c("fCount" = "nLabel")) %>%
        mutate(
          n = ifelse(is.na(n), v, n),
          rteno = rFill,
          Year = j
        ) %>%
        select(-v) %>%
        pivot_wider(names_from = fCount, values_from = n, names_sort = TRUE) %>%
        as.data.frame() %>%
        mutate(
          iYear = i,
          jYear = j,
          metric = "Freq"
        )

      # Richness (column totals of i1-i5) for year i, restricted to species
      # that were also seen in year j
      iRich <- testI %>%
        filter(AOU %in% sppJ) %>%
        group_by(rteno, Year) %>%
        summarise(
          i1 = sum(i1),
          i2 = sum(i2),
          i3 = sum(i3),
          i4 = sum(i4),
          i5 = sum(i5),
          .groups = "drop"
        ) %>%
        as.data.frame() %>%
        arrange(rteno, Year) %>%
        mutate(
          iYear = i,
          jYear = j,
          metric = "Rich"
        )

      # Richness for year j, restricted to species that were also seen in
      # year i
      jRich <- testJ %>%
        filter(AOU %in% sppI) %>%
        group_by(rteno, Year) %>%
        summarise(
          i1 = sum(i1),
          i2 = sum(i2),
          i3 = sum(i3),
          i4 = sum(i4),
          i5 = sum(i5),
          .groups = "drop"
        ) %>%
        as.data.frame() %>%
        arrange(rteno, Year) %>%
        mutate(
          iYear = i,
          jYear = j,
          metric = "Rich"
        )

      # rbind() here still checks that all six pieces share the same columns
      nPairs <- nPairs + 1L
      pairResults[[nPairs]] <- rbind(dFreq, iFreq, jFreq, dRich, iRich, jRich)
    }
  }
}

# Bind all route / year-pair results in loop order. final.df is deliberately
# left undefined when nothing qualified, as before.
if (nPairs > 0L) {
  final.df <- bind_rows(pairResults[seq_len(nPairs)])
}

# Report the elapsed time with its units. Passing a difftime straight to cat()
# prints only the bare number, which could be seconds, minutes or hours.
cat("Time taken: ", format(Sys.time() - tStart), "\n")

# final.df only exists if at least one route had data for both years of a
# comparison; fail with a clear message instead of "object not found"
if (!exists("final.df")) {
  stop(
    "No route had data for both years of any comparison; nothing to write.",
    call. = FALSE
  )
}

write.csv(final.df, "C:\\Input\\comdynInput.csv", row.names = FALSE)