#####################################################
# Marco FW Gauger
# 2021/02/23
#
# This is the script to obtain fig 6 of the manuscript submitted to PeerJ Gauger et al 2021 Diel influences on bottlenose dolphin acoustic detection in a coastal lagoon in the southwestern Gulf of California
# The script is based on the information given by Rose on his blog and changed if necesssary.
# https://fromthebottomoftheheap.net/2017/10/10/difference-splines-i/
# It is  meant for reviewers and shall not made available as supplementary material.
# As intermediate results are created csv files that contain thw predictions. Tjos was necessary as a personel laptop is too slow and or has too little RAM to run the script in one sweep.
# The csv files can be merged in a later step to create the final graph.
#####################################################


ifelse("mgcv" %in% rownames(installed.packages()) == FALSE, 
       install.packages("mgcv", dependencies = T),
       ("mgcv is already installed"))
ifelse("ggplot2" %in% rownames(installed.packages()) == FALSE, 
       install.packages(c("ggplot2"), dependencies = T),
       ("ggplot2 is already installed"))
ifelse("cowplot" %in% rownames(installed.packages()) == FALSE, 
       install.packages("cowplot", dependencies = T),
       ("cowplot is already installed"))

require(mgcv)
require(ggplot2)
require(cowplot)

Sys.setlocale("LC_ALL","English")
Sys.setenv(TZ='UTC')

analysed_data_hour <- read.csv("Supplemental Data S1.csv", sep=";")
head(analysed_data_hour)
analysed_data_hour$cluster_hcpc <- factor(analysed_data_hour$cluster_hcpc)


analysed_data_hour$marea <- analysed_data_hour$tide
analysed_data_hour$h2 <- analysed_data_hour$h
##### #####
b2 <- # final model <-
  gam(dp10m_ELAP1~ s(lunar_phase, bs="cc", k=6) + 
        te(derivate_tide, marea, bs=c("tp","tp"),
           k=c(10,10)) + 
        s(SST, k=10, bs="tp") + effort + 
        s(h2, bs="cc") + 
        s(depth, k=4) + 
        s(distance_BLAP, k=4) , 
      data=analysed_data_hour, method="REML",family="poisson", 
      knots=list(h2 = c(0,24)))
summary(b2)
AIC(b2)
plot(b2, page=1, scale=0, shade=T)
plot(b2, select=4, scale=0, shade=T)
abline(h=0)


##### model c2 #####
model_c2 <- #dp10m_mod_cluster_S_h <-
  gam(dp10m_ELAP1~ 
        s(lunar_phase, bs="cc", k=6) + 
        te(derivate_tide, marea, bs=c("tp","tp"),
           k=c(10,10)) + 
        s(SST, k=10, bs="tp") + effort + 
        s(h2, cluster_hcpc, bs="fs",xt=list(bs="cc")) + 
        s(depth, k=4) + 
        s(distance_BLAP, k=4), 
      data=analysed_data_hour, method="REML",family="poisson", 
      knots=list(h2 = c(0,24)))

summary(model_c2)
AIC(model_c2)
plot(model_c2, page=1, scale=0, shade=T)
plot(model_c2, select=4, scale=0, shade=T)


##### model d2 #####
model_d2<-# dp10m_mod_cluster_I_h <-
  gam(dp10m_ELAP1~ 
        s(lunar_phase, bs="cc") + 
        te(derivate_tide, marea, bs=c("tp","tp"),
           k=c(10,10)) + 
        s(SST, k=10, bs="tp") +
        s(h2, by= cluster_hcpc, k=10,  bs="cc") + 
        effort +
        s(depth, k=4) +
        s(distance_BLAP, k=4)+
        s(cluster_hcpc, bs="re"),
      data=analysed_data_hour, method="REML",family="poisson",
      knots=list(h2 = c(0,24)))

summary(model_d2)
AIC(model_d2)
plot(model_d2, page=1, scale=0, shade=T)

##### smooth_diff ######
smooth_diff <- function(model, newdata, f1, f2, var, alpha = 0.05,
                        unconditional = FALSE) {
  xp <- predict(model, newdata = newdata, type = 'lpmatrix')
  c1 <- grepl(f1, colnames(xp))
  c2 <- grepl(f2, colnames(xp))
  r1 <- newdata[[var]] == f1
  r2 <- newdata[[var]] == f2
  ## difference rows of xp for data from comparison
  X <- xp[r1, ] - xp[r2, ]
  ## zero out cols of X related to splines for other lochs
  X[, ! (c1 | c2)] <- 0
  ## zero out the parametric cols
  X[, !grepl('^s\\(', colnames(xp))] <- 0
  dif <- X %*% coef(model)
  se <- sqrt(rowSums((X %*% vcov(model, unconditional = unconditional)) * X))
  crit <- qt(alpha/2, df.residual(model), lower.tail = FALSE)
  upr <- dif + (crit * se)
  lwr <- dif - (crit * se)
  data.frame(pair = paste(f1, f2, sep = '-'),
             diff = dif,
             se = se,
             upper = upr,
             lower = lwr)
}
##### smooth_diff ######
comp = Comp = NA
comp1 = comp2 = comp3 = NA
########################
setwd("C:/pro/Mexico_doctorado/05_trabajo-de-invesigacion/tésis/script/archiv")
dir.create(paste0(getwd(),"/d2"))
dir.create(paste0(getwd(),"/c2"))

# choose separate archive so you can merge files more easily
n=1
for(n in 1:25){
###### #######
# if desired length = 24 might be sufficient, however to be sure we quadrubled this value (hour)   
###### #######
  
    pdat <- expand.grid(h2 = seq(0,24, length=25)[n], 
                    lunar_phase = seq(0,1,length = 5),
                    derivate_tide = seq(-0.03,0.03,length = 5),
                    marea = seq(-.8,.8,length = 5),
                    SST = seq(21,30,length = 4),
                    depth = seq(1,5,length = 5),
                    effort = seq(0,60,length = 2),
                    distance_BLAP = seq(1000,6800,length = 5),
                    cluster_hcpc = c('1', '2', '3'))



#nrow(smooth_diff(model_d2, pdat, '1', '2', 'cluster_hcpc'))
N = rep(seq(0,24,length=25)[n], 25000)
write.csv(file=paste(getwd(),"/d2/comp_",formatC(n, format = "fg", flag = "0", digits = 2),".csv", sep=""), cbind(h = N, rbind(smooth_diff(model_d2, pdat, '1', '2', 'cluster_hcpc'),smooth_diff(model_d2, pdat, '1', '3', 'cluster_hcpc'),smooth_diff(model_d2, pdat, '2', '3', 'cluster_hcpc'))))

#nrow(smooth_diff(model_d2, pdat, '1', '2', 'cluster_hcpc'))
N = rep(seq(0,24,length=25)[n], 25000)
write.csv(file=paste(getwd(),"/c2/comp_",formatC(n, format = "fg", flag = "0", digits = 2),".csv", sep=""), cbind(h = N, rbind(smooth_diff(model_c2, pdat, '1', '2', 'cluster_hcpc'), smooth_diff(model_c2, pdat, '1', '3', 'cluster_hcpc'), smooth_diff(model_c2, pdat, '2', '3', 'cluster_hcpc'))) )
###### #######
###### #######
}
n

comp = comp1 = comp2 = comp3 = NA
pdat=NA

# combine the csv files of model model_d2
# here shall go the archive where csv files of model model_d2 were saved. 
COMP_001_100_d2 <- dir(paste0(getwd(),"/d2"),".csv")
length(COMP_001_100_d2)
for(n in 1:100){
  if(n==1) comp_d2 = read.csv(paste0(getwd(),"/d2/",COMP_001_100_d2[n]))[c(1,25001,50001),]
  if(n> 1) comp_d2 = rbind(comp_d2, read.csv(paste0(getwd(),"/d2/",COMP_001_100_d2[n]))[c(1,25001,50001),])
}

x11()
comp_d_small <-
  ggplot(comp_d2, aes(x = h, y = diff, group = pair)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  geom_line() +
  geom_line(y=0,  linetype = 2) +
  facet_wrap(~ pair, ncol = 3) +
  coord_cartesian(ylim = c(-1.5,1)) +
  labs(x = 'hour', y = 'Difference in hour trend') +
  scale_x_discrete(name ="hour", limits=seq(0,24,by=4))+
  theme_bw()
plot(comp_d_small)
#ggsave(comp_d_small, file= "comparison_model_d_small_b.jpeg", width = 150, height = 100, units = c("mm"), dpi = 300)

  

# combine the csv files of model c2
# here shall go the archive where csv files of model c2 were saved. 
COMP_001_100_c2 <- dir(paste0(getwd(),"/c2"),".csv")
length(COMP_001_100_c2)
for(m in 1:length(COMP_001_100_c2)){
  if(m==1) comp_c2 = read.csv(paste0(getwd(),"/c2/", COMP_001_100_c2[m]))[c(1,25001,50001),]
  if(m> 1) comp_c2 = rbind(comp_c2, read.csv(paste0(getwd(),"/c2/", COMP_001_100_c2[m]))[c(1,25001,50001),])
}


x11()
comp_c_small <- 
  ggplot(comp_c2[1:75,], aes(x = h, y = diff, group = pair)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  geom_line() +
  geom_line(y=0,  linetype = 2) +
  facet_wrap(~ pair, ncol = 3) +
  coord_cartesian(ylim = c(-1.5,1)) +
  labs(x = 'hour', y = 'Difference in hour trend') +
  scale_x_discrete(name ="hour", limits=seq(0,24,by=4))+
  theme_bw()

plot_grid(comp_c_small,comp_d_small, 
          labels = c('A', 'B'),align = "h", nrow=2)

comp_d2$model = rep("model I", nrow(comp_d2))  
comp_c2$model = rep("model S", nrow(comp_c2))  

comp_cd = rbind(comp_d2,comp_c2)
comp_cd$Fpair <- as.character(comp_cd$pair)
unique(comp_cd$pair)
comp_cd$Fpair[comp_cd$pair == "1-2"] <- "Aug-Oct vs. Oct-Mar"
comp_cd$Fpair[comp_cd$pair == "1-3"] <- "Aug-Oct vs. May-Jul"
comp_cd$Fpair[comp_cd$pair == "2-3"] <- "Oct-Mar vs. May-Jul"
comp_cd$Fpair <- 
  factor(comp_cd$Fpair, levels=c('Aug-Oct vs. Oct-Mar',
                                 'Aug-Oct vs. May-Jul',
                                 'Oct-Mar vs. May-Jul'))
comp_cd$pair_model <- paste(comp_cd$pair, comp_cd$model, sep="_")
comp_cd$Fpair_model <- paste(comp_cd$model, comp_cd$Fpair, sep=" - ")
comp_cd$Fpair_model <- 
  factor(comp_cd$Fpair_model, levels=c('model S - Aug-Oct vs. Oct-Mar',
                                 'model S - Aug-Oct vs. May-Jul',
                                 'model S - Oct-Mar vs. May-Jul',
                                 'model I - Aug-Oct vs. Oct-Mar',
                                 'model I - Aug-Oct vs. May-Jul',
                                 'model I - Oct-Mar vs. May-Jul'))
comp_cd$model <- 
  factor(comp_cd$model, levels=c('model S', 'model I'))
                                       
unique(comp_cd$Fpair_model)



comp_cd_fig <- 
  ggplot(comp_cd, aes(x = h, y = diff, group = Fpair)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  geom_line() +
  geom_line(y=0,  linetype = 2) +
  facet_grid(model ~ Fpair) +
  coord_cartesian(ylim = c(-1.5,1)) +
  labs(x = 'hour', y = 'Difference in hour trend') +
  scale_x_discrete(name ="hour", limits=seq(0,24,by=4))+
  theme_bw()

plot(comp_cd_fig)

ggsave(comp_cd_fig, file= "Figure 6.png", width = 150, height = 100, units = c("mm"), dpi = 300)