library(ggplot2); library(gridExtra); library(XLConnect); library(lubridate); library(plyr); library(ggpubr); library(sjstats); library(car); library(MASS); library(rms); library(Hmisc); library(bestNormalize); library(rlang); library(dplyr); library(bbmle); library(survival); library(rcompanion); library(scales); library(survminer); library(glmmTMB); library(AICcmodavg); library(DHARMa); library(MuMIn); library(tidyverse); library(ggeffects); library(mgcv); library(devtools); library(gratia); library(RColorBrewer)

setwd("H:/Work/2019/Data/R script miscellaneous/Pythons")
datwd <- "H:/Work/2019/Data/R script miscellaneous/Pythons"
#_____________________________________________________________________________________________________________________

long <- loadWorkbook("Python_PIT_Trial_record_sheets_t0-t5 8-6-2019_long-format.xlsx")
getSheets(long)
long <- readWorksheet(long, sheet = "Time_05_long", header = TRUE)

#_____________________________________________________________________________________________________________________

#### GENERATE SCALED MASS INDEX VALUES AS MEASURE OF BODY CONDITION ####

# See Peig and Green 2009 'New perspectives for estimating body condition from mass/length data: the scaled mass index as an alternative method'

# scaled mass index Mi^ = Mi(L0/Li)^bsma

#L0 values 
L0.w <- mean(long$length_t, na.rm = TRUE); L0.w

# bsma values for each time period
# bsma values can be calculated at the following webpage (http://www.kimvdlinde.com/professional/rma.html) - make sure to tick the 'log transform the data' box. Save the raw weight and length values as a text file and this can be imported into the program at the webpage. Weight values in the first column and length values in the second.
bsma.w <- 3.495

# scaled mass index values
long$smi_t_w <- long$weight_t*(L0.w/long$length_t)^bsma.w

# scale_id number 84 has an outlier at sample = 3, it increased in weight more than 3 times but only grew in length by 1cm, replace the values for weight, length and body condition with NA
long$weight_t[long$scale_id == 84 & long$sample == 3] <- NA
long$length_t[long$scale_id == 84 & long$sample == 3] <- NA
long$smi_t_w[long$scale_id == 84 & long$sample == 3] <- NA

#_____________________________________________________________________________________________________________________

#### DEFINE FACTORS, NUMERIC VARIABLES, DATES ####

long$sex_t0 <- factor(long$sex_t0)
long$clutch_id <- factor(long$clutch_id)
long$pen_id_t0 <- factor(long$pen_id_t0)
long$scale_id <- factor(long$scale_id)
long$tagged <- factor(long$tagged)
long$tagger_id <- factor(long$tagger_id)
long$tag_location <- factor(long$tag_location)
long$event1 <- factor(long$event1)
long$sample <- factor(long$sample)

long$weight_t <- as.numeric(long$weight_t)
long$length_t <- as.numeric(long$length_t)
long$bmi_t <- as.numeric(long$bmi_t)
long$days_survived <- as.numeric(long$days_survived)
long$smi_t <- as.numeric(long$smi_t)
long$smi_t_w <- as.numeric(long$smi_t_w)

# Define dates
long$date_ <- ymd(long$date_, tz = NULL)
long$dob <- ymd(long$dob, tz = NULL)
long$dod <- ymd(long$dod, tz = NULL)
#_____________________________________________________________________________________________________________________

#### CREATE NEW VARIABLES ####

# Reclassify tagged as 0 or 1
long$tagged <- ifelse(long$tagged == "N", 0, 1)
long$tagged <- factor(long$tagged)

# Calculate snake ages
long$age <- as.numeric(long$date_ - long$dob)

# Calculate how old snake was when died or how old if survived to end of study
long$age.death <- ifelse(!is.na(as.numeric(long$dod - long$dob)), 
                         as.numeric(long$dod - long$dob), 
                         as.numeric(ymd("2019-06-08", tz = NULL) - long$dob))

# Snakes were born across 3 consecutuve days. For mathematical convergence and without the loss of too much (if any) info, assign all snakes the same birth date at 2018-05-19, the middle date of the 3 consecutive birth dates.
long$age.x <- as.numeric(long$date_ - ymd("2018-05-19"))
#_____________________________________________________________________________________________________________________

#### CREATE CONTINUOUS VARIABLE REPRESENTING PYTHON WEIGHT, SVL, BC AT HATCHING ####
# Weight at hatching
long <- long %>% 
 group_by(scale_id) %>% 
 mutate(hatch_wt = if_else(sample == "0", weight_t, NA_real_)) %>%
 fill(hatch_wt)

# SVL at hatching
long <- long %>% 
 group_by(scale_id) %>% 
 mutate(hatch_SVL = if_else(sample == "0", length_t, NA_real_)) %>%
 fill(hatch_SVL)

# BC at hatching
long <- long %>% 
 group_by(scale_id) %>% 
 mutate(hatch_smi = if_else(sample == "0", smi_t_w, NA_real_)) %>%
 fill(hatch_smi)
#____________________________________________________________________________________________________________________

#### IS TAGGER ASSOCIATED WITH PEN ####
table(long$tagger_id, long$pen_id_t0)

#       1  2  3  4  5  6  7  8
# 0    72 78 72 78 72 78 72 78
# Huu   0  0 30 72 78 30  0  0
# Kien 78 72 48  0  0  0  0  0
# Nhut  0  0  0  0  0 42 78 72

table(long$tagged, long$tagger_id) #210 pythons tagged by Huu, 198 pythons tagged by Kien, 192 pythons tagged by Nhut

#     0 Huu Kien Nhut
# 0 600   0    0    0
# 1   0 210  198  192

# tagger_id is associated with pen. Huu tagged no animals in pens 1, 2, 7 or 8, but tagged animals in pens 3-6. Kien tagged animals in pens 1, 2, and 3, but tagged no animals in pens 4-8. Nhut tagged animals in pens 6-8, but tagged no animals in pens 1-5.
#_______________________________________________________________________________________________________________________________

#### CONFIRMATION OF RANDOM ALLOCATION AMONG SEXES ####

# Number males with PIT tags
plyr::count(long$scale_id[long$sex_t0 == "m" & long$tagged == 1]) #46

# Number males with no PIT tags
plyr::count(long$scale_id[long$sex_t0 == "m" & long$tagged == 0]) #49

# Number females with PIT tags
plyr::count(long$scale_id[long$sex_t0 == "f" & long$tagged == 1]) #54

# Number females no PIT tags
plyr::count(long$scale_id[long$sex_t0 == "f" & long$tagged == 0]) #51

# Test for a difference in the proportion of male and female snakes allocated to the PIT tag and non-PIT tag groups using chi squared test.
table <- matrix(c(46, 54, 49, 51), 
                nrow = 2, 
                dimnames = list(Guess = c("M", "F"), 
                                Truth = c("PIT", "No PIT")))
table
X2 <- chisq.test(table, simulate.p.value = TRUE); X2 # x2 = 0.18045, p = 0.7661

# Test degrees of freedom
df <- (2-1)*(2-1); df #1
#____________________________________________________________________________________________

#### CONFIRMATION OF RANDOM ALLOCATION AMONG CLUTCHES ####

# Number PIT tag and non-PIT tag individuals in each clutch
# Clutch 1
plyr::count(long$scale_id[long$clutch_id == "1" & long$tagged == 1]) #14
plyr::count(long$scale_id[long$clutch_id == "1" & long$tagged == 0]) #13

# Clutch 2
plyr::count(long$scale_id[long$clutch_id == "2" & long$tagged == 1]) #15
plyr::count(long$scale_id[long$clutch_id == "2" & long$tagged == 0]) #15

# Clutch 3
plyr::count(long$scale_id[long$clutch_id == "3" & long$tagged == 1]) #10
plyr::count(long$scale_id[long$clutch_id == "3" & long$tagged == 0]) #11

# Clutch 4
plyr::count(long$scale_id[long$clutch_id == "4" & long$tagged == 1]) #14
plyr::count(long$scale_id[long$clutch_id == "4" & long$tagged == 0]) #13

# Clutch 5
plyr::count(long$scale_id[long$clutch_id == "5" & long$tagged == 1]) #13
plyr::count(long$scale_id[long$clutch_id == "5" & long$tagged == 0]) #13

# Clutch 6
plyr::count(long$scale_id[long$clutch_id == "6" & long$tagged == 1]) #14
plyr::count(long$scale_id[long$clutch_id == "6" & long$tagged == 0]) #14

# Clutch 7
plyr::count(long$scale_id[long$clutch_id == "7" & long$tagged == 1]) #10
plyr::count(long$scale_id[long$clutch_id == "7" & long$tagged == 0]) #10

# Clutch 8
plyr::count(long$scale_id[long$clutch_id == "8" & long$tagged == 1]) #10
plyr::count(long$scale_id[long$clutch_id == "8" & long$tagged == 0]) #11

# Test for a difference in the proportion of PIT tag and non-PIT tag individuals between clutches 
table <- matrix(c(14, 15, 10, 14, 13, 14, 10, 10, 13, 15, 11, 13, 13, 14, 10, 11), 
                nrow = 8, 
                dimnames = list(Guess = c("C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8"), 
                                Truth = c("PIT", "No PIT")))
table
X2 <- chisq.test(table, simulate.p.value = TRUE); X2 # x2 = 0.16931; p = 1

# Test degrees of freedom
df <- (8-1)*(2-1); df #7
#____________________________________________________________________________________________

#### CONFIRMATION OF RANDOM ALLOCATION AMONG PENS ####

# Number PIT tag and non-PIT tag individuals in each pen
# Pen 1
plyr::count(long$scale_id[long$pen_id_t0 == "1" & long$tagged == 1]) #13
plyr::count(long$scale_id[long$pen_id_t0 == "1" & long$tagged == 0]) #12

# Pen 2
plyr::count(long$scale_id[long$pen_id_t0 == "2" & long$tagged == 1]) #12
plyr::count(long$scale_id[long$pen_id_t0 == "2" & long$tagged == 0]) #13

# Pen 3
plyr::count(long$scale_id[long$pen_id_t0 == "3" & long$tagged == 1]) #13
plyr::count(long$scale_id[long$pen_id_t0 == "3" & long$tagged == 0]) #12

# Pen 4
plyr::count(long$scale_id[long$pen_id_t0 == "4" & long$tagged == 1]) #12
plyr::count(long$scale_id[long$pen_id_t0 == "4" & long$tagged == 0]) #13

# Pen 5
plyr::count(long$scale_id[long$pen_id_t0 == "5" & long$tagged == 1]) #13
plyr::count(long$scale_id[long$pen_id_t0 == "5" & long$tagged == 0]) #12

# Pen 6
plyr::count(long$scale_id[long$pen_id_t0 == "6" & long$tagged == 1]) #12
plyr::count(long$scale_id[long$pen_id_t0 == "6" & long$tagged == 0]) #13

# Pen 7
plyr::count(long$scale_id[long$pen_id_t0 == "7" & long$tagged == 1]) #13
plyr::count(long$scale_id[long$pen_id_t0 == "7" & long$tagged == 0]) #12

# Pen 8
plyr::count(long$scale_id[long$pen_id_t0 == "8" & long$tagged == 1]) #12
plyr::count(long$scale_id[long$pen_id_t0 == "8" & long$tagged == 0]) #13

# Test for a difference in the proportion of PIT tag and non-PIT tag individuals between pens 
table <- matrix(c(13, 12, 13, 12, 13, 12, 13, 12, 12, 13, 12, 13, 12, 13, 12, 13), 
                nrow = 8, 
                dimnames = list(Guess = c("P1", "P2", "P3", "P4", "P5", "P6", "P7", "P8"), 
                                Truth = c("PIT", "No PIT")))
table
X2 <- chisq.test(table, simulate.p.value = TRUE); X2 # x2 = 0.32; p = 1

# Test degrees of freedom
df <- (8-1)*(2-1); df #7
#____________________________________________________________________________________________

#### CONFIRMATION OF RANDOM ALLOCATION AMONG TAGGERS ####

# Number PIT tag and non-PIT tag individuals among taggers
# Tagger Huu
plyr::count(long$scale_id[long$tagger_id == "Huu" & long$tagged == 1]) #35
plyr::count(long$scale_id[long$tagger_id == "Huu" & long$tagged == 0]) # Note that this is zero as the pythons were not tagged

# Tagger Kien
plyr::count(long$scale_id[long$tagger_id == "Kien" & long$tagged == 1]) #33
plyr::count(long$scale_id[long$tagger_id == "Kien" & long$tagged == 0]) # Note that this is zero as the pythons were not tagged

# Tagger Nhut
plyr::count(long$scale_id[long$tagger_id == "Nhut" & long$tagged == 1]) #32
plyr::count(long$scale_id[long$tagger_id == "Nhut" & long$tagged == 0]) # Note that this is zero as the pythons were not tagged
#____________________________________________________________________________________________

#### ASSOCIATION BETWEEN SNAKE WEIGHT AND PIT TAG GROUP AT T0 ####

# First split our pit tag individuals from our untagged individuals so we can compare easy
PT <- subset(long, sample == "0" & tagged == 1)
NPT <- subset(long, sample == "0" & tagged == 0)

#t-test #note that we are using a t-test and not a glm with random effects for clutch, tagger or pen because tagged and untagged animals were randomly distributed among these groups
t.test(PT$weight_t, NPT$weight_t) #t = 1.1853, df = 197.97, p-value = 0.2373

# Mean and SD of weight by groups # Note that is 
mean(PT$weight_t) #103.94
range(PT$weight_t)# 70, 141
sd(PT$weight_t) #15.42189
mean(NPT$weight_t) #101.34
sd(NPT$weight_t) #15.59929
range(NPT$weight_t) #74, 137

#check normality and homoscedasticy assumptions for t test
hist(PT$weight_t)
hist(NPT$weight_t)
shapiro.test(PT$weight_t)
shapiro.test(NPT$weight_t)
var(PT$weight_t) #237.8347
var(NPT$weight_t) #243.3378
#____________________________________________________________________________________________

#### ASSOCIATION BETWEEN SNAKE LENGTH AND PIT TAG GROUP AT T0 ####

#t-test
t.test(PT$length_t, NPT$length_t) #t = 0.30457, df = 197.48, p-value = 0.761

# Mean and SD of weight by groups
mean(PT$length_t) #66.69
range(PT$length_t) #58, 75
sd(PT$length_t) #3.8447
mean(NPT$length_t) #66.52
range(NPT$length_t) #56, 75
sd(NPT$length_t) #4.046398

#check normality and homoscedasticy assumptions for t test
hist(PT$length_t)
hist(NPT$length_t)
shapiro.test(PT$length_t)
shapiro.test(NPT$length_t)
var(PT$length_t) #14.78172
var(NPT$length_t) #16.37333
#____________________________________________________________________________________________

#### ASSOCIATION BETWEEN SNAKE STANDARD MASS INDEX AND PIT TAG GROUP AT T0 ####

#t-test
t.test(PT$smi_t_w, NPT$smi_t_w) #t = 0.64478, df = 197.8, p-value = 0.5198

# Mean and SD of weight by groups
mean(PT$smi_t_w) #1389.401
range(PT$smi_t_w) #977.8878 2363.5874
sd(PT$smi_t_w) #220.9203
mean(NPT$smi_t_w) #1368.928
range(NPT$smi_t_w) #740.0232 2298.8777
sd(NPT$smi_t_w) #228.0593

#check normality and homoscedasticy assumptions for t test
hist(PT$smi_t_w)
hist(NPT$smi_t_w)
shapiro.test(PT$smi_t_w)
shapiro.test(NPT$smi_t_w)
var(PT$smi_t_w) #48805.79
var(NPT$smi_t_w) #52011.04
#_______________________________________________________________________________________________________________________________

#### DESCRIPTIVE FIGURES, MS FIGURE #2
# Create figure to extract legend from
# f1 <- ggplot(long, aes(x = age.x, y = weight_t, colour = tagged, group = scale_id)) + 
#   geom_line(size = 1.2) + 
#   scale_colour_manual(labels = c("Untagged", "Tagged"), values = c("#F8766D", "#00BFC4")) +
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("Age (days)") + 
#   ylab("Body mass (g)"); f1
# 
# # Plot weight of pythons through time by tagged vs untagged
# f2 <- ggplot(long, aes(x = age.x, y = weight_t, colour = tagged, group = scale_id)) + 
#   geom_line(size = 1.2) + 
#   scale_colour_manual(labels = c("Untagged", "Tagged"), values = c("#F8766D", "#00BFC4")) +
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("Age (days)") + 
#   ylab("Body mass (g)") + 
#   theme(legend.position = "none"); f2
# 
# # Plot length of pythons through time by tagged vs untagged
# f3 <- ggplot(long, aes(x = age.x, y = length_t, colour = tagged, group = scale_id)) + 
#   geom_line(size = 1.2) + 
#   scale_colour_manual(labels = c("Untagged", "Tagged"), values = c("#F8766D", "#00BFC4")) +
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("Age (days)") + 
#   ylab("SVL (cm)") + 
#   theme(legend.position = "none"); f3
# 
# # Plot body condition of pythons through time by tagged vs untagged
# f4 <- ggplot(long, aes(x = age.x, y = smi_t_w, colour = tagged, group = scale_id)) + 
#   geom_line(size = 1.2) + 
#   scale_colour_manual(labels = c("Untagged", "Tagged"), values = c("#F8766D", "#00BFC4")) +
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("Age (days)") + 
#   ylab("Body condition (g)") + 
#   theme(legend.position = "none"); f4
# 
# # Create function to extract legend from f1 then extract legend
# g_legend <- function(a.gplot){
#   tmp <- ggplot_gtable(ggplot_build(a.gplot))
#   leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
#   legend <- tmp$grobs[[leg]]
#   return(legend)}
# 
# mylegend <- g_legend(f1)
# 
# # Multi-panel plot of weight, length and body condition through time for tagged vs untagged pythons
# x11(); ggarrange(f2, f3, f4, mylegend, labels = c("A", "B", "C"), ncol = 2, nrow = 2, align = "v")
#_______________________________________________________________________________________________________________________________

#### GAMS FOR PYTHON WEIGHT

# Null model
wt0 <- gam(weight_t ~ 
             1, 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt0$aic # 20468.74

# Tagged as fixed effect
wt1 <- gam(weight_t ~ 
             tagged, 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt1$aic # 20470.55
# tagged as discrete fixed effect is not supported but keep in as this is main aim of study

# Tagged as fixed effect, plus smooth for age through time with k = 4
wt2 <- gam(weight_t ~ 
             tagged + 
             s(age.x, k = 4), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt2$aic # 18981.41
# Smooth for age through time is supported

# Tagged as fixed effect, plus smooth for age through time with k = 5
wt3 <- gam(weight_t ~ 
             tagged + 
             s(age.x, k = 5), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt3$aic # 18971.67
# Smooth for age through time with k = 5 is better than with k = 4

# Tagged as fixed effect, plus smooth for age through with k = 6
wt4 <- gam(weight_t ~ 
             tagged + 
             s(age.x, k = 6), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt4$aic # 18972.93
# Smooth for age through time with k = 6 is not supported over same smooth with k = 5

# Tagged as fixed effect, plus separate smooth for tagged and untagged snakes through time 
wt5 <- gam(weight_t ~ 
             tagged +
             s(age.x, by = tagged, k = 5),
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt5$aic # 18978.32   
# Separate smooths for tagged and untagged snakes through time is not supported but keep in so can plot smooths separately

# Tagged and sex as fixed effects, plus separate smooth for tagged and untagged snakes through time
wt6 <- gam(weight_t ~ 
             tagged + 
             sex_t0 +
             s(age.x, by = tagged, k = 5), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt6$aic # 18971.65
# Sex as discrete fixed effect is supported

# Tagged and sex as fixed effects, plus separate smooth for tagged and untagged snakes through time and separate smooths for sexes through time 
wt7 <- gam(weight_t ~ 
             tagged + 
             sex_t0 +
             s(age.x, by = tagged, k = 5) +
             s(age.x, by = sex_t0, k = 5), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt7$aic # 18963.64
# Separate smooths for sex through time are supported

# Tagged and sex as fixed effects, plus separate smooth for tagged and untagged snakes through time and separate smooths for sexes through time, plus random intercept for individual snakes 
wt8 <- gam(weight_t ~ 
             tagged + 
             sex_t0 +
             s(age.x, by = tagged, k = 5) +
             s(age.x, by = sex_t0, k = 5) + 
             s(scale_id, bs = "re"), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt8$aic # 18776.28 
# Random intercepts for in individual snakes is not supported

# Tagged and sex as fixed effects, plus separate smooth for tagged and untagged snakes through time and separate smooths for sexes through time, plus random intercept for individual snakes, plus random slopes for individual snakes through time 
wt9 <- gam(weight_t ~ 
             tagged + 
             sex_t0 +
             s(age.x, by = tagged, k = 5) +
             s(age.x, by = sex_t0, k = 5) + 
             s(scale_id, bs = "re") + 
             s(age.x, scale_id, bs = "re"), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
wt9$aic # 17616.92
# Random slopes for individual snakes through time are supported

# wt9 is most parsimonious model, now check that adequately meets assumptions
gam.check(wt9, rep = 500) 
appraise(wt9, method = 'simulate')

summary(wt9) # Note deviance explained is very high
# Family: gaussian 
# Link function: identity 
# 
# Formula:
# weight_t ~ tagged + sex_t0 + s(age.x, by = tagged, k = 5) + s(age.x, by = sex_t0, k = 5) + s(scale_id, bs = "re") + s(age.x, scale_id, bs = "re")
# 
# Parametric coefficients:
#              Estimate Std. Error t value Pr(>|t|)    
# (Intercept)  2677.26     130.11  20.578   <2e-16 ***
# tagged1        64.09     150.45   0.426    0.670    
# sex_t0m      -218.48     150.64  -1.450    0.147    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Approximate significance of smooth terms:
#                     edf   Ref.df       F p-value    
# s(age.x):tagged0    0.50516   0.8218   0.136   0.738    
# s(age.x):tagged1    1.06435   1.1047   0.004   0.936    
# s(age.x):sex_t0f    3.88597   3.9887 146.465  <2e-16 ***
# s(age.x):sex_t0m    3.85586   3.9830 120.557  <2e-16 ***
# s(scale_id)         0.02266 197.0000   0.000   1.000    
# s(age.x,scale_id) 173.43795 197.0000  16.187  <2e-16 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Rank: 418/419
# R-sq.(adj) =  0.937   Deviance explained = 94.8%
# -REML = 8959.5  Scale est. = 5.0851e+05  n = 1092

### Plot predicted growth curves for tagged and untaged and male and female pythons across age
# First create new data frame to predict from
pred.dat <- data.frame(tagged = c(rep(0, 752), rep(1, 752)), 
                       sex_t0 = c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376)),
                       age.x = c(rep(seq(9, 384, 1), 4)),
                       scale_id = rep(1, 1504))

# Define factors in new data frame
pred.dat$tagged <- factor(pred.dat$tagged)
pred.dat$sex_t0 <- factor(pred.dat$sex_t0)
pred.dat$scale_id <- factor(pred.dat$scale_id)

# Population averaged predictions from fitted gam wt9
preds <- predict(wt9, 
                 newdata = pred.dat, 
                 exclude = c("s(scale_id)", 
                             "s(age.x,scale_id)"), 
                 se = T)

# Combine predictions to new data frame for plotting
pred.dat <- cbind(pred.dat, fit = preds$fit)
pred.dat <- cbind(pred.dat, se.fit = preds$se.fit)

# Calculate 95% CI for predictions from predicted standard errors
pred.dat$lci <- pred.dat$fit - (1.96*pred.dat$se.fit)
pred.dat$uci <- pred.dat$fit + (1.96*pred.dat$se.fit)

# Plot predicted weight for tagged and untagged, and male and female, pythons through time (+/- 95% CI)
mycolours1 <- brewer.pal(4, "Blues")[3:4]
mycolours2 <- brewer.pal(4, "Greens")[3:4]

f2a <- ggplot() + 
  geom_point(data = long, mapping = aes(x = age.x, y = weight_t), alpha = 0.18, colour = "gray90") + 
  geom_line(data = long, mapping = aes(x = age.x, y = weight_t, group = scale_id), alpha = 0.18, colour = "gray90") +
  geom_ribbon(data = pred.dat, mapping = aes(x = age.x, ymin = lci, ymax = uci, fill = tagged:sex_t0), colour = NA, alpha = 0.2) +
  geom_line(data = pred.dat, mapping = aes(x = age.x, y = fit, colour = tagged:sex_t0), alpha = 1, size = 1.5) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("Body mass (g)"); f2a

f2a.1 <- ggplot(pred.dat, aes(x = age.x, y = fit, colour = tagged:sex_t0, fill = tagged:sex_t0)) + 
  geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.18, colour = NA) +
  geom_line(size = 1.5) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("Body mass (g)"); f2a.1

# Plot first derivative curves for male and female, tagged and untagged pythons through time
# First create new data frame to predict from
pred.dat <- data.frame(tagged = c(rep(0, 752), rep(1, 752)), 
                       sex_t0 = c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376)),
                       age.x = c(rep(seq(9, 384, 1), 4)),
                       scale_id = rep(1, 1504))

# Define factors in new data frame
pred.dat$tagged <- factor(pred.dat$tagged)
pred.dat$sex_t0 <- factor(pred.dat$sex_t0)
pred.dat$scale_id <- factor(pred.dat$scale_id)

# Generate posterior draws from fitted model
pd1 <- fitted_samples(wt9, n = 100, newdata = pred.dat, seed = 10)

# Add tiny bit to age.x in pred.dat for derivative calculation
pred.dat2 <- pred.dat
pred.dat2$age.x <- pred.dat$age.x + 0.01

# Generate posterior draws from fitted model 
pd2 <- fitted_samples(wt9, n = 100, newdata = pred.dat2, seed = 10)

# Calculate derivative
pd1$fd <- (pd2$fitted - pd1$fitted) / 0.01

# Attach age, tagged and sex data
age <- seq(9, 384, 1)
pd1$age.x <- rep(age, 400)
pd1$age.x <- as.factor(pd1$age.x)
sex <- c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376))
pd1$sex_t0 <- rep(sex, 100)
pd1$sex_t0 <- as.factor(pd1$sex_t0)
tagged <- c(rep(0, 752), rep(1, 752))
pd1$tagged <- rep(c(rep(0, 752), rep(1, 752)), 100)
pd1$tagged <- as.factor(pd1$tagged)

# Create function to calculate standard error
std <- function(x) sd(x)/sqrt(length(x))

# Group pd1 by tagged and sex and take mean and standard deviation for each combination of tagged*sex
f2b <- pd1 %>% dplyr::select(tagged, sex_t0, age.x, fd) %>% group_by(tagged, sex_t0, age.x) %>% summarise_all(funs(mean(.), std(.)))
f2b$age.x <- as.numeric(f2b$age.x)
f2b$lci <- f2b$mean - (1.96*f2b$std)
f2b$uci <- f2b$mean + (1.96*f2b$std)

#
fig.f2b <- ggplot(f2b, aes(x = age.x, y = mean, colour = tagged:sex_t0, fill = tagged:sex_t0)) + 
  geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) +
  geom_line(size = 1.2) +
scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("Gorwth rate BM (g/day)"); fig.f2b

# Multi-panel figure for weight
x11(); ggarrange(f2a, fig.f2b, labels = c("A", "B"), ncol = 2, nrow = 1, align ="h")


# Is there a correlation between hatching weight and subsequent weight
# First create time specific datasets for each time point at which pythons were measured
c1 <- filter(long, sample == "1")
c2 <- filter(long, sample == "2")
c3 <- filter(long, sample == "3")
c4 <- filter(long, sample == "4")
c5 <- filter(long, sample == "5")

#Correlation plot time 1
cor1.wt <- ggscatter(c1, 
                  x = "hatch_wt", 
                  y = "weight_t", 
                  add = "reg.line", 
                  add.params = list(color = "blue", fill = "lightgray"), 
                  conf.int = TRUE) + 
  stat_cor(method = "pearson") + 
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("BM at hatching (g)") + 
  ylab("BM at age = 73 days (g)"); cor1.wt

# Correlation plot time 2
cor2.wt <- ggscatter(c2, 
                     x = "hatch_wt", 
                     y = "weight_t", 
                     add = "reg.line", 
                     add.params = list(color = "blue", fill = "lightgray"), 
                     conf.int = TRUE) + 
  stat_cor(method = "pearson") + 
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("BM at hatching (g)") + 
  ylab("BM at age = 134 days (g)"); cor2.wt

# Correlation plot time 3
cor3.wt <- ggscatter(c3, 
                     x = "hatch_wt", 
                     y = "weight_t", 
                     add = "reg.line", 
                     add.params = list(color = "blue", fill = "lightgray"), 
                     conf.int = TRUE) + 
  stat_cor(method = "pearson") + 
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("BM at hatching (g)") + 
  ylab("BM at age = 220 days (g)"); cor3.wt

# Correlation plot time 4
cor4.wt <- ggscatter(c4, 
                     x = "hatch_wt", 
                     y = "weight_t", 
                     add = "reg.line", 
                     add.params = list(color = "blue", fill = "lightgray"), 
                     conf.int = TRUE) + 
  stat_cor(method = "pearson") + 
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("BM at hatching (g)") + 
  ylab("BM at age = 292 days (g)"); cor4.wt

# Correlation plot time 5
cor5.wt <- ggscatter(c5, 
                     x = "hatch_wt", 
                     y = "weight_t", 
                     add = "reg.line", 
                     add.params = list(color = "blue", fill = "lightgray"), 
                     conf.int = TRUE) + 
  stat_cor(method = "pearson") + 
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("BM at hatching (g)") + 
  ylab("BM at age = 385 days (g)"); cor5.wt

# Multi-panel correlation plot for python weight
x11(); ggarrange(cor1.wt, cor2.wt, cor3.wt, cor4.wt, cor5.wt, labels = c("A", "B", "C", "D", "E"), ncol = 5, nrow = 1)
#_______________________________________________________________________________________________________________

#### GAMS FOR PYTHON length

# Null model
l0 <- gam(length_t ~ 
             1, 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l0$aic # 11947.47  

# Tagged as fixed effect
l1 <- gam(length_t ~ 
             tagged, 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l1$aic # 11949.15 
# tagged as discrete fixed effect is not supported but keep in as this is main aim of study

# Tagged as fixed effect, plus smooth for age through time with k = 4. Note that for each python we only have a max of 6 data points so we cannot have k greater than 7
l2 <- gam(length_t ~ 
             tagged + 
             s(age.x, k = 4), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l2$aic # 9131.457
# Smooth for age through time is supported

# Tagged as fixed effect, plus smooth for age through time with k = 5
l3 <- gam(length_t ~ 
             tagged + 
             s(age.x, k = 5), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l3$aic # 9101.793  
# Smooth for age through time with k = 5 is better than with k = 4

# Tagged as fixed effect, plus smooth for age through time with k = 6
l4 <- gam(length_t ~ 
             tagged + 
             s(age.x, k = 6), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l4$aic # 9093.755 
# Smooth for age through time with k = 6 is better than with k = 5

# Tagged as fixed effect, plus separate smooths for tagged and untagged snakes through time 
l5 <- gam(length_t ~ 
             tagged +
             s(age.x, by = tagged, k = 6),
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l5$aic # 9101.841    
## Separate smooth for tagged and untagged snakes through time are not supported, but keep in so we can predict smooths for tagged and untagged snakes 

# Tagged and sex as fixed effects, plus separate smooth for tagged and untagged pythons through time
l6 <- gam(length_t ~ 
             tagged + 
             sex_t0 +
            s(age.x, by = tagged, k = 6), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l6$aic # 9100.215
# Sex as discrete fixed effect has minimal support

# Tagged and sex as fixed effects, plus separate smooths for sexes through time and separate smooths for tagged and untagged snakes through time 
l7 <- gam(length_t ~ 
             tagged + 
             sex_t0 +
             s(age.x, by = tagged, k = 6) +
             s(age.x, by = sex_t0, k = 6), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l7$aic # 9101.487
# Separate smooths for sexes through time are not supported but keep in so we can predict smooth for both sexes to demonstrate effects

# Tagged and sex as fixed effects, plus separate smooths for sexes and tagged and untagged pythons through time, plus random intercept for individual snakes 
l8 <- gam(length_t ~ 
            tagged + 
            sex_t0 +
            s(age.x, by = tagged, k = 6) +
            s(age.x, by = sex_t0, k = 6) +
            s(scale_id, bs = "re"), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
l8$aic # 8814.021
# Random intercept for individual snakes is supported

# Tagged and sex as fixed effects, plus separate smooth for sexes and tagged and untagged snakes through time, plus random intercept for individual snakes, plus random slopes for individual snakes through time 
l9 <- gam(length_t ~ 
             tagged + 
             sex_t0 +
             s(age.x, by = tagged, k = 6) +
             s(age.x, by = sex_t0, k = 6) +
             s(scale_id, bs = "re") + 
             s(age.x, scale_id, bs = "re"), 
           data = long, 
           method = "REML") #, family = Gamma(link = "log"))
l9$aic # 7992.942
# Random slopes for individual snakes through time are supported
# Our call to fitted_samples on line 953 does not run with this model. This suggests that the covariance matrix of our model is not positive definite and that the model is poorly defined or identified (too complex for the data), see here: https://stackoverflow.com/questions/65678593/error-chol-decomposition-failed-when-call-gratiafitted-samples?noredirect=1#comment116144629_65678593
# Reduce complexity of model by removing fixed effects

# Separate smooth for sexes and tagged and untagged snakes through time, plus random intercept for individual snakes, plus random slopes for individual snakes through time 
l10 <- gam(length_t ~ 
            s(age.x, by = tagged, k = 6) +
            s(age.x, by = sex_t0, k = 6) +
            s(scale_id, bs = "re") + 
            s(age.x, scale_id, bs = "re"), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
l10$aic # 7992.884
# This model is essentially the same as l9 - in regards to AIC

# Check that l10 adequately meets assumptions
gam.check(l10, rep = 500) 
appraise(l10, method = 'simulate')

summary(l10) # Note deviance explained is very high
# Family: gaussian 
# Link function: identity 
# 
# Formula:
# length_t ~ s(age.x, by = tagged, k = 6) + s(age.x, by = sex_t0, k = 6) + s(scale_id, bs = "re") + s(age.x, scale_id, bs = "re")
# 
# Parametric coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 138.9422     0.8457   164.3   <2e-16 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Approximate significance of smooth terms:
#                     edf  Ref.df       F p-value    
# s(age.x):tagged0    3.858   3.987  42.730  <2e-16 ***
# s(age.x):tagged1    4.827   4.982  51.132  <2e-16 ***
# s(age.x):sex_t0f    1.830   2.264  98.439  <2e-16 ***
# s(age.x):sex_t0m    1.014   1.021 224.258  <2e-16 ***
# s(scale_id)        15.435 199.000   0.268   0.169    
# s(age.x,scale_id) 170.677 199.000  13.826  <2e-16 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Rank: 420/421
# R-sq.(adj) =  0.977   Deviance explained = 98.1%
# -REML = 4173.6  Scale est. = 74.945    n = 1092

### Plot predicted SVL curves for tagged and untaged, and male and female, pythons across age
# First create new data frame to predict from
pred.dat <- data.frame(tagged = c(rep(0, 752), rep(1, 752)), 
                       sex_t0 = c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376)),
                       age.x = c(rep(seq(9, 384, 1), 4)),
                       scale_id = rep(1, 1504))

# Define factors in new data frame
pred.dat$tagged <- factor(pred.dat$tagged)
pred.dat$sex_t0 <- factor(pred.dat$sex_t0)
pred.dat$scale_id <- factor(pred.dat$scale_id)

# Population averaged predictions from fitted gam l10
preds <- predict(l10, 
                 newdata = pred.dat, 
                 exclude = c("s(scale_id)", 
                             "s(age.x,scale_id)"), 
                 se = T)

# Combine predictions to new data frame for plotting
pred.dat <- cbind(pred.dat, fit = preds$fit)
pred.dat <- cbind(pred.dat, se.fit = preds$se.fit)

# Calculate 95% CI for predictions from predicted standard errors
pred.dat$lci <- pred.dat$fit - (1.96*pred.dat$se.fit)
pred.dat$uci <- pred.dat$fit + (1.96*pred.dat$se.fit)

# Plot predicted SVL for tagged and untagged, and male and female, pythons through time (+/- 95% CI)
f3a <- ggplot() + 
  geom_point(data = long, mapping = aes(x = age.x, y = length_t), alpha = 0.18, colour = "gray90") + 
  geom_line(data = long, mapping = aes(x = age.x, y = length_t, group = scale_id), alpha = 0.18, colour = "gray90") +
  geom_ribbon(data = pred.dat, mapping = aes(x = age.x, ymin = lci, ymax = uci, fill = tagged:sex_t0), colour = NA, alpha = 0.2) +
  geom_line(data = pred.dat, mapping = aes(x = age.x, y = fit, colour = tagged:sex_t0), alpha = 1, size = 1.5) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("SVL (cm)"); f3a

f3a.1 <- ggplot(pred.dat, aes(x = age.x, y = fit, colour = tagged:sex_t0, fill = tagged:sex_t0)) + 
  geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.18, colour = NA) + 
  geom_line(size = 1.5) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("SVL (cm)"); f3a.1


# Plot first derivative curves for male and female, tagged and untagged pythons through time
# First create new data frame to predict from
pred.dat <- data.frame(tagged = c(rep(0, 752), rep(1, 752)), 
                       sex_t0 = c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376)),
                       age.x = c(rep(seq(9, 384, 1), 4)),
                       scale_id = rep(1, 1504))

# Define factors in new data frame
pred.dat$tagged <- factor(pred.dat$tagged)
pred.dat$sex_t0 <- factor(pred.dat$sex_t0)
pred.dat$scale_id <- factor(pred.dat$scale_id)

# Generate posterior draws from fitted model
pd1 <- fitted_samples(l10, n = 100, newdata = pred.dat, seed = 10)

# Add tiny bit to age.x in pred.dat for derivative calculation
pred.dat2 <- pred.dat
pred.dat2$age.x <- pred.dat$age.x + 0.01

# Generate posterior draws from fitted model 
pd2 <- fitted_samples(l10, n = 100, newdata = pred.dat2, seed = 10)

# Calculate derivative
pd1$fd <- (pd2$fitted - pd1$fitted) / 0.01

# Attach age, tagged and sex data
age <- seq(9, 384, 1)
pd1$age.x <- rep(age, 400)
pd1$age.x <- as.factor(pd1$age.x)
sex <- c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376))
pd1$sex_t0 <- rep(sex, 100)
pd1$sex_t0 <- as.factor(pd1$sex_t0)
tagged <- c(rep(0, 752), rep(1, 752))
pd1$tagged <- rep(c(rep(0, 752), rep(1, 752)), 100)
pd1$tagged <- as.factor(pd1$tagged)

# Group pd1 by tagged and sex and take mean and standard deviation for each combination of tagged*sex
f3b <- pd1 %>% dplyr::select(tagged, sex_t0, age.x, fd) %>% group_by(tagged, sex_t0, age.x) %>% summarise_all(funs(mean(.), std(.)))
f3b$age.x <- as.numeric(f3b$age.x)
f3b$lci <- f3b$mean - (1.96*f3b$std)
f3b$uci <- f3b$mean + (1.96*f3b$std)

#
fig.f3b <- ggplot(f3b, aes(x = age.x, y = mean, colour = tagged:sex_t0, fill = tagged:sex_t0)) + 
  geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) +
  geom_line(size = 1.2) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("Gorwth rate SVL (cm/day)"); fig.f3b

# Multi-panel figure for weight
x11(); ggarrange(f3a, fig.f3b, labels = c("A", "B"), ncol = 2, nrow = 1, align ="h")


# Is there a correlation between hatching SVL and subsequent SVL
# Correlation plot for time 1
# cor1.SVL <- ggscatter(c1, 
#                      x = "hatch_SVL", 
#                      y = "length_t", 
#                      add = "reg.line", 
#                      add.params = list(color = "blue", fill = "lightgray"), 
#                      conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("SVL at hatching (cm)") + 
#   ylab("SVL at age = 73 days (cm)"); cor1.SVL

# Correlation plot for time 2
# cor2.SVL <- ggscatter(c2, 
#                      x = "hatch_SVL", 
#                      y = "length_t", 
#                      add = "reg.line", 
#                      add.params = list(color = "blue", fill = "lightgray"), 
#                      conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("SVL at hatching (cm)") + 
#   ylab("SVL at age = 134 days (cm)"); cor2.SVL

# Correlation plot for time 3
# cor3.SVL <- ggscatter(c3, 
#                      x = "hatch_SVL", 
#                      y = "length_t", 
#                      add = "reg.line", 
#                      add.params = list(color = "blue", fill = "lightgray"), 
#                      conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("SVL at hatching (cm)") + 
#   ylab("SVL at age = 220 days (cm)"); cor3.SVL

# Correlation plot for time 4
# cor4.SVL <- ggscatter(c4, 
#                      x = "hatch_SVL", 
#                      y = "length_t", 
#                      add = "reg.line", 
#                      add.params = list(color = "blue", fill = "lightgray"), 
#                      conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("SVL at hatching (cm)") + 
#   ylab("SVL at age = 292 days (cm)"); cor4.SVL

# Correlation plot for time 5
# cor5.SVL <- ggscatter(c5, 
#                      x = "hatch_SVL", 
#                      y = "length_t", 
#                      add = "reg.line", 
#                      add.params = list(color = "blue", fill = "lightgray"), 
#                      conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("SVL at hatching (cm)") + 
#   ylab("SVL at age = 385 days (cm)"); cor5.SVL

# Multi-panel correlation plot for python length
# x11(); ggarrange(cor1.SVL, cor2.SVL, cor3.SVL, cor4.SVL, cor5.SVL, labels = c("A", "B", "C", "D", "E"), ncol = 5, nrow = 1)
#______________________________________________________________________________________________________________________

#### GAMS FOR PYTHON body condition

# Null model
smi0 <- gam(smi_t_w ~ 
            1, 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi0$aic # 15824.13  

# Tagged as fixed effect
smi1 <- gam(smi_t_w ~ 
            tagged, 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi1$aic # 15825.7 
# tagged as discrete fixed effect is not supported but leave in model as this is main interest of study

# Tagged as fixed effect, plus smooth for age through time with k = 4. Note that for each snake we only have 6 data points so K cannot be greater than 7
smi2 <- gam(smi_t_w ~ 
            tagged + 
            s(age.x, k = 4), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi2$aic # 15090.06 
# Smooth for age through time is supported

# Tagged as fixed effect, plus smooth for age through time with k = 5
smi3 <- gam(smi_t_w ~ 
            tagged + 
            s(age.x, k = 5), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi3$aic # 15073.34   
# Age smooth with k = 5 is better than with k = 4

# Tagged as fixed effect, plus smooth for age through time with k = 6
smi4 <- gam(smi_t_w ~ 
            tagged + 
            s(age.x, k = 6), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi4$aic # 15047.44  
# Age smooth with k = 6 has greatest support

# Tagged as fixed effect, plus separate smooth for tagged and untagged snakes through time 
smi5 <- gam(smi_t_w ~ 
            tagged +
            s(age.x, by = tagged, k = 6),
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi5$aic # 15044.65     
## Separate smooth for tagged and untagged snakes through time is supported 

# Tagged and sex as fixed effects, plus separate smooths for tagged and untagged snakes through time
smi6 <- gam(smi_t_w ~ 
            tagged + 
            sex_t0 +
            s(age.x, by = tagged, k = 6), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi6$aic # 15046.08
# Sex is not supported as a discrete fixed effect

# Tagged and sex as fixed effects, plus separate smooths for tagged and untagged snakes through time, plus separate smooths for sexes through time 
smi7 <- gam(smi_t_w ~ 
            tagged + 
            sex_t0 +
            s(age.x, by = tagged, k = 6) + 
            s(age.x, by = sex_t0, k = 6), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi7$aic # 15035.07
# Separate smooths for sexes through time are supported

# Tagged and sex as fixed effects, plus separate smooths for tagged and untagged snakes through time, plus separate smooths for sexes through time, plus random intercept for individual snakes 
smi8 <- gam(smi_t_w ~ 
            tagged + 
            sex_t0 +
            s(age.x, by = tagged, k = 6) + 
            s(age.x, by = sex_t0, k = 6) +
            s(scale_id, bs = "re"), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi8$aic # 15002.98
# Random intercept for individual snakes is supported

# Tagged and sex as fixed effects, plus separate smooths for tagged and untagged snakes through time, plus separate smooths for sexes through time, plus random intercept for individual snakes, plus random slopes for individual snakes through time 
smi9 <- gam(smi_t_w ~ 
            tagged + 
            sex_t0 +
            s(age.x, by = tagged, k = 6) + 
            s(age.x, by = sex_t0, k = 6) +
            s(scale_id, bs = "re") + 
            s(age.x, scale_id, bs = "re"), 
          data = long, 
          method = "REML") #, family = Gamma(link = "log"))
smi9$aic # 15002.98
# Random slopes for individual pythons through time are not supported 

# smi10 is most parsimonious body condition model, now check that adequately meets assumptions
gam.check(smi8, rep = 500) 
appraise(smi8, method = 'simulate')

summary(smi8) # Note deviance explained is reasonably high
# Family: gaussian 
# Link function: identity 
# 
# Formula:
# smi_t_w ~ tagged + sex_t0 + s(age.x, by = tagged, k = 6) + s(age.x, by = sex_t0, k = 6) + s(scale_id, bs = "re")
# 
# Parametric coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 1613.380     14.498 111.286   <2e-16 ***
# tagged1      -16.578     16.758  -0.989    0.323    
# sex_t0m       -9.381     16.777  -0.559    0.576    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Approximate significance of smooth terms:
#                   edf  Ref.df       F  p-value    
# s(age.x):tagged0  4.761   4.971 108.184  < 2e-16 ***
# s(age.x):tagged1  3.891   3.993 133.493  < 2e-16 ***
# s(age.x):sex_t0f  1.223   1.405   0.328    0.543    
# s(age.x):sex_t0m  1.037   1.067   0.001    0.972    
# s(scale_id)      64.366 197.000   0.490 6.07e-05 ***
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Rank: 222/223
# R-sq.(adj) =   0.56   Deviance explained = 59.1%
# -REML = 7489.7  Scale est. = 50529     n = 1092

### Plot predicted body condition curves for tagged and untaged and male and female pythons across age
# First create new data frame to predict from
pred.dat <- data.frame(tagged = c(rep(0, 752), rep(1, 752)), 
                       sex_t0 = c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376)),
                       age.x = c(rep(seq(9, 384, 1), 4)),
                       scale_id = rep(1, 1504))

# Define factors in new data frame
pred.dat$tagged <- factor(pred.dat$tagged)
pred.dat$sex_t0 <- factor(pred.dat$sex_t0)
pred.dat$scale_id <- factor(pred.dat$scale_id)

# Population averaged predictions from fitted gam smi10 
preds <- predict(smi8, 
                 newdata = pred.dat, 
                 exclude = c("s(scale_id)", 
                             "s(age.x,scale_id)"), 
                 se = T)

# Combine predictions to new data frame for plotting
pred.dat <- cbind(pred.dat, fit = preds$fit)
pred.dat <- cbind(pred.dat, se.fit = preds$se.fit)

# Calculate 95% CI for predictions from predicted standard errors
pred.dat$lci <- pred.dat$fit - (1.96*pred.dat$se.fit)
pred.dat$uci <- pred.dat$fit + (1.96*pred.dat$se.fit)

f4a <- ggplot() + 
  geom_point(data = long, mapping = aes(x = age.x, y = smi_t_w), alpha = 0.18, colour = "gray90") + 
  geom_line(data = long, mapping = aes(x = age.x, y = smi_t_w, group = scale_id), alpha = 0.18, colour = "gray90") +
  geom_ribbon(data = pred.dat, mapping = aes(x = age.x, ymin = lci, ymax = uci, fill = tagged:sex_t0), colour = NA, alpha = 0.2) +
  geom_line(data = pred.dat, mapping = aes(x = age.x, y = fit, colour = tagged:sex_t0), alpha = 1, size = 1.5) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("Body condition (g)"); f4a

f4a.1 <- ggplot(pred.dat, aes(x = age.x, y = fit, colour = tagged:sex_t0, fill = tagged:sex_t0)) + 
  geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.18, colour = NA) +
  geom_line(size = 1.5) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("Body condition (g)"); f4a.1

# Plot first derivative curves for male and female, tagged and untagged pythons through time
# First create new data frame to predict from
pred.dat <- data.frame(tagged = c(rep(0, 752), rep(1, 752)), 
                       sex_t0 = c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376)),
                       age.x = c(rep(seq(9, 384, 1), 4)),
                       scale_id = rep(1, 1504))

# Define factors in new data frame
pred.dat$tagged <- factor(pred.dat$tagged)
pred.dat$sex_t0 <- factor(pred.dat$sex_t0)
pred.dat$scale_id <- factor(pred.dat$scale_id)

# Generate posterior draws from fitted model
pd1 <- fitted_samples(smi8, n = 100, newdata = pred.dat, seed = 10)

# Add tiny bit to age.x in pred.dat for derivative calculation
pred.dat2 <- pred.dat
pred.dat2$age.x <- pred.dat$age.x + 0.01

# Generate posterior draws from fitted model 
pd2 <- fitted_samples(smi8, n = 100, newdata = pred.dat2, seed = 10)

# Calculate derivative
pd1$fd <- (pd2$fitted - pd1$fitted) / 0.01

# Attach age, tagged and sex data
age <- seq(9, 384, 1)
pd1$age.x <- rep(age, 400)
pd1$age.x <- as.factor(pd1$age.x)
sex <- c(rep("f", 376), rep("m", 376), rep("f", 376), rep("m", 376))
pd1$sex_t0 <- rep(sex, 100)
pd1$sex_t0 <- as.factor(pd1$sex_t0)
tagged <- c(rep(0, 752), rep(1, 752))
pd1$tagged <- rep(c(rep(0, 752), rep(1, 752)), 100)
pd1$tagged <- as.factor(pd1$tagged)

# Group pd1 by tagged and sex and take mean and standard deviation for each combination of tagged*sex
f4b <- pd1 %>% dplyr::select(tagged, sex_t0, age.x, fd) %>% group_by(tagged, sex_t0, age.x) %>% summarise_all(funs(mean(.), std(.)))
f4b$age.x <- as.numeric(f4b$age.x)
f4b$lci <- f4b$mean - (1.96*f4b$std)
f4b$uci <- f4b$mean + (1.96*f4b$std)

#
fig.f4b <- ggplot(f4b, aes(x = age.x, y = mean, colour = tagged:sex_t0, fill = tagged:sex_t0)) + 
  geom_ribbon(aes(ymin = lci, ymax = uci), alpha = 0.2, colour = NA) +
  geom_line(size = 1.2) +
  scale_colour_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  scale_fill_manual(labels = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), values = c(mycolours1, mycolours2)) +
  theme_classic() + 
  theme(axis.title.x = element_text(face = "bold", size = 14), 
        axis.title.y = element_text(face = "bold", size = 14), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12),
        legend.text = element_text(size = 12), legend.title = element_blank()) + 
  xlab("Age (days)") + 
  ylab("Gorwth rate BC (g/day)"); fig.f4b

# Multi-panel figure for weight
x11(); ggarrange(f4a, fig.f4b, labels = c("A", "B"), ncol = 2, nrow = 1, align ="h")


# Is there a correlation between hatching body condition and subsequent body condition
# Correlation plot for time 1
# cor1.smi <- ggscatter(c1, 
#                       x = "hatch_smi", 
#                       y = "smi_t_w", 
#                       add = "reg.line", 
#                       add.params = list(color = "blue", fill = "lightgray"), 
#                       conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("BC at hatching (g)") + 
#   ylab("BC at age = 73 days (g)"); cor1.smi

# Correlation plot for time 2
# cor2.smi <- ggscatter(c2, 
#                       x = "hatch_smi", 
#                       y = "smi_t_w", 
#                       add = "reg.line", 
#                       add.params = list(color = "blue", fill = "lightgray"), 
#                       conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("BC at hatching (g)") + 
#   ylab("BC at age = 134 days (g)"); cor2.smi

# Correlation plot for time 3
# cor3.smi <- ggscatter(c3, 
#                       x = "hatch_smi", 
#                       y = "smi_t_w", 
#                       add = "reg.line", 
#                       add.params = list(color = "blue", fill = "lightgray"), 
#                       conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("BC at hatching (g)") + 
#   ylab("BC at age = 220 days (g)"); cor3.smi

# Correlation plot for time 4
# cor4.smi <- ggscatter(c4, 
#                       x = "hatch_smi", 
#                       y = "smi_t_w", 
#                       add = "reg.line", 
#                       add.params = list(color = "blue", fill = "lightgray"), 
#                       conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("BC at hatching (g)") + 
#   ylab("BC at age = 292 days (g)"); cor4.smi

# Correlation plot for time 5
# cor5.smi <- ggscatter(c5, 
#                       x = "hatch_smi", 
#                       y = "smi_t_w", 
#                       add = "reg.line", 
#                       add.params = list(color = "blue", fill = "lightgray"), 
#                       conf.int = TRUE) + 
#   stat_cor(method = "pearson") + 
#   theme_classic() + 
#   theme(axis.title.x = element_text(face = "bold", size = 14), 
#         axis.title.y = element_text(face = "bold", size = 14), 
#         axis.text.x = element_text(size = 12), 
#         axis.text.y = element_text(size = 12),
#         legend.text = element_text(size = 12), legend.title = element_blank()) + 
#   xlab("BC at hatching (g)") + 
#   ylab("BC at age = 385 days (g)"); cor5.smi

# Multi-panel correlation plot for body condition
# x11(); ggarrange(cor1.smi, cor2.smi, cor3.smi, cor4.smi, cor5.smi, labels = c("A", "B", "C", "D", "E"), ncol = 5, nrow = 1)

# Multi-panel correlation plot for weight, length and body condition
# x11(); ggarrange(cor1.wt, cor2.wt, cor3.wt, cor4.wt, cor5.wt, 
                 # cor1.SVL, cor2.SVL, cor3.SVL, cor4.SVL, cor5.SVL, 
                 # cor1.smi, cor2.smi, cor3.smi, cor4.smi, cor5.smi, 
                 # labels = c("A", "B", "C", "D", "E", 
                 #            "F", "G", "H", "I", "J", 
                 #            "K", "L", "M", "N", "O"), 
                 # ncol = 5, 
                 # nrow = 3, 
                 # align = "v")
#_____________________________________________________________________________________________________________________

#### SURVIVAL MODELS (SURVIVE VS NOT SURVIVE) ####

# Note: It doesn't make sense here to include individual as a random effect as there is no within individual variation in survival status, an individual either dies or is alive

# Null model
s0 <- glmmTMB(event2 ~  1, data = long, family = binomial)
AICcmodavg::AICc(s0) # 1266.582

# Tagged as discrete fixed effect
s1 <- glmmTMB(event2 ~ tagged, data = long, family = binomial) 
AICcmodavg::AICc(s1) # 1267.89 
# Tagged is not supported but leave in as this is main objective of study 

# Tagged and sex as discrete fixed effects
s2 <- glmmTMB(event2 ~ tagged + sex_t0, data = long, family = binomial)
AICcmodavg::AICc(s2) # 1258.962
# The inclusion of sex as discrete fixed effect is supported

# Tagged and sex as discrete fixed effects, plus hatch weight as continuous fixed effect
s3 <- glmmTMB(event2 ~ tagged + sex_t0 + hatch_wt, data = long, family = binomial)
AICcmodavg::AICc(s3) # 1260.806 
# Fixed effect for hatch weight is not supported 

# Tagged and sex as discrete fixed effects, plus hatch length as continuous fixed effect
s4 <- glmmTMB(event2 ~ tagged + sex_t0 + hatch_SVL, data = long, family = binomial) 
AICcmodavg::AICc(s4) # 1255.861
# Fixed effect for hatch length is supported

# Tagged and sex as discrete fixed effects, plus hatch length and hatch body condition as continuous fixed effect
s5 <- glmmTMB(event2 ~ tagged + sex_t0 + hatch_SVL + hatch_smi, data = long, family = binomial) 
AICcmodavg::AICc(s5) # 1249.962
# Fixed effect for hatch body condition is supported

# Tagged and sex as discrete fixed effects, plus hatch length and hatch body condition as continuous fixed effect, plus random intercept for clutch
s6 <- glmmTMB(event2 ~ tagged + sex_t0 + hatch_SVL + hatch_smi + (1 | clutch_id), data = long, family = binomial) 
AICcmodavg::AICc(s6) # 1207.42
# Random intercept for clutch is supported

# Tagged and sex as discrete fixed effects, plus hatch length and hatch body condition as continuous fixed effect, plus random intercept for clutch
s7 <- glmmTMB(event2 ~ tagged + sex_t0 + hatch_SVL + hatch_smi + (1 | clutch_id) + (1 | pen_id_t0), data = long, family = binomial) 
AICcmodavg::AICc(s7) # 1190.936
# Random intercept for pen is supported

# Model selection table
model_list <- list(s0, s1, s2, s3, s4, s5, s6, s7)
model_names <- c("s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7")
modelsel <- aictab(model_list, model_names, second.ord = T)
modelsel

# Model selection based on AICc:
#   
#    K    AICc Delta_AICc AICcWt Cum.Wt      LL
# s7 7 1190.94       0.00      1      1 -588.42
# s6 6 1207.42      16.48      0      1 -597.68
# s5 5 1249.96      59.03      0      1 -619.96
# s4 4 1255.86      64.92      0      1 -623.91
# s2 3 1258.96      68.03      0      1 -626.47
# s3 4 1260.81      69.87      0      1 -626.39
# s0 1 1266.58      75.65      0      1 -632.29
# s1 2 1267.89      76.95      0      1 -631.94

# s7 is most parsumonious model
# Check model assumptions
sim_resid_s7 <- simulateResiduals(fittedModel = s7, n = 250)
plot(sim_resid_s7) # Looks good!
testResiduals(sim_resid_s7)
testQuantiles(sim_resid_s7)

# Summary s7
summary(s7)
# Family: binomial  ( logit )
# Formula: event2 ~ tagged + sex_t0 + hatch_SVL + hatch_smi + (1 | clutch_id) + (1 | pen_id_t0)
# Data: long
# 
# AIC      BIC   logLik deviance df.resid 
# 1190.8   1226.5   -588.4   1176.8     1193 
# 
# Random effects:
#   
# Conditional model:
# Groups    Name        Variance Std.Dev.
# clutch_id (Intercept) 0.1671   0.4088  
# pen_id_t0 (Intercept) 0.5332   0.7302  
# Number of obs: 1200, groups:  clutch_id, 8; pen_id_t0, 8
# 
# Conditional model:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  1.1476423  2.3257921   0.493 0.621701    
# tagged1      0.1618077  0.1479801   1.093 0.274200    
# sex_t0m      0.5891680  0.1549546   3.802 0.000143 ***
# hatch_SVL    0.0050887  0.0269064   0.189 0.849995    
# hatch_smi   -0.0003044  0.0004968  -0.613 0.540027    
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


# Create variable that combines tagged and sex_t0 so can create a survival plot showing untagged female, untagged male, tagged female and tagged male
long$tagged_sex <- ifelse(long$tagged == "0" & long$sex_t0 == "f", "1", 
                          ifelse(long$tagged == "0" & long$sex_t0 == "m", "2", 
                                 ifelse(long$tagged == "1" & long$sex_t0 == "f", "3", "4")))

#### SURVIVAL FIGURES ####
# Create survival fit
# Fit survival data using the Kaplan-Meier method
surv_object <- Surv(time = long$age.death, event = as.numeric(long$event1)) # Note that the event needs to be a numeric variable
surv_object

#Fit the Kaplan-Meier curves to PIT and non-PIT animals
fit1 <- survfit(surv_object ~ tagged_sex, data = long)
summary(fit1)

#Fit the Kaplan-Meier curves to female and males snakes
# fit2 <- survfit(surv_object ~ sex_t0, data = long)
# summary(fit2)

# Create first plot and assign to list
surv.fig <- ggsurvplot(fit1, data = long, ylim = c(0.65, 1), legend.title = "", legend.labs = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), conf.int = TRUE, palette = c(mycolours1, mycolours2)) + 
  theme_survminer(font.submain = c(14, "bold"), font.x = c(14, "bold"), font.y = c(14, "bold"), font.tickslab = c(12)) + 
  xlab("Age (days)"); x11(); surv.fig

# splots <- list()
# splots[[1]] <- ggsurvplot(fit1, data = long, ylim = c(0.70, 1), legend.title = "", legend.labs = c("Untagged female", "Untagged male", "Tagged female", "Tagged male"), conf.int = TRUE, color = c(mycolours1, mycolours2)) + 
#   theme_survminer(font.submain = c(14, "bold"), font.x = c(14, "bold"), font.y = c(14, "bold"), font.tickslab = c(12)) + 
#   xlab("Age (days)") 

# Create second plot and assign to list
# splots[[2]] <- ggsurvplot(fit2, data = long, ylim = c(0.70, 1), legend.title = "", legend.labs = c("Female", "Male"), conf.int = TRUE, submain = "B)") + 
#   theme_survminer(font.submain = c(14, "bold"), font.x = c(14, "bold"), font.y = c(14, "bold"), font.tickslab = c(12)) + 
#   xlab("Age (days)")
# 
# # Arrange plots in multi-panel plot
# x11(); arrange_ggsurvplots(splots, print = TRUE, ncol = 2, nrow = 1)