################################################################################
### Survival analysis for Deterrent Vehicle Experiment
################################################################################

# Erase Global Environment
rm(list=ls())

### set working directory

### Import data
data <- read.csv("Data.csv")
data_RE <- read.csv("Data_RE.csv")

### Format data
data <- data[c(1:23)]  # Not sure why data came in with a bunch of extra columns
data <- data[complete.cases(data), ]

# Reorganize Data
data <- data[c(2, 3, 4, 12:17)]

# Put into long format
library(tidyr)
data1 <- gather(data, Dist, Time, -Pen, -Number, -Treat)
data1 <- cbind(data1, data1[,c(4)])                       # copy column
names(data1)[names(data1) == "data1[, c(4)]"] <- 'Bait'   # rename column
names(data1)[names(data1) == "Number"] <- 'CoyNum'        # rename column

# Clean up Bait column
data1$Bait[data1$Bait == "L1D"] <- "L1"
data1$Bait[data1$Bait == "M1D"] <- "M1"
data1$Bait[data1$Bait == "R1D"] <- "R1"
data1$Bait[data1$Bait == "L2D"] <- "L2"
data1$Bait[data1$Bait == "M2D"] <- "M2"
data1$Bait[data1$Bait == "R2D"] <- "R2"

# Relabel Distance categories
data1$Dist[data1$Dist == "L1D"] <- "close"
data1$Dist[data1$Dist == "R1D"] <- "close"
data1$Dist[data1$Dist == "M1D"] <- "close"
data1$Dist[data1$Dist == "L2D"] <- "far"
data1$Dist[data1$Dist == "R2D"] <- "far"
data1$Dist[data1$Dist == "M2D"] <- "far"

# Add a Censor column for Cox PH analysis
data1$Censor <- 0
data1$Censor[data1$Time < 60] <- 1

# Reorder the Treatment variable so that "pre" (pre-treatment) is the reference level,
# creating a new column in the dataframe
data1$TreatReorder <- factor(data1$Treat, levels=c("pre", "light", "nonreactive", "reactive"))
data1$Dist <- as.factor(data1$Dist)
data1$Censor <- as.numeric(data1$Censor)

data2 <- data1[,c(4,5,7,8)]
data3 <- data2
data3$Dist <- factor(data3$Dist, levels=c("far", "close"))  # use "far" as reference level in 'data3'

# data4 in case of the use of random effects ('Pen'), which is really "coyote pair"
data4 <- cbind(data_RE$CoyID, data3)
colnames(data4) <- c("CoyID", "Dist", "Time", "Censor", "TreatReorder")
data4$CoyID <- as.factor(data4$CoyID)

################################################################################
################################################################################

#####################
### Run Cox PH models
library(survival)
cfit1 <- coxph(Surv(Time, Censor) ~ 1, data=data1)                    # null model
cfit2 <- coxph(Surv(Time, Censor) ~ TreatReorder, data=data1)
cfit3 <- coxph(Surv(Time, Censor) ~ Dist, data=data1)
cfit4 <- coxph(Surv(Time, Censor) ~ TreatReorder + Dist, data=data1)  # additive model
cfit5 <- coxph(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1)  # interaction model

summary(cfit1)
summary(cfit2)
summary(cfit3)
summary(cfit4)
summary(cfit5) # concordance = 0.77 and the log-rank test was highly significant, indicating it is better than the null

###################################################
### Formal comparisons of nested models using ANOVA
anova(cfit1, cfit2)
anova(cfit1, cfit3)
anova(cfit1, cfit4)
anova(cfit1, cfit5)
anova(cfit2, cfit3)
anova(cfit2, cfit4)
anova(cfit2, cfit5)
anova(cfit3, cfit4)
anova(cfit3, cfit5)
anova(cfit4, cfit5)  # important comparison, p = 0.04172

# Analysis of Deviance Table from anova(cfit4, cfit5)
# Cox model: response is Surv(Time, Censor)
#  Model 1: ~ TreatReorder + Dist
#  Model 2: ~ TreatReorder * Dist
#     loglik  Chisq Df P(>|Chi|)
#  1 -835.72
#  2 -831.61 8.2176  3   0.04172 *
# This indicates that the interaction model fits better than the additive model;
# the interaction model has the loglik value closest to 0.
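################################################################################
### Added sketch (assumption, not the original model): the commented-out AICc
### table below includes an 'RE_Mod' (cfit5.f), a random-effects model that is
### fit later in the full script and not shown in this section. One plausible
### specification, using 'coxme' and the 'data4' frame prepared above, is a
### mixed-effects Cox model with a random intercept for coyote pair (CoyID).
### The object name 'cfit5.f' and this exact formula are assumptions.
library(coxme)
cfit5.f <- coxme(Surv(Time, Censor) ~ TreatReorder * Dist + (1 | CoyID), data = data4)
cfit5.f  # print fixed effects and the random-effect variance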
### AICc table
library(AICcmodavg)
#models <- list(cfit1, cfit2, cfit3, cfit4, cfit5, cfit5.f)  # NOTE: cfit5.f model is run later in the code
#model.names <- c('Null', 'Treatment', 'Dist', 'Additive_Mod', 'Interaction_Mod', 'RE_Mod')
#aictab(cand.set = models, modnames = model.names)
# Model selection based on AICc:
#
#                 K    AICc Delta_AICc AICcWt Cum.Wt      LL
# RE_Mod          7 1544.55       0.00      1      1 -765.03
# Interaction_Mod 7 1677.72     133.17      0      1 -831.61
# Additive_Mod    4 1679.62     135.07      0      1 -835.72
# Treatment       3 1747.17     202.62      0      1 -870.53
# Dist            1 1817.93     273.39      0      1 -907.96
# Null            0 1849.32     304.77      0      1 -924.66

################################################################################
################################################################################
### Plot Cox PH survival curves
pre <- subset(data1, TreatReorder=="pre")
light <- subset(data1, TreatReorder=="light")
nonreact <- subset(data1, TreatReorder=="nonreactive")
react <- subset(data1, TreatReorder=="reactive")

plot(survfit(cfit2, newdata=pre, conf.int=T), col="red", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Meatball Trials")
lines(survfit(cfit2, newdata=light), conf.int=T, col="blue", lty=1)
lines(survfit(cfit2, newdata=nonreact), conf.int=T, col="green", lty=1)
lines(survfit(cfit2, newdata=react), conf.int=T, col="orange", lty=1)

# subset data by treatment and distance
preclose <- subset(data1, TreatReorder=="pre" & Dist=="close")
prefar <- subset(data1, TreatReorder=="pre" & Dist=="far")
lightclose <- subset(data1, TreatReorder=="light" & Dist=="close")
lightfar <- subset(data1, TreatReorder=="light" & Dist=="far")
nonreactclose <- subset(data1, TreatReorder=="nonreactive" & Dist=="close")
nonreactfar <- subset(data1, TreatReorder=="nonreactive" & Dist=="far")
reactclose <- subset(data1, TreatReorder=="reactive" & Dist=="close")
reactfar <- subset(data1, TreatReorder=="reactive" & Dist=="far")

par(mfrow=c(1,1))

# pretreatment close vs far
plot(survfit(cfit4, newdata=preclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Baseline")
lines(survfit(cfit4, newdata=prefar), conf.int=T, col="black", lty=1)
legend("topright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, pch=1, pt.cex=2, cex=0.3)
legend("right", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, pch=21, pt.cex=2, cex=0.3)

# light close vs far
plot(survfit(cfit4, newdata=lightclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="No Movement")
lines(survfit(cfit4, newdata=lightfar), conf.int=T, col="black", lty=1)
legend("topright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, text.font=5, cex=0.3)

# nonreactive close vs far
plot(survfit(cfit4, newdata=nonreactclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Nonadaptive Movement")
lines(survfit(cfit4, newdata=nonreactfar), conf.int=T, col="black", lty=1)
legend("topright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, text.font=3, cex=0.3)

# reactive close vs far
plot(survfit(cfit4, newdata=reactclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Adaptive Movement")
lines(survfit(cfit4, newdata=reactfar), conf.int=T, col="black", lty=1)
legend("bottomright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, text.font=6, cex=0.3)

## For plot export use aspect ratio width 555 height 433
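################################################################################
### Added sketch (not in the original script): one way to export a plot at the
### aspect ratio noted above (555 x 433 pixels). The file name is hypothetical.
#png("baseline_coxph.png", width = 555, height = 433)
#plot(survfit(cfit4, newdata=preclose), conf.int=T, col="firebrick2", lty=1,
#     xlab="Minutes", ylab="Survival Probability", main="Baseline")
#lines(survfit(cfit4, newdata=prefar), conf.int=T, col="black", lty=1)
#dev.off()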
################################################################################
################################################################################
# load some additional R pkgs
library(ggplot2)
library(ggfortify)
library(survminer)
library(tidyverse)
library(tidytidbits)
library(survivalAnalysis)
library(coxme)

#######################################################
### Another way to fit models and plot survival curves?
# fxn 'survival::survfit' ... NOT for interaction terms
cfit1.1 <- survfit(Surv(Time, Censor) ~ 1, data=data1)  # null model
cfit2.1 <- survfit(Surv(Time, Censor) ~ TreatReorder, data=data1)
cfit3.1 <- survfit(Surv(Time, Censor) ~ Dist, data=data1)
cfit4.1 <- survfit(Surv(Time, Censor) ~ TreatReorder + Dist, data=data1)
#cfit5.1 <- survfit(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1)  # does not work for interaction terms

autoplot(cfit1.1)
autoplot(cfit2.1)
autoplot(cfit3.1)
autoplot(cfit4.1)  # crazy

# plot by treatment type
ggsurvplot(
  cfit2.1,                    # survfit object with calculated statistics.
  data = data1,               # data used to fit survival curves.
  risk.table = TRUE,          # show risk table.
  pval = TRUE,                # show p-value of log-rank test.
  conf.int = TRUE,            # show confidence intervals for
                              # point estimates of survival curves.
  xlim = c(0,60),             # present narrower X axis, but do not affect
                              # survival estimates.
  xlab = "Time in Minutes",   # customize X axis label.
  break.time.by = 10,         # break X axis into intervals of 10 minutes.
  ggtheme = theme_light(),    # customize plot and risk table with a theme.
  risk.table.y.text.col = T,  # colour risk table text annotations.
  risk.table.y.text = FALSE   # show bars instead of names in text annotations
                              # in legend of risk table
)

# Plot by bait distance
ggsurvplot(
  cfit3.1,                    # survfit object with calculated statistics.
  data = data1,               # data used to fit survival curves.
  risk.table = TRUE,          # show risk table.
  pval = TRUE,                # show p-value of log-rank test.
  conf.int = TRUE,            # show confidence intervals for
                              # point estimates of survival curves.
  xlim = c(0,60),             # present narrower X axis, but do not affect
                              # survival estimates.
  xlab = "Time in Minutes",   # customize X axis label.
  break.time.by = 10,         # break X axis into intervals of 10 minutes.
  ggtheme = theme_light(),    # customize plot and risk table with a theme.
  risk.table.y.text.col = T,  # colour risk table text annotations.
  risk.table.y.text = FALSE   # show bars instead of names in text annotations
                              # in legend of risk table
)

################################################################################
################################################################################
### Test assumptions of Cox proportional hazards in the best model

######################
### Schoenfeld testing
test.ph <- cox.zph(cfit5); test.ph
#                   chisq df      p
# TreatReorder      13.37  3 0.0039 ... assumption of proportional hazards FAILS
# Dist               8.15  1 0.0043 ... assumption of proportional hazards FAILS
# TreatReorder:Dist 14.23  3 0.0026 ... assumption of proportional hazards FAILS
# GLOBAL            20.59  7 0.0044 ... assumption of proportional hazards FAILS

### Schoenfeld tests (p-values < 0.05) show failure to uphold the proportional hazards assumption
ggcoxzph(test.ph)

##################
### plot residuals
ggcoxdiagnostics(cfit5, type = "dfbeta",
                 linear.predictions = FALSE, ggtheme = theme_bw())
# some definite outliers
ggcoxdiagnostics(cfit5, type = "deviance",
                 linear.predictions = FALSE, ggtheme = theme_bw())
# curved deviance of residuals ... should be straight, showing a constant hazard
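################################################################################
### Added sketch (not part of the original analysis): when Schoenfeld tests flag
### a covariate, one conventional remedy -- stratification, mentioned in the
### next section -- is to give each level of that covariate its own baseline
### hazard via strata(). A minimal illustration, stratifying on 'Dist'; the
### object name 'cfit.strat' is an assumption. The script instead switches to
### accelerated failure time models below.
cfit.strat <- coxph(Surv(Time, Censor) ~ TreatReorder + strata(Dist), data = data1)
summary(cfit.strat)
cox.zph(cfit.strat)  # re-check proportional hazards for the remaining covariate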
######################################################################
### Additional plotting to investigate Proportional Hazards Assumption
# ... curves should be parallel.
# If not parallel, can stratify by levels of a covariate?

### look at 'Dist'
KM_fit <- survfit(Surv(Time, Censor) ~ Dist, data = data2)
par(mfrow = c(1, 2))
plot(KM_fit, xlab = "Time", ylab = "Survival", col = 1:2)
legend("topright", levels(data2$Dist), lty = 1, col = 1:2, bty = "n")
plot(KM_fit, fun = function(s) -log(-log(s)), xlab = "Time",
     ylab = "-log(-log(Survival))", col = 1:2)
legend("topright", levels(data2$Dist), lty = 1, col = 1:2, bty = "n")

### look at 'TreatReorder'
KM_fit2 <- survfit(Surv(Time, Censor) ~ TreatReorder, data = data2)
par(mfrow = c(1, 2))
plot(KM_fit2, xlab = "Time", ylab = "Survival", col = 1:4)
legend("topright", levels(data2$TreatReorder), lty = 1, col = 1:4, bty = "n")
plot(KM_fit2, fun = function(s) -log(-log(s)), xlab = "Time",
     ylab = "-log(-log(Survival))", col = 1:4)
legend("topright", levels(data2$TreatReorder), lty = 1, col = 1:4, bty = "n")

######################################################
### test for non-proportionality ... already did this!
fit <- coxph(Surv(Time, Censor) ~ TreatReorder*Dist, data = data2)
check_PH <- cox.zph(fit, transform = "km")
check_PH
# null hypothesis is that the corresponding variable satisfies PH
# if p < 0.05, then ==> not proportional

# look at Schoenfeld residuals
# plot separately for each variable
par(mfrow = c(1, 1))
plot(check_PH, var = 1)
abline(h = coef(fit)[1], col = "red", lwd = 2)
plot(check_PH, var = 2)
abline(h = coef(fit)[2], col = "red", lwd = 2)
plot(check_PH, var = 3)
abline(h = coef(fit)[3], col = "red", lwd = 2)
# all variables fail

################################################################################
################################################################################
### Cox assumption of proportional hazards is violated, so ...
################################################################################
################################################################################
### USE ACCELERATED FAILURE TIME (AFT) MODELS
################################################################################
################################################################################
# The Weibull distribution is the default when using the 'survreg' function,
# but exponential, gaussian, logistic, lognormal, or loglogistic can also be used.
# This is parametric regression (Cox PH = semi-parametric).

# run AFT models
cfit1.aft <- survreg(Surv(Time, Censor) ~ 1, data=data1)  # null model
cfit2.aft <- survreg(Surv(Time, Censor) ~ TreatReorder, data=data1)
cfit3.aft <- survreg(Surv(Time, Censor) ~ Dist, data=data1)
cfit4.aft <- survreg(Surv(Time, Censor) ~ TreatReorder + Dist, data=data1)
cfit5.aft <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1)

### Formal comparisons of nested models using ANOVA
anova(cfit1.aft, cfit2.aft)
anova(cfit1.aft, cfit3.aft)
anova(cfit1.aft, cfit4.aft)
anova(cfit1.aft, cfit5.aft)
anova(cfit2.aft, cfit3.aft)
anova(cfit2.aft, cfit4.aft)
anova(cfit2.aft, cfit5.aft)
anova(cfit3.aft, cfit4.aft)
anova(cfit3.aft, cfit5.aft)
anova(cfit4.aft, cfit5.aft)  # important comparison, p = 0.005539553
# anova(cfit4.aft, cfit5.aft) output:
#                 Terms Resid. Df    -2*LL Test Df Deviance    Pr(>Chi)
# 1 TreatReorder + Dist        228 1152.120        NA       NA        NA
# 2 TreatReorder * Dist        225 1139.502    =   3 12.61815 0.005539553
# interaction model fits better than the additive model, as it did before

### AICc table
library(AICcmodavg)
models <- list(cfit1.aft, cfit2.aft, cfit3.aft, cfit4.aft, cfit5.aft)
model.names <- c('Null', 'Treatment', 'Dist', 'Additive_Mod', 'Interaction_Mod')
aictab(cand.set = models, modnames = model.names)
# Model selection based on AICc:
#
#                 K    AICc Delta_AICc AICcWt Cum.Wt      LL
# Interaction_Mod 9 1158.31       0.00   0.96   0.96 -569.75
# Additive_Mod    6 1164.49       6.18   0.04   1.00 -576.06
# Treatment       5 1241.86      83.55   0.00   1.00 -615.80
# Dist            3 1355.44     197.14   0.00   1.00 -674.67
# Null            2 1386.45     228.15   0.00   1.00 -691.20

summary(cfit5.aft)
# Call:
# survreg(formula = Surv(Time, Censor) ~ TreatReorder * Dist, data = data1)
#                                   Value Std. Error     z       p
# (Intercept)                      1.8344     0.1878  9.77 < 2e-16
# TreatReorderlight                1.1010     0.2738  4.02 5.8e-05
# TreatReordernonreactive          2.7122     0.4350  6.24 4.5e-10
# TreatReorderreactive             4.2902     0.7116  6.03 1.7e-09
# Distfar                         -1.0428     0.2643 -3.95 7.9e-05
# TreatReorderlight:Distfar       -0.9359     0.3808 -2.46  0.0140
# TreatReordernonreactive:Distfar -1.6714     0.5411 -3.09  0.0020
# TreatReorderreactive:Distfar    -0.1701     0.8493 -0.20  0.8412
# Log(scale)                       0.1544     0.0549  2.81  0.0049
#
# Scale= 1.17
#
# Weibull distribution
# Loglik(model)= -569.8   Loglik(intercept only)= -691.2
# Chisq= 242.9 on 7 degrees of freedom, p= 9e-49
# Number of Newton-Raphson Iterations: 6
# n= 234

################################################################################
################################################################################
### Does Loglik improve with the use of other distributions?
cfit5.aft.wei <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "weibull")
cfit5.aft.exp <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "exponential")
cfit5.aft.gaus <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "gaussian")
cfit5.aft.log <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "logistic")
cfit5.aft.lnorm <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "lognormal")
cfit5.aft.llog <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "loglogistic")

models <- list(cfit5.aft.wei, cfit5.aft.exp, cfit5.aft.gaus, cfit5.aft.log, cfit5.aft.lnorm, cfit5.aft.llog)
model.names <- c('Weibull', 'Exponential', 'Gaussian', 'Logistic', 'Lognormal', 'Loglogistic')
aictab(cand.set = models, modnames = model.names)
# Model selection based on AICc:
#
#             K    AICc Delta_AICc AICcWt Cum.Wt      LL
# Lognormal   9 1127.25       0.00   0.81   0.81 -554.22   # Best model
# Loglogistic 9 1130.17       2.92   0.19   1.00 -555.68
# Weibull     9 1158.31      31.06   0.00   1.00 -569.75
# Exponential 8 1164.73      37.48   0.00   1.00 -574.04
# Logistic    9 1660.48     533.23   0.00   1.00 -820.84
# Gaussian    9 1696.93     569.68   0.00   1.00 -839.06

################################################################################
################################################################################
### Also look at residual plots using different AFT model distributions

# fit models using various available distributions
fit_weib <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "weibull")
fit_exp <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "exponential")
fit_gaus <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "gaussian")
fit_log <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "logistic")
fit_lnorm <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "lognormal")
fit_llog <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "loglogistic")

par(mfrow=c(3,2))

#######################################
### Residual plot, Weibull distribution ==> okay fit
fitted_values <- fit_weib$linear.predictors
resids <- (log(fit_weib$y[, 1]) - fitted_values) / fit_weib$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Weibull",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Extreme Value distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

###########################################
### Residual plot, exponential distribution ==> okay fit
fitted_values <- fit_exp$linear.predictors
resids <- (log(fit_exp$y[, 1]) - fitted_values) / fit_exp$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Exponential",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Extreme Value distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

########################################
### Residual plot, Gaussian distribution ==> bad fit
fitted_values <- fit_gaus$linear.predictors
resids <- (log(fit_gaus$y[, 1]) - fitted_values) / fit_gaus$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Gaussian",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
yy <- pnorm(xx, lower.tail = FALSE)    # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Normal distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

########################################
### Residual plot, logistic distribution ==> bad fit
fitted_values <- fit_log$linear.predictors
resids <- (log(fit_log$y[, 1]) - fitted_values) / fit_log$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Logistic",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
yy <- plogis(xx, lower.tail = FALSE)   # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Logistic distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

#########################################
### Residual plot, lognormal distribution ==> good fit
fitted_values <- fit_lnorm$linear.predictors
resids <- (log(fit_lnorm$y[, 1]) - fitted_values) / fit_lnorm$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Lognormal",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
yy <- pnorm(xx, lower.tail = FALSE)    # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Normal distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

###########################################
### Residual plot, loglogistic distribution ==> good fit
fitted_values <- fit_llog$linear.predictors
resids <- (log(fit_llog$y[, 1]) - fitted_values) / fit_llog$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Loglogistic",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
yy <- plogis(xx, lower.tail = FALSE)   # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Logistic distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

################################################################################
################################################################################
### Plot cumulative hazard functions
# curves should steadily grow away from each other

# treatment group
KM_fit.t <- survfit(Surv(Time, Censor) ~ TreatReorder, data=data2)
plot(KM_fit.t, fun = "cumhaz")

# bait distance
KM_fit.d <- survfit(Surv(Time, Censor) ~ Dist, data=data2)
plot(KM_fit.d, fun = "cumhaz")
# curves end up mostly becoming parallel

################################################################################
################################################################################
### https://www.drizopoulos.com/courses/emc/survival%20analysis%20in%20r%20companion
### Tutorial to create effect plots to display results from complex models
# (i.e., interaction terms, etc ...)
### Try, using meatball data ...

##################
### log-rank tests

# treatment group
logrank.Tx <- survdiff(Surv(Time, Censor) ~ TreatReorder, data=data2)
logrank.Tx    # significant differences among treatment groups

# bait distance
logrank.Dist <- survdiff(Surv(Time, Censor) ~ Dist, data=data2)
logrank.Dist  # significant differences between bait-distance groups

################################################################################
################################################################################
### fit models using log-normal (best-fit) distribution

##########################################################################
### using "close" bait as reference level for Dist (the default in data2):
fit <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "lognormal")

# Check fixed effects for significance
library(car)
Anova(fit, type = 3)
# Analysis of Deviance Table (Type III tests)
#                   LR Chisq Df Pr(>Chisq)
# TreatReorder        90.541  3  < 2.2e-16 ***
# Dist                15.326  1  9.048e-05 ***
# TreatReorder:Dist   11.327  3    0.01008 *
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(fit)
# Call:
# survreg(formula = Surv(Time, Censor) ~ TreatReorder * Dist, data = data2,
#     dist = "lognormal")
#                                   Value Std. Error     z       p
# (Intercept)                      1.4695     0.1975  7.44 1.0e-13
# TreatReorderlight                0.7923     0.2808  2.82  0.0048
# TreatReordernonreactive          2.0065     0.3546  5.66 1.5e-08
# TreatReorderreactive             3.8999     0.4751  8.21 2.2e-16
# Distfar                         -1.1172     0.2794 -4.00 6.4e-05
# TreatReorderlight:Distfar       -0.8182     0.3961 -2.07  0.0389
# TreatReordernonreactive:Distfar -1.4553     0.4871 -2.99  0.0028
# TreatReorderreactive:Distfar     0.1332     0.6113  0.22  0.8275
# Log(scale)                       0.2100     0.0525  4.00 6.4e-05
#
# Scale= 1.23
#
# Log Normal distribution
# Loglik(model)= -554.2   Loglik(intercept only)= -664.8
# Chisq= 221.21 on 7 degrees of freedom, p= 3.6e-44
# Number of Newton-Raphson Iterations: 6
# n= 234

#################################################
### using "far" bait as reference level for Dist:
fit2.int <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data3, dist = "lognormal")

Anova(fit2.int, type = 3)
# Analysis of Deviance Table (Type III tests)
#                   LR Chisq Df Pr(>Chisq)
# TreatReorder       109.881  3  < 2.2e-16 ***
# Dist                15.326  1  9.048e-05 ***
# TreatReorder:Dist   11.327  3    0.01008 *
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(fit2.int)
# Call:
# survreg(formula = Surv(Time, Censor) ~ TreatReorder * Dist, data = data3,
#     dist = "lognormal")
#                                     Value Std. Error     z       p
# (Intercept)                        0.3523     0.1975  1.78  0.0745
# TreatReorderlight                 -0.0258     0.2794 -0.09  0.9263
# TreatReordernonreactive            0.5512     0.3339  1.65  0.0988
# TreatReorderreactive               4.0331     0.3929 10.27 < 2e-16
# Distclose                          1.1172     0.2794  4.00 6.4e-05
# TreatReorderlight:Distclose        0.8182     0.3961  2.07  0.0389
# TreatReordernonreactive:Distclose  1.4553     0.4871  2.99  0.0028
# TreatReorderreactive:Distclose    -0.1332     0.6113 -0.22  0.8275
# Log(scale)                         0.2100     0.0525  4.00 6.4e-05
#
# Scale= 1.23
#
# Log Normal distribution
# Loglik(model)= -554.2   Loglik(intercept only)= -664.8
# Chisq= 221.21 on 7 degrees of freedom, p= 3.6e-44
# Number of Newton-Raphson Iterations: 6
# n= 234

################################################################################
################################################################################
# FOR TABLE 2
# Run all models using lognormal distribution and compare AICs:
fit2.int <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data3, dist = "lognormal")
fit2.add <- survreg(Surv(Time, Censor) ~ TreatReorder + Dist, data=data3, dist = "lognormal")
fit2.trt <- survreg(Surv(Time, Censor) ~ TreatReorder, data=data3, dist = "lognormal")
fit2.dist <- survreg(Surv(Time, Censor) ~ Dist, data=data3, dist = "lognormal")
fit2.null <- survreg(Surv(Time, Censor) ~ 1, data=data3, dist = "lognormal")

modelsset <- list(fit2.int, fit2.add, fit2.trt, fit2.dist, fit2.null)
ln.model.names <- c('Movement * Bait Position', 'Movement + Bait Position', 'Movement',
                    'Bait Position', 'Null')
aictab(cand.set = modelsset, modnames = ln.model.names)
# Model selection based on AICc:
#                          K    AICc Delta_AICc AICcWt Cum.Wt      LL
# Movement * Bait Position 9 1127.25       0.00   0.92   0.92 -554.22
# Movement + Bait Position 6 1132.14       4.89   0.08   1.00 -559.89
# Movement                 5 1209.27      82.02   0.00   1.00 -599.50
# Bait Position            3 1292.58     165.33   0.00   1.00 -643.24
# Null                     2 1333.71     206.46   0.00   1.00 -664.83

################################################################################
################################################################################
### Effect Plots

### create new data frame ('ND') with all combinations of covariates
# use 'ND' for showing effect plots
ND <- with(data3, expand.grid(
  Time = seq(0, 60, length.out = 25),
  TreatReorder = levels(TreatReorder),
  Dist = levels(Dist)
))
head(ND, 10)
dim(ND)

# predicted survival times, using fitted model and ND
prs <- predict(fit2.int, ND, se.fit = TRUE, type = "lp")

# add upper/lower CIs to data
ND$pred <- prs[[1]]
ND$se <- prs[[2]]
ND$lo <- ND$pred - 1.96 * ND$se
ND$up <- ND$pred + 1.96 * ND$se
head(ND, 51)
tail(ND, 51)

######################
### Graph effect plots
library(lattice)
xyplot(pred + lo + up ~ Time | TreatReorder*Dist, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Time", ylab = "Log Survival Time")

################################################################################
par(mfrow=c(2,2))
par(mfrow=c(1,1))

ggplot(ND, aes(x = TreatReorder, y = Time)) +
  geom_point(aes(color = TreatReorder)) +
  facet_wrap(~TreatReorder) +
  theme_bw() +
  ggtitle("Model fit with 95% CIs and PIs", "solid line = mean, dotted line = median") +
  geom_line(aes(y = pred), linetype = 1) +
  geom_line(aes(y = pred), linetype = 2) +
  geom_ribbon(aes(ymin = lo, ymax = up), alpha = 0.5) +
  geom_ribbon(aes(ymin = lo, ymax = up), alpha = 0.1)

#############################################
# look at one level of categorical covariate?
#ND <- with(data3, expand.grid(
#  Time = seq(0, 60, length.out = 50),
#  TreatReorder = factor("nonreactive", levels = levels(TreatReorder)),
#  Dist = levels(Dist)
#))

###############################################################
### Plot predicted survival in original time scale... use exp()

# predict, using fitted model and ND
prs2 <- predict(fit2.int, ND, se.fit = TRUE, type = "lp")

# add upper/lower CIs to data
ND$pred <- prs2[[1]]
ND$se <- prs2[[2]]
ND$lo <- exp(ND$pred - 1.96 * ND$se)
ND$up <- exp(ND$pred + 1.96 * ND$se)
ND$pred <- exp(ND$pred)
head(ND, 76)
tail(ND, 76)
dim(ND)

xyplot(pred + lo + up ~ Time | TreatReorder*Dist, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Time", ylab = "Survival Time")
xyplot(pred + lo + up ~ TreatReorder | Dist, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Treatment", ylab = "Survival Time")
xyplot(pred + lo + up ~ Dist | TreatReorder, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Bait Distance", ylab = "Survival Time")

### MAKE THESE EFFECT PLOTS INTO TABLES OR OTHER GRAPHS?!?!?!?!?!?!?
### (one sketch of a prediction table is included after the boxplots below)

################################################################################
################################################################################
################################################################################
################################################################################
### Some descriptive statistics ...

###############################
### Boxplots from original data ... basic visual summary of data.

# Boxplot of survival data, by treatment group
with(data3, plot(TreatReorder, Time, xlab='Treatment Type', ylab='Minutes Until Taken',
                 xlim=c(0,5), ylim=c(0.1, 100), log='y', type='n'))

# Boxplot of survival data, by bait distance
with(data3, plot(Dist, Time, xlab='Bait Distance', ylab='Minutes Until Taken',
                 xlim=c(0,3), ylim=c(0.1, 100), log='y', type='n'))
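################################################################################
### Added sketch (an assumption, not part of the original analysis): one way to
### answer the "make these effect plots into tables" note above. Because the
### linear predictor of 'fit2.int' depends only on TreatReorder and Dist, one
### row per combination is enough; for the lognormal AFT, exp() of the linear
### predictor is the predicted (median) survival time. Object names 'pred.tab'
### and 'lp' are hypothetical.
pred.tab <- with(data3, expand.grid(TreatReorder = levels(TreatReorder),
                                    Dist = levels(Dist)))
lp <- predict(fit2.int, pred.tab, se.fit = TRUE, type = "lp")
pred.tab$PredTime <- exp(lp$fit)                       # predicted minutes until taken
pred.tab$Lower    <- exp(lp$fit - 1.96 * lp$se.fit)    # lower 95% CI
pred.tab$Upper    <- exp(lp$fit + 1.96 * lp$se.fit)    # upper 95% CI
pred.tab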
#######################################
### Marginal means, from collected data

# subset groups ...
pre.1 <- subset(data3, TreatReorder=="pre")
light.1 <- subset(data3, TreatReorder=="light")
nonreactive.1 <- subset(data3, TreatReorder=="nonreactive")
reactive.1 <- subset(data3, TreatReorder=="reactive")
close.1 <- subset(data3, Dist == "close")
far.1 <- subset(data3, Dist == "far")

# subset more specific groups
pre.close.1 <- subset(data3, Dist == "close" & TreatReorder == "pre")
pre.far.1 <- subset(data3, Dist == "far" & TreatReorder == "pre")
light.close.1 <- subset(data3, Dist == "close" & TreatReorder == "light")
light.far.1 <- subset(data3, Dist == "far" & TreatReorder == "light")
nonreactive.close.1 <- subset(data3, Dist == "close" & TreatReorder == "nonreactive")
nonreactive.far.1 <- subset(data3, Dist == "far" & TreatReorder == "nonreactive")
reactive.close.1 <- subset(data3, Dist == "close" & TreatReorder == "reactive")
reactive.far.1 <- subset(data3, Dist == "far" & TreatReorder == "reactive")

# means for each subset
mean <- as.data.frame(rbind(mean(pre.1$Time), mean(light.1$Time), mean(nonreactive.1$Time),
                            mean(reactive.1$Time), mean(close.1$Time), mean(far.1$Time),
                            mean(pre.close.1$Time), mean(pre.far.1$Time), mean(light.close.1$Time),
                            mean(light.far.1$Time), mean(nonreactive.close.1$Time), mean(nonreactive.far.1$Time),
                            mean(reactive.close.1$Time), mean(reactive.far.1$Time)))

# SDs for each subset
SD <- as.data.frame(rbind(sd(pre.1$Time), sd(light.1$Time), sd(nonreactive.1$Time), sd(reactive.1$Time),
                          sd(close.1$Time), sd(far.1$Time), sd(pre.close.1$Time), sd(pre.far.1$Time),
                          sd(light.close.1$Time), sd(light.far.1$Time), sd(nonreactive.close.1$Time),
                          sd(nonreactive.far.1$Time), sd(reactive.close.1$Time), sd(reactive.far.1$Time)))

# Standard error function
se <- function(x) sd(x)/sqrt(length(x))

# SEs for each subset
stderror <- as.data.frame(rbind(se(pre.1$Time), se(light.1$Time), se(nonreactive.1$Time), se(reactive.1$Time),
                                se(close.1$Time), se(far.1$Time), se(pre.close.1$Time), se(pre.far.1$Time),
                                se(light.close.1$Time), se(light.far.1$Time), se(nonreactive.close.1$Time),
                                se(nonreactive.far.1$Time), se(reactive.close.1$Time), se(reactive.far.1$Time)))

######################
# Create summary table
summary.table <- as.data.frame(cbind(mean, SD, stderror))
colnames(summary.table) <- c("Mean", "SD", "SE")
rownames(summary.table) <- c("Pre", "Light", "Nonreactive", "Reactive", "Close", "Far",
                             "Pre_Close", "Pre_Far", "Light_Close", "Light_Far",
                             "Nonreactive_Close", "Nonreactive_Far", "Reactive_Close", "Reactive_Far")
summary.table$Upper_Bar <- apply(summary.table[,c(1,3)], 1, sum)
Lower_Bar <- as.data.frame(cbind(summary.table$Mean - summary.table$SE))
summary.table <- as.data.frame(cbind(summary.table, Lower_Bar))
colnames(summary.table) <- c("Mean", "SD", "SE", "Upper_Bar", "Lower_Bar")
summary.table

# export
library(xlsx)
#write.xlsx(summary.table, file = "summary_table.xlsx")

###########################################
# Which baits SURVIVED the 60-minute trial?
survived <- subset(data3, Censor == 0)
survived
table(survived$Dist, survived$TreatReorder)
#       pre light nonreactive reactive
# far     0     0           0       11
# close   0     5          12       15
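################################################################################
### Added sketch (not in the original script): the same information as the
### count tables above and below, expressed as the proportion of baits that
### survived the full 60 minutes in each Distance x Treatment cell.
with(data3, tapply(Censor == 0, list(Dist, TreatReorder), mean))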
#########################################
# Which baits FAILED the 60-minute trial?
failed <- subset(data3, Censor == 1)
failed
table(failed$Dist, failed$TreatReorder)
#       pre light nonreactive reactive
# far    39    39          21        7
# close  39    34           9        3

################################################################################
################################################################################

###########################################
### Pairwise comparisons of predicted means
library(emmeans)

### Bait distance
emm.dist <- emmeans(fit2.int, "Dist")
pairs(emm.dist)
# NOTE: Results may be misleading due to involvement in interactions
#
#  contrast    estimate    SE  df t.ratio p.value
#  far - close    -1.65 0.196 225  -8.429  <.0001
#
# Results are averaged over the levels of: TreatReorder
# Results are given on the log (not the response) scale.
pwpm(emm.dist)

### Treatment group
emm.treat <- emmeans(fit2.int, "TreatReorder")
pairs(emm.treat)
# NOTE: Results may be misleading due to involvement in interactions
#
#  contrast               estimate    SE  df t.ratio p.value
#  pre - light              -0.383 0.198 225  -1.935  0.2164
#  pre - nonreactive        -1.279 0.244 225  -5.251  <.0001
#  pre - reactive           -3.966 0.311 225 -12.763  <.0001
#  light - nonreactive      -0.896 0.244 225  -3.674  0.0017
#  light - reactive         -3.583 0.311 225 -11.538  <.0001
#  nonreactive - reactive   -2.688 0.339 225  -7.929  <.0001
#
# Results are averaged over the levels of: Dist
# Results are given on the log (not the response) scale.
# P value adjustment: tukey method for comparing a family of 4 estimates
pwpm(emm.treat)

# plot
plot(emm.dist, comparisons = TRUE)
plot(emm.treat, comparisons = TRUE)

################################################################################
################################################################################
### END ###