################################################################################
### Survival analysis for Deterrent Vehicle Experiment
################################################################################

# Erase Global Environment
rm(list=ls())

### set working directory

### Import data
data <- read.csv("Data.csv")
data_RE <- read.csv("Data_RE.csv")

### Format data
data <- data[c(1:23)]  # Not sure why data came in with a bunch of extra columns
data <- data[complete.cases(data), ]

# Reorganize Data
data <- data[c(2, 3, 4, 12:17)]

# Put into long format
library(tidyr)
data1 <- gather(data, Dist, Time, -Pen, -Number, -Treat)
data1 <- cbind(data1, data1[,c(4)])                       # copy column
names(data1)[names(data1) == "data1[, c(4)]"] <- 'Bait'   # rename column
names(data1)[names(data1) == "Number"] <- 'CoyNum'        # rename column

# Clean up Bait column
data1$Bait[data1$Bait == "L1D"] <- "L1"
data1$Bait[data1$Bait == "M1D"] <- "M1"
data1$Bait[data1$Bait == "R1D"] <- "R1"
data1$Bait[data1$Bait == "L2D"] <- "L2"
data1$Bait[data1$Bait == "M2D"] <- "M2"
data1$Bait[data1$Bait == "R2D"] <- "R2"

# Relabel Distance categories
data1$Dist[data1$Dist == "L1D"] <- "close"
data1$Dist[data1$Dist == "R1D"] <- "close"
data1$Dist[data1$Dist == "M1D"] <- "close"
data1$Dist[data1$Dist == "L2D"] <- "far"
data1$Dist[data1$Dist == "R2D"] <- "far"
data1$Dist[data1$Dist == "M2D"] <- "far"

# Add a Censor column for Cox PH analysis
data1$Censor <- 0
data1$Censor[data1$Time < 60] <- 1

# Reorder the Treatment variable so that "pre" (pre-treatment) is the reference level,
# creating a new column in the dataframe
data1$TreatReorder <- factor(data1$Treat, levels=c("pre", "light", "nonreactive", "reactive"))
data1$Dist <- as.factor(data1$Dist)
data1$Censor <- as.numeric(data1$Censor)

data2 <- data1[,c(4,5,7,8)]
data3 <- data2
data3$Dist <- factor(data3$Dist, levels=c("far", "close"))  # use "far" as reference level in 'data3'

# data4 in case of the use of random effects ('Pen'), which is really "coyote pair"
data4 <- cbind(data_RE$CoyID, data3)
colnames(data4) <- c("CoyID", "Dist", "Time", "Censor", "TreatReorder")
data4$CoyID <- as.factor(data4$CoyID)

################################################################################
################################################################################

#####################
### Run Cox PH models
library(survival)
cfit1 <- coxph(Surv(Time, Censor) ~ 1, data=data1)                    # null model
cfit2 <- coxph(Surv(Time, Censor) ~ TreatReorder, data=data1)
cfit3 <- coxph(Surv(Time, Censor) ~ Dist, data=data1)
cfit4 <- coxph(Surv(Time, Censor) ~ TreatReorder + Dist, data=data1)  # additive model
cfit5 <- coxph(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1)  # interaction model

summary(cfit1)
summary(cfit2)
summary(cfit3)
summary(cfit4)
summary(cfit5) # concordance = 0.77 and the log-rank test was highly significant, indicating it is better than the null

###################################################
### Formal comparisons of nested models using ANOVA
anova(cfit1, cfit2)
anova(cfit1, cfit3)
anova(cfit1, cfit4)
anova(cfit1, cfit5)
anova(cfit2, cfit3)
anova(cfit2, cfit4)
anova(cfit2, cfit5)
anova(cfit3, cfit4)
anova(cfit3, cfit5)
anova(cfit4, cfit5)  # important comparison, p = 0.04172

# Analysis of Deviance Table from anova(cfit4, cfit5)
# Cox model: response is Surv(Time, Censor)
#  Model 1: ~ TreatReorder + Dist
#  Model 2: ~ TreatReorder * Dist
#     loglik  Chisq Df P(>|Chi|)
#  1 -835.72
#  2 -831.61 8.2176  3   0.04172 *
# This indicates that the interaction model fits better than the additive model;
# the interaction model has the loglik value closest to 0.
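################################################################################
### Added sketch (assumption, not the original model): the commented-out AICc
### table below includes an 'RE_Mod' (cfit5.f), a random-effects model that is
### fit later in the full script and not shown in this section. One plausible
### specification, using 'coxme' and the 'data4' frame prepared above, is a
### mixed-effects Cox model with a random intercept for coyote pair (CoyID).
### The object name 'cfit5.f' and this exact formula are assumptions.
library(coxme)
cfit5.f <- coxme(Surv(Time, Censor) ~ TreatReorder * Dist + (1 | CoyID), data = data4)
cfit5.f  # print fixed effects and the random-effect variance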
### AICc table
library(AICcmodavg)
#models <- list(cfit1, cfit2, cfit3, cfit4, cfit5, cfit5.f)  # NOTE: cfit5.f model is run later in the code
#model.names <- c('Null', 'Treatment', 'Dist', 'Additive_Mod', 'Interaction_Mod', 'RE_Mod')
#aictab(cand.set = models, modnames = model.names)
# Model selection based on AICc:
#
#                 K    AICc Delta_AICc AICcWt Cum.Wt      LL
# RE_Mod          7 1544.55       0.00      1      1 -765.03
# Interaction_Mod 7 1677.72     133.17      0      1 -831.61
# Additive_Mod    4 1679.62     135.07      0      1 -835.72
# Treatment       3 1747.17     202.62      0      1 -870.53
# Dist            1 1817.93     273.39      0      1 -907.96
# Null            0 1849.32     304.77      0      1 -924.66

################################################################################
################################################################################
### Plot Cox PH survival curves
pre <- subset(data1, TreatReorder=="pre")
light <- subset(data1, TreatReorder=="light")
nonreact <- subset(data1, TreatReorder=="nonreactive")
react <- subset(data1, TreatReorder=="reactive")

plot(survfit(cfit2, newdata=pre, conf.int=T), col="red", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Meatball Trials")
lines(survfit(cfit2, newdata=light), conf.int=T, col="blue", lty=1)
lines(survfit(cfit2, newdata=nonreact), conf.int=T, col="green", lty=1)
lines(survfit(cfit2, newdata=react), conf.int=T, col="orange", lty=1)

# subset data by treatment and distance
preclose <- subset(data1, TreatReorder=="pre" & Dist=="close")
prefar <- subset(data1, TreatReorder=="pre" & Dist=="far")
lightclose <- subset(data1, TreatReorder=="light" & Dist=="close")
lightfar <- subset(data1, TreatReorder=="light" & Dist=="far")
nonreactclose <- subset(data1, TreatReorder=="nonreactive" & Dist=="close")
nonreactfar <- subset(data1, TreatReorder=="nonreactive" & Dist=="far")
reactclose <- subset(data1, TreatReorder=="reactive" & Dist=="close")
reactfar <- subset(data1, TreatReorder=="reactive" & Dist=="far")

par(mfrow=c(1,1))

# pretreatment close vs far
plot(survfit(cfit4, newdata=preclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Baseline")
lines(survfit(cfit4, newdata=prefar), conf.int=T, col="black", lty=1)
legend("topright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, pch=1, pt.cex=2, cex=0.3)
legend("right", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, pch=21, pt.cex=2, cex=0.3)

# light close vs far
plot(survfit(cfit4, newdata=lightclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="No Movement")
lines(survfit(cfit4, newdata=lightfar), conf.int=T, col="black", lty=1)
legend("topright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, text.font=5, cex=0.3)

# nonreactive close vs far
plot(survfit(cfit4, newdata=nonreactclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Nonadaptive Movement")
lines(survfit(cfit4, newdata=nonreactfar), conf.int=T, col="black", lty=1)
legend("topright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, text.font=3, cex=0.3)

# reactive close vs far
plot(survfit(cfit4, newdata=reactclose), conf.int=T, col="firebrick2", lty=1, xlab="Minutes",
     ylab="Survival Probability", main="Adaptive Movement")
lines(survfit(cfit4, newdata=reactfar), conf.int=T, col="black", lty=1)
legend("bottomright", legend=c("Low Risk", "High Risk"), col=c("firebrick2", "black"),
       lty=1:1, text.font=6, cex=0.3)

## For plot export use aspect ratio width 555 height 433
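################################################################################
### Added sketch (not in the original script): one way to export a plot at the
### aspect ratio noted above (555 x 433 pixels). The file name is hypothetical.
#png("baseline_coxph.png", width = 555, height = 433)
#plot(survfit(cfit4, newdata=preclose), conf.int=T, col="firebrick2", lty=1,
#     xlab="Minutes", ylab="Survival Probability", main="Baseline")
#lines(survfit(cfit4, newdata=prefar), conf.int=T, col="black", lty=1)
#dev.off()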
################################################################################
################################################################################
# load some additional R pkgs
library(ggplot2)
library(ggfortify)
library(survminer)
library(tidyverse)
library(tidytidbits)
library(survivalAnalysis)
library(coxme)

#######################################################
### Another way to fit models and plot survival curves?
# fxn 'survival::survfit' ... NOT for interaction terms
cfit1.1 <- survfit(Surv(Time, Censor) ~ 1, data=data1)  # null model
cfit2.1 <- survfit(Surv(Time, Censor) ~ TreatReorder, data=data1)
cfit3.1 <- survfit(Surv(Time, Censor) ~ Dist, data=data1)
cfit4.1 <- survfit(Surv(Time, Censor) ~ TreatReorder + Dist, data=data1)
#cfit5.1 <- survfit(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1)  # does not work for interaction terms

autoplot(cfit1.1)
autoplot(cfit2.1)
autoplot(cfit3.1)
autoplot(cfit4.1)  # crazy

# plot by treatment type
ggsurvplot(
  cfit2.1,                    # survfit object with calculated statistics.
  data = data1,               # data used to fit survival curves.
  risk.table = TRUE,          # show risk table.
  pval = TRUE,                # show p-value of log-rank test.
  conf.int = TRUE,            # show confidence intervals for
                              # point estimates of survival curves.
  xlim = c(0,60),             # present narrower X axis, but do not affect
                              # survival estimates.
  xlab = "Time in Minutes",   # customize X axis label.
  break.time.by = 10,         # break X axis into intervals of 10 minutes.
  ggtheme = theme_light(),    # customize plot and risk table with a theme.
  risk.table.y.text.col = T,  # colour risk table text annotations.
  risk.table.y.text = FALSE   # show bars instead of names in text annotations
                              # in legend of risk table
)

# Plot by bait distance
ggsurvplot(
  cfit3.1,                    # survfit object with calculated statistics.
  data = data1,               # data used to fit survival curves.
  risk.table = TRUE,          # show risk table.
  pval = TRUE,                # show p-value of log-rank test.
  conf.int = TRUE,            # show confidence intervals for
                              # point estimates of survival curves.
  xlim = c(0,60),             # present narrower X axis, but do not affect
                              # survival estimates.
  xlab = "Time in Minutes",   # customize X axis label.
  break.time.by = 10,         # break X axis into intervals of 10 minutes.
  ggtheme = theme_light(),    # customize plot and risk table with a theme.
  risk.table.y.text.col = T,  # colour risk table text annotations.
  risk.table.y.text = FALSE   # show bars instead of names in text annotations
                              # in legend of risk table
)

################################################################################
################################################################################
### Test assumptions of Cox proportional hazards in the best model

######################
### Schoenfeld testing
test.ph <- cox.zph(cfit5); test.ph
#                   chisq df      p
# TreatReorder      13.37  3 0.0039 ... assumption of proportional hazards FAILS
# Dist               8.15  1 0.0043 ... assumption of proportional hazards FAILS
# TreatReorder:Dist 14.23  3 0.0026 ... assumption of proportional hazards FAILS
# GLOBAL            20.59  7 0.0044 ... assumption of proportional hazards FAILS

### Schoenfeld tests (p-values < 0.05) show failure to uphold the proportional hazards assumption
ggcoxzph(test.ph)

##################
### plot residuals
ggcoxdiagnostics(cfit5, type = "dfbeta",
                 linear.predictions = FALSE, ggtheme = theme_bw())
# some definite outliers
ggcoxdiagnostics(cfit5, type = "deviance",
                 linear.predictions = FALSE, ggtheme = theme_bw())
# curved deviance of residuals ... should be straight, showing a constant hazard
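################################################################################
### Added sketch (not part of the original analysis): when Schoenfeld tests flag
### a covariate, one conventional remedy -- stratification, mentioned in the
### next section -- is to give each level of that covariate its own baseline
### hazard via strata(). A minimal illustration, stratifying on 'Dist'; the
### object name 'cfit.strat' is an assumption. The script instead switches to
### accelerated failure time models below.
cfit.strat <- coxph(Surv(Time, Censor) ~ TreatReorder + strata(Dist), data = data1)
summary(cfit.strat)
cox.zph(cfit.strat)  # re-check proportional hazards for the remaining covariate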
######################################################################
### Additional plotting to investigate Proportional Hazards Assumption
# ... curves should be parallel.
# If not parallel, can stratify by levels of a covariate?

### look at 'Dist'
KM_fit <- survfit(Surv(Time, Censor) ~ Dist, data = data2)
par(mfrow = c(1, 2))
plot(KM_fit, xlab = "Time", ylab = "Survival", col = 1:2)
legend("topright", levels(data2$Dist), lty = 1, col = 1:2, bty = "n")
plot(KM_fit, fun = function(s) -log(-log(s)), xlab = "Time",
     ylab = "-log(-log(Survival))", col = 1:2)
legend("topright", levels(data2$Dist), lty = 1, col = 1:2, bty = "n")

### look at 'TreatReorder'
KM_fit2 <- survfit(Surv(Time, Censor) ~ TreatReorder, data = data2)
par(mfrow = c(1, 2))
plot(KM_fit2, xlab = "Time", ylab = "Survival", col = 1:4)
legend("topright", levels(data2$TreatReorder), lty = 1, col = 1:4, bty = "n")
plot(KM_fit2, fun = function(s) -log(-log(s)), xlab = "Time",
     ylab = "-log(-log(Survival))", col = 1:4)
legend("topright", levels(data2$TreatReorder), lty = 1, col = 1:4, bty = "n")

######################################################
### test for non-proportionality ... already did this!
fit <- coxph(Surv(Time, Censor) ~ TreatReorder*Dist, data = data2)
check_PH <- cox.zph(fit, transform = "km")
check_PH
# null hypothesis is that the corresponding variable satisfies PH
# if p < 0.05, then ==> not proportional

# look at Schoenfeld residuals
# plot separately for each variable
par(mfrow = c(1, 1))
plot(check_PH, var = 1)
abline(h = coef(fit)[1], col = "red", lwd = 2)
plot(check_PH, var = 2)
abline(h = coef(fit)[2], col = "red", lwd = 2)
plot(check_PH, var = 3)
abline(h = coef(fit)[3], col = "red", lwd = 2)
# all variables fail

################################################################################
################################################################################
### Cox assumption of proportional hazards is violated, so ...
################################################################################
################################################################################
### USE ACCELERATED FAILURE TIME (AFT) MODELS
################################################################################
################################################################################
# The Weibull distribution is the default when using the 'survreg' function,
# but exponential, gaussian, logistic, lognormal, or loglogistic can also be used.
# This is parametric regression (Cox PH = semi-parametric).

# run AFT models
cfit1.aft <- survreg(Surv(Time, Censor) ~ 1, data=data1)  # null model
cfit2.aft <- survreg(Surv(Time, Censor) ~ TreatReorder, data=data1)
cfit3.aft <- survreg(Surv(Time, Censor) ~ Dist, data=data1)
cfit4.aft <- survreg(Surv(Time, Censor) ~ TreatReorder + Dist, data=data1)
cfit5.aft <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1)

### Formal comparisons of nested models using ANOVA
anova(cfit1.aft, cfit2.aft)
anova(cfit1.aft, cfit3.aft)
anova(cfit1.aft, cfit4.aft)
anova(cfit1.aft, cfit5.aft)
anova(cfit2.aft, cfit3.aft)
anova(cfit2.aft, cfit4.aft)
anova(cfit2.aft, cfit5.aft)
anova(cfit3.aft, cfit4.aft)
anova(cfit3.aft, cfit5.aft)
anova(cfit4.aft, cfit5.aft)  # important comparison, p = 0.005539553
# anova(cfit4.aft, cfit5.aft) output:
#                 Terms Resid. Df    -2*LL Test Df Deviance    Pr(>Chi)
# 1 TreatReorder + Dist        228 1152.120        NA       NA        NA
# 2 TreatReorder * Dist        225 1139.502    =   3 12.61815 0.005539553
# interaction model fits better than the additive model, as it did before

### AICc table
library(AICcmodavg)
models <- list(cfit1.aft, cfit2.aft, cfit3.aft, cfit4.aft, cfit5.aft)
model.names <- c('Null', 'Treatment', 'Dist', 'Additive_Mod', 'Interaction_Mod')
aictab(cand.set = models, modnames = model.names)
# Model selection based on AICc:
#
#                 K    AICc Delta_AICc AICcWt Cum.Wt      LL
# Interaction_Mod 9 1158.31       0.00   0.96   0.96 -569.75
# Additive_Mod    6 1164.49       6.18   0.04   1.00 -576.06
# Treatment       5 1241.86      83.55   0.00   1.00 -615.80
# Dist            3 1355.44     197.14   0.00   1.00 -674.67
# Null            2 1386.45     228.15   0.00   1.00 -691.20

summary(cfit5.aft)
# Call:
# survreg(formula = Surv(Time, Censor) ~ TreatReorder * Dist, data = data1)
#                                   Value Std. Error     z       p
# (Intercept)                      1.8344     0.1878  9.77 < 2e-16
# TreatReorderlight                1.1010     0.2738  4.02 5.8e-05
# TreatReordernonreactive          2.7122     0.4350  6.24 4.5e-10
# TreatReorderreactive             4.2902     0.7116  6.03 1.7e-09
# Distfar                         -1.0428     0.2643 -3.95 7.9e-05
# TreatReorderlight:Distfar       -0.9359     0.3808 -2.46  0.0140
# TreatReordernonreactive:Distfar -1.6714     0.5411 -3.09  0.0020
# TreatReorderreactive:Distfar    -0.1701     0.8493 -0.20  0.8412
# Log(scale)                       0.1544     0.0549  2.81  0.0049
#
# Scale= 1.17
#
# Weibull distribution
# Loglik(model)= -569.8   Loglik(intercept only)= -691.2
# Chisq= 242.9 on 7 degrees of freedom, p= 9e-49
# Number of Newton-Raphson Iterations: 6
# n= 234

################################################################################
################################################################################
### Does Loglik improve with the use of other distributions?
cfit5.aft.wei <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "weibull")
cfit5.aft.exp <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "exponential")
cfit5.aft.gaus <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "gaussian")
cfit5.aft.log <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "logistic")
cfit5.aft.lnorm <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "lognormal")
cfit5.aft.llog <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data1, dist = "loglogistic")

models <- list(cfit5.aft.wei, cfit5.aft.exp, cfit5.aft.gaus, cfit5.aft.log, cfit5.aft.lnorm, cfit5.aft.llog)
model.names <- c('Weibull', 'Exponential', 'Gaussian', 'Logistic', 'Lognormal', 'Loglogistic')
aictab(cand.set = models, modnames = model.names)
# Model selection based on AICc:
#
#             K    AICc Delta_AICc AICcWt Cum.Wt      LL
# Lognormal   9 1127.25       0.00   0.81   0.81 -554.22   # Best model
# Loglogistic 9 1130.17       2.92   0.19   1.00 -555.68
# Weibull     9 1158.31      31.06   0.00   1.00 -569.75
# Exponential 8 1164.73      37.48   0.00   1.00 -574.04
# Logistic    9 1660.48     533.23   0.00   1.00 -820.84
# Gaussian    9 1696.93     569.68   0.00   1.00 -839.06

################################################################################
################################################################################
### Also look at residual plots using different AFT model distributions

# fit models using various available distributions
fit_weib <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "weibull")
fit_exp <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "exponential")
fit_gaus <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "gaussian")
fit_log <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "logistic")
fit_lnorm <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "lognormal")
fit_llog <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "loglogistic")

par(mfrow=c(3,2))

#######################################
### Residual plot, Weibull distribution ==> okay fit
fitted_values <- fit_weib$linear.predictors
resids <- (log(fit_weib$y[, 1]) - fitted_values) / fit_weib$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Weibull",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Extreme Value distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

###########################################
### Residual plot, exponential distribution ==> okay fit
fitted_values <- fit_exp$linear.predictors
resids <- (log(fit_exp$y[, 1]) - fitted_values) / fit_exp$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Exponential",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Extreme Value distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

########################################
### Residual plot, Gaussian distribution ==> bad fit
fitted_values <- fit_gaus$linear.predictors
resids <- (log(fit_gaus$y[, 1]) - fitted_values) / fit_gaus$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Gaussian",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
yy <- pnorm(xx, lower.tail = FALSE)    # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Normal distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

########################################
### Residual plot, logistic distribution ==> bad fit
fitted_values <- fit_log$linear.predictors
resids <- (log(fit_log$y[, 1]) - fitted_values) / fit_log$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Logistic",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
yy <- plogis(xx, lower.tail = FALSE)   # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Logistic distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

#########################################
### Residual plot, lognormal distribution ==> good fit
fitted_values <- fit_lnorm$linear.predictors
resids <- (log(fit_lnorm$y[, 1]) - fitted_values) / fit_lnorm$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Lognormal",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
yy <- pnorm(xx, lower.tail = FALSE)    # for lognormal or Gaussian distribution
#yy <- plogis(xx, lower.tail = FALSE)  # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Normal distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

###########################################
### Residual plot, loglogistic distribution ==> good fit
fitted_values <- fit_llog$linear.predictors
resids <- (log(fit_llog$y[, 1]) - fitted_values) / fit_llog$scale
resKM <- survfit(Surv(resids, Censor) ~ 1, data = data2)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals, Loglogistic",
     ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
#yy <- exp(- exp(xx))
#yy <- pnorm(xx, lower.tail = FALSE)   # for lognormal or Gaussian distribution
yy <- plogis(xx, lower.tail = FALSE)   # for log-logistic or logistic distribution
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI KM estimate",
                       "Survival function of Logistic distribution"),
       lty = c(1,2,1), col = c(1,1,2), bty = "n")

################################################################################
################################################################################
### Plot cumulative hazard functions
# curves should steadily grow away from each other

# treatment group
KM_fit.t <- survfit(Surv(Time, Censor) ~ TreatReorder, data=data2)
plot(KM_fit.t, fun = "cumhaz")

# bait distance
KM_fit.d <- survfit(Surv(Time, Censor) ~ Dist, data=data2)
plot(KM_fit.d, fun = "cumhaz")
# curves end up mostly becoming parallel

################################################################################
################################################################################
### https://www.drizopoulos.com/courses/emc/survival%20analysis%20in%20r%20companion
### Tutorial to create effect plots to display results from complex models
# (i.e., interaction terms, etc ...)
### Try, using meatball data ...

##################
### log-rank tests

# treatment group
logrank.Tx <- survdiff(Surv(Time, Censor) ~ TreatReorder, data=data2)
logrank.Tx    # significant differences among treatment groups

# bait distance
logrank.Dist <- survdiff(Surv(Time, Censor) ~ Dist, data=data2)
logrank.Dist  # significant differences between bait-distance groups

################################################################################
################################################################################
### fit models using log-normal (best-fit) distribution

##########################################################################
### using "close" bait as reference level for Dist (the default in data2):
fit <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data2, dist = "lognormal")

# Check fixed effects for significance
library(car)
Anova(fit, type = 3)
# Analysis of Deviance Table (Type III tests)
#                   LR Chisq Df Pr(>Chisq)
# TreatReorder        90.541  3  < 2.2e-16 ***
# Dist                15.326  1  9.048e-05 ***
# TreatReorder:Dist   11.327  3    0.01008 *
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(fit)
# Call:
# survreg(formula = Surv(Time, Censor) ~ TreatReorder * Dist, data = data2,
#     dist = "lognormal")
#                                   Value Std. Error     z       p
# (Intercept)                      1.4695     0.1975  7.44 1.0e-13
# TreatReorderlight                0.7923     0.2808  2.82  0.0048
# TreatReordernonreactive          2.0065     0.3546  5.66 1.5e-08
# TreatReorderreactive             3.8999     0.4751  8.21 2.2e-16
# Distfar                         -1.1172     0.2794 -4.00 6.4e-05
# TreatReorderlight:Distfar       -0.8182     0.3961 -2.07  0.0389
# TreatReordernonreactive:Distfar -1.4553     0.4871 -2.99  0.0028
# TreatReorderreactive:Distfar     0.1332     0.6113  0.22  0.8275
# Log(scale)                       0.2100     0.0525  4.00 6.4e-05
#
# Scale= 1.23
#
# Log Normal distribution
# Loglik(model)= -554.2   Loglik(intercept only)= -664.8
# Chisq= 221.21 on 7 degrees of freedom, p= 3.6e-44
# Number of Newton-Raphson Iterations: 6
# n= 234

#################################################
### using "far" bait as reference level for Dist:
fit2.int <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data3, dist = "lognormal")

Anova(fit2.int, type = 3)
# Analysis of Deviance Table (Type III tests)
#                   LR Chisq Df Pr(>Chisq)
# TreatReorder       109.881  3  < 2.2e-16 ***
# Dist                15.326  1  9.048e-05 ***
# TreatReorder:Dist   11.327  3    0.01008 *
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(fit2.int)
# Call:
# survreg(formula = Surv(Time, Censor) ~ TreatReorder * Dist, data = data3,
#     dist = "lognormal")
#                                     Value Std. Error     z       p
# (Intercept)                        0.3523     0.1975  1.78  0.0745
# TreatReorderlight                 -0.0258     0.2794 -0.09  0.9263
# TreatReordernonreactive            0.5512     0.3339  1.65  0.0988
# TreatReorderreactive               4.0331     0.3929 10.27 < 2e-16
# Distclose                          1.1172     0.2794  4.00 6.4e-05
# TreatReorderlight:Distclose        0.8182     0.3961  2.07  0.0389
# TreatReordernonreactive:Distclose  1.4553     0.4871  2.99  0.0028
# TreatReorderreactive:Distclose    -0.1332     0.6113 -0.22  0.8275
# Log(scale)                         0.2100     0.0525  4.00 6.4e-05
#
# Scale= 1.23
#
# Log Normal distribution
# Loglik(model)= -554.2   Loglik(intercept only)= -664.8
# Chisq= 221.21 on 7 degrees of freedom, p= 3.6e-44
# Number of Newton-Raphson Iterations: 6
# n= 234

################################################################################
################################################################################
# FOR TABLE 2
# Run all models using lognormal distribution and compare AICs:
fit2.int <- survreg(Surv(Time, Censor) ~ TreatReorder * Dist, data=data3, dist = "lognormal")
fit2.add <- survreg(Surv(Time, Censor) ~ TreatReorder + Dist, data=data3, dist = "lognormal")
fit2.trt <- survreg(Surv(Time, Censor) ~ TreatReorder, data=data3, dist = "lognormal")
fit2.dist <- survreg(Surv(Time, Censor) ~ Dist, data=data3, dist = "lognormal")
fit2.null <- survreg(Surv(Time, Censor) ~ 1, data=data3, dist = "lognormal")

modelsset <- list(fit2.int, fit2.add, fit2.trt, fit2.dist, fit2.null)
ln.model.names <- c('Movement * Bait Position', 'Movement + Bait Position', 'Movement',
                    'Bait Position', 'Null')
aictab(cand.set = modelsset, modnames = ln.model.names)
# Model selection based on AICc:
#                          K    AICc Delta_AICc AICcWt Cum.Wt      LL
# Movement * Bait Position 9 1127.25       0.00   0.92   0.92 -554.22
# Movement + Bait Position 6 1132.14       4.89   0.08   1.00 -559.89
# Movement                 5 1209.27      82.02   0.00   1.00 -599.50
# Bait Position            3 1292.58     165.33   0.00   1.00 -643.24
# Null                     2 1333.71     206.46   0.00   1.00 -664.83

################################################################################
################################################################################
### Effect Plots

### create new data frame ('ND') with all combinations of covariates
# use 'ND' for showing effect plots
ND <- with(data3, expand.grid(
  Time = seq(0, 60, length.out = 25),
  TreatReorder = levels(TreatReorder),
  Dist = levels(Dist)
))
head(ND, 10)
dim(ND)

# predicted survival times, using fitted model and ND
prs <- predict(fit2.int, ND, se.fit = TRUE, type = "lp")

# add upper/lower CIs to data
ND$pred <- prs[[1]]
ND$se <- prs[[2]]
ND$lo <- ND$pred - 1.96 * ND$se
ND$up <- ND$pred + 1.96 * ND$se
head(ND, 51)
tail(ND, 51)

######################
### Graph effect plots
library(lattice)
xyplot(pred + lo + up ~ Time | TreatReorder*Dist, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Time", ylab = "Log Survival Time")

################################################################################
par(mfrow=c(2,2))
par(mfrow=c(1,1))

ggplot(ND, aes(x = TreatReorder, y = Time)) +
  geom_point(aes(color = TreatReorder)) +
  facet_wrap(~TreatReorder) +
  theme_bw() +
  ggtitle("Model fit with 95% CIs and PIs", "solid line = mean, dotted line = median") +
  geom_line(aes(y = pred), linetype = 1) +
  geom_line(aes(y = pred), linetype = 2) +
  geom_ribbon(aes(ymin = lo, ymax = up), alpha = 0.5) +
  geom_ribbon(aes(ymin = lo, ymax = up), alpha = 0.1)

#############################################
# look at one level of categorical covariate?
#ND <- with(data3, expand.grid(
#  Time = seq(0, 60, length.out = 50),
#  TreatReorder = factor("nonreactive", levels = levels(TreatReorder)),
#  Dist = levels(Dist)
#))

###############################################################
### Plot predicted survival in original time scale... use exp()

# predict, using fitted model and ND
prs2 <- predict(fit2.int, ND, se.fit = TRUE, type = "lp")

# add upper/lower CIs to data
ND$pred <- prs2[[1]]
ND$se <- prs2[[2]]
ND$lo <- exp(ND$pred - 1.96 * ND$se)
ND$up <- exp(ND$pred + 1.96 * ND$se)
ND$pred <- exp(ND$pred)
head(ND, 76)
tail(ND, 76)
dim(ND)

xyplot(pred + lo + up ~ Time | TreatReorder*Dist, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Time", ylab = "Survival Time")
xyplot(pred + lo + up ~ TreatReorder | Dist, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Treatment", ylab = "Survival Time")
xyplot(pred + lo + up ~ Dist | TreatReorder, data = ND,
       type = "l", lty = c(1,2,2), col = "black", lwd = 2,
       xlab = "Bait Distance", ylab = "Survival Time")

### MAKE THESE EFFECT PLOTS INTO TABLES OR OTHER GRAPHS?!?!?!?!?!?!?
### (one sketch of a prediction table is included after the boxplots below)

################################################################################
################################################################################
################################################################################
################################################################################
### Some descriptive statistics ...

###############################
### Boxplots from original data ... basic visual summary of data.

# Boxplot of survival data, by treatment group
with(data3, plot(TreatReorder, Time, xlab='Treatment Type', ylab='Minutes Until Taken',
                 xlim=c(0,5), ylim=c(0.1, 100), log='y', type='n'))

# Boxplot of survival data, by bait distance
with(data3, plot(Dist, Time, xlab='Bait Distance', ylab='Minutes Until Taken',
                 xlim=c(0,3), ylim=c(0.1, 100), log='y', type='n'))
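################################################################################
### Added sketch (an assumption, not part of the original analysis): one way to
### answer the "make these effect plots into tables" note above. Because the
### linear predictor of 'fit2.int' depends only on TreatReorder and Dist, one
### row per combination is enough; for the lognormal AFT, exp() of the linear
### predictor is the predicted (median) survival time. Object names 'pred.tab'
### and 'lp' are hypothetical.
pred.tab <- with(data3, expand.grid(TreatReorder = levels(TreatReorder),
                                    Dist = levels(Dist)))
lp <- predict(fit2.int, pred.tab, se.fit = TRUE, type = "lp")
pred.tab$PredTime <- exp(lp$fit)                       # predicted minutes until taken
pred.tab$Lower    <- exp(lp$fit - 1.96 * lp$se.fit)    # lower 95% CI
pred.tab$Upper    <- exp(lp$fit + 1.96 * lp$se.fit)    # upper 95% CI
pred.tab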
#######################################
### Marginal means, from collected data

# subset groups ...
pre.1 <- subset(data3, TreatReorder=="pre")
light.1 <- subset(data3, TreatReorder=="light")
nonreactive.1 <- subset(data3, TreatReorder=="nonreactive")
reactive.1 <- subset(data3, TreatReorder=="reactive")
close.1 <- subset(data3, Dist == "close")
far.1 <- subset(data3, Dist == "far")

# subset more specific groups
pre.close.1 <- subset(data3, Dist == "close" & TreatReorder == "pre")
pre.far.1 <- subset(data3, Dist == "far" & TreatReorder == "pre")
light.close.1 <- subset(data3, Dist == "close" & TreatReorder == "light")
light.far.1 <- subset(data3, Dist == "far" & TreatReorder == "light")
nonreactive.close.1 <- subset(data3, Dist == "close" & TreatReorder == "nonreactive")
nonreactive.far.1 <- subset(data3, Dist == "far" & TreatReorder == "nonreactive")
reactive.close.1 <- subset(data3, Dist == "close" & TreatReorder == "reactive")
reactive.far.1 <- subset(data3, Dist == "far" & TreatReorder == "reactive")

# means for each subset
mean <- as.data.frame(rbind(mean(pre.1$Time), mean(light.1$Time), mean(nonreactive.1$Time),
                            mean(reactive.1$Time), mean(close.1$Time), mean(far.1$Time),
                            mean(pre.close.1$Time), mean(pre.far.1$Time), mean(light.close.1$Time),
                            mean(light.far.1$Time), mean(nonreactive.close.1$Time), mean(nonreactive.far.1$Time),
                            mean(reactive.close.1$Time), mean(reactive.far.1$Time)))

# SDs for each subset
SD <- as.data.frame(rbind(sd(pre.1$Time), sd(light.1$Time), sd(nonreactive.1$Time), sd(reactive.1$Time),
                          sd(close.1$Time), sd(far.1$Time), sd(pre.close.1$Time), sd(pre.far.1$Time),
                          sd(light.close.1$Time), sd(light.far.1$Time), sd(nonreactive.close.1$Time),
                          sd(nonreactive.far.1$Time), sd(reactive.close.1$Time), sd(reactive.far.1$Time)))

# Standard error function
se <- function(x) sd(x)/sqrt(length(x))

# SEs for each subset
stderror <- as.data.frame(rbind(se(pre.1$Time), se(light.1$Time), se(nonreactive.1$Time), se(reactive.1$Time),
                                se(close.1$Time), se(far.1$Time), se(pre.close.1$Time), se(pre.far.1$Time),
                                se(light.close.1$Time), se(light.far.1$Time), se(nonreactive.close.1$Time),
                                se(nonreactive.far.1$Time), se(reactive.close.1$Time), se(reactive.far.1$Time)))

######################
# Create summary table
summary.table <- as.data.frame(cbind(mean, SD, stderror))
colnames(summary.table) <- c("Mean", "SD", "SE")
rownames(summary.table) <- c("Pre", "Light", "Nonreactive", "Reactive", "Close", "Far",
                             "Pre_Close", "Pre_Far", "Light_Close", "Light_Far",
                             "Nonreactive_Close", "Nonreactive_Far", "Reactive_Close", "Reactive_Far")
summary.table$Upper_Bar <- apply(summary.table[,c(1,3)], 1, sum)
Lower_Bar <- as.data.frame(cbind(summary.table$Mean - summary.table$SE))
summary.table <- as.data.frame(cbind(summary.table, Lower_Bar))
colnames(summary.table) <- c("Mean", "SD", "SE", "Upper_Bar", "Lower_Bar")
summary.table

# export
library(xlsx)
#write.xlsx(summary.table, file = "summary_table.xlsx")

###########################################
# Which baits SURVIVED the 60-minute trial?
survived <- subset(data3, Censor == 0)
survived
table(survived$Dist, survived$TreatReorder)
#       pre light nonreactive reactive
# far     0     0           0       11
# close   0     5          12       15
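################################################################################
### Added sketch (not in the original script): the same information as the
### count tables above and below, expressed as the proportion of baits that
### survived the full 60 minutes in each Distance x Treatment cell.
with(data3, tapply(Censor == 0, list(Dist, TreatReorder), mean))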
#########################################
# Which baits FAILED the 60-minute trial?
failed <- subset(data3, Censor == 1)
failed
table(failed$Dist, failed$TreatReorder)
#       pre light nonreactive reactive
# far    39    39          21        7
# close  39    34           9        3

################################################################################
################################################################################

###########################################
### Pairwise comparisons of predicted means
library(emmeans)

### Bait distance
emm.dist <- emmeans(fit2.int, "Dist")
pairs(emm.dist)
# NOTE: Results may be misleading due to involvement in interactions
#
#  contrast    estimate    SE  df t.ratio p.value
#  far - close    -1.65 0.196 225  -8.429  <.0001
#
# Results are averaged over the levels of: TreatReorder
# Results are given on the log (not the response) scale.
pwpm(emm.dist)

### Treatment group
emm.treat <- emmeans(fit2.int, "TreatReorder")
pairs(emm.treat)
# NOTE: Results may be misleading due to involvement in interactions
#
#  contrast               estimate    SE  df t.ratio p.value
#  pre - light              -0.383 0.198 225  -1.935  0.2164
#  pre - nonreactive        -1.279 0.244 225  -5.251  <.0001
#  pre - reactive           -3.966 0.311 225 -12.763  <.0001
#  light - nonreactive      -0.896 0.244 225  -3.674  0.0017
#  light - reactive         -3.583 0.311 225 -11.538  <.0001
#  nonreactive - reactive   -2.688 0.339 225  -7.929  <.0001
#
# Results are averaged over the levels of: Dist
# Results are given on the log (not the response) scale.
# P value adjustment: tukey method for comparing a family of 4 estimates
pwpm(emm.treat)

# plot
plot(emm.dist, comparisons = TRUE)
plot(emm.treat, comparisons = TRUE)

################################################################################
################################################################################
### END ###