########################################################################################################## ## ## Calculating RT25 values ## ## Project: RT Literature Review ## Author: Leah Swanson ## Date Created: July 13, 2022 by Allen Olmstead ## Last edited: July 27, 2022 by Leah Swanson ## ## Input: ## RT_data: csv of residual toxicity data ## ## Outputs: ## A logistic regression and RT25 value ########################################################################################################## ########################################################################################################## # # ### EACH TIME YOU MAKE AN EDIT: Update "Last edited" field in header and the name of the editor" ######### # ### ########################################################################################################## ########################################################################################################## ### BEFORE YOU RUN THIS SCRIPT ### 1. This script pulls in a csv version of the RT literature review spreadsheet. MAKE SURE ### IT IS UP TO DATE. SAVE THE MOST RECENT RT LITERATURE REVIEW SPREADSHEET AS THE XLSX FILE, JUST ### TO BE SAFE ### 2. Set working directory ############################### library(tidyverse) # read data set in ## To create a data set that works with this code sort master data set by "Paper," ## "Active Ingredient," "Formulation", "Species," and "Application Rate." Remove unused columns ## and separate each into a separate spreadsheet (see example data set). Save as a csv file. setwd("/Users/leahswanson/Desktop") dat <- read_csv("Nomia Johansen et al. 1983 chlorpyrifos.csv", show_col_types = FALSE) ##Getting number of mortality and survived ##create a function to calculate be mortalities mortality <- function(x) { deaths <- (dat$'Mortality (%)'/100) * dat$'number_of_bees' return(deaths) } ##Create a function to calculate alive bees survival <- function(x2) { alive <- dat$'number_of_bees' - ((dat$'Mortality (%)'/100) * dat$'number_of_bees') return(alive) } ##Apply the functions to the dataframe and create a new column to hold the values dat$mort <- apply(X = dat, MARGIN = 1, FUN = mortality) dat$surv <- apply(X = dat, MARGIN = 1, FUN = survival) ##round values to the nearest whole number dat$mort <- round(dat$mort, digits = 0) dat$surv <- round(dat$surv, digits = 0) ########################################################################### # fit a binomial glm (logistic regression) bin_fit <- glm(cbind(mort, surv) ~ time, dat, family = binomial) # vectors for graphing model fit x <- seq(0, 80, length = 1000) y_fit <- predict(bin_fit, data.frame(time = x), type = "response", se.fit = TRUE) lcl <- y_fit$fit - 1.96*y_fit$se.fit # 1.96 = 95% CI (Wald) ucl <- y_fit$fit + 1.96*y_fit$se.fit # plot binomial glm fit ggplot() + geom_ribbon(data = NULL, aes(x = x, ymin = lcl, ymax = ucl), color = NA, alpha = 0.2) + geom_line(data = NULL, aes(x, y_fit$fit), color = "blue", size = 1) + geom_point(data = dat, aes(time, mort/dat$number_of_bees), color = "red", size = 2) + theme_bw() + labs(x = "Time (hr)", y = "Mortality (proportion)") # test for overdisperion using DHARMa package function DHARMa::testDispersion(bin_fit) ########################################################################### # fit quasibinomial to account for overdispersion qbin_fit <- glm(cbind(mort, surv) ~ time, dat, family = quasibinomial) summary(qbin_fit) # vectors for graphing model fit y_qfit <- predict(qbin_fit, data.frame(time = x), type = "response", se.fit = TRUE) qlcl <- y_qfit$fit - 1.96*y_qfit$se.fit qucl <- y_qfit$fit + 1.96*y_qfit$se.fit # fit a quasibinomial glm fit ggplot() + geom_ribbon(data = NULL, aes(x = x, ymin = qlcl, ymax = qucl), color = NA, alpha = 0.2) + geom_line(data = NULL, aes(x, y_qfit$fit), color = "blue", size = 1) + geom_point(data = dat, aes(time, mort/30), color = "red", size = 2) + geom_hline(yintercept = 0.25, lty = 2) + geom_vline(xintercept = 17, lty = 2) + # value from section below theme_bw() + labs(x = "Time (hr)", y = "Mortality (proportion)") ########################################################################### # starting values for algorithm start_low <- 6.5 # pick a value below that estimated from graph start_high <- 7 # pick a value above that estimated from graph target <- 0.25 # the proportional mortality you want a time for # wrapper function for predict get_mort <- function(x) { predict(bin_fit, data.frame(time = x), type = "response") } # initialize a tibble to populate optim_time <- tibble(lo = start_low, hi = start_high, lo_pred = get_mort(start_low), hi_pred = get_mort(start_high)) # loop for algorithm that fills out tibble with values for(i in 1:99) { diff <- optim_time[i, 2] - optim_time[i, 1] if(optim_time$lo_pred[i] - target < target - optim_time$hi_pred[i]) { optim_time[i + 1, 1] <- optim_time[i, 1] optim_time[i + 1, 2] <- optim_time[i, 2] - diff/2 } else { optim_time[i + 1, 1] <- optim_time[i, 1] + diff/2 optim_time[i + 1, 2] <- optim_time[i, 2] } optim_time[i + 1, 3] <- get_mort(optim_time$lo[i + 1]) optim_time[i + 1, 4] <- get_mort(optim_time$hi[i + 1]) } # check the last rows to verify sufficient convergence tail(optim_time) ########################################################################### # nls fits for logistic equation dat <- mutate(dat, mort2 = mort/(mort + surv)) # 2 parameter logistic nls_fit2 <- nls(mort/(mort + surv) ~ 1/(1 + exp(-k*(time - x0))), dat, start = list(k = -0.1, x0 = 150)) # 3 parameter logistic nls_fit3a <- nls(mort/(mort + surv) ~ max/(1 + exp(-k*(time - x0))), dat, start = list(max = 1, k = -0.1, x0 = 150)) # alternative 3 parameter logistic nls_fit3b <- nls(mort/(mort + surv) ~ (1 - min)/(1 + exp(-k*(time - x0))) + min, dat, start = list(k = -0.1, x0 = 150, min = 0)) # 4 parameter logistic nls_fit4 <- nls(mort/(mort + surv) ~ (max - min)/(1 + exp(-k*(time - x0))) + min, dat, start = list(max = 0.8, k = -0.1, x0 = 150, min = 0)) # tibble for fit values for graphing fitz <- tibble(fit_x = rep(x, 3), fit_y = c(predict(nls_fit2, data.frame(time = x)), predict(nls_fit3b, data.frame(time = x)), predict(nls_fit4, data.frame(time = x))), Fit = rep(c("2", "3b", "4"), each = length(x))) # graph the data with all nls fits ggplot() + geom_line(data = fitz, aes(fit_x, fit_y, color = fitz$Fit), size = 1) + geom_point(data = dat, aes(time, mort2), color = "red", size = 2) + theme_bw() + labs(x = "Time (hr)", y = "Mortality (proportion)") ##testing the fit of the models library(AICcmodavg) model1 <- lm(time ~ dat$`Mortality (%)` + formulation, data = dat) model2 <- lm(time ~ dat$`Mortality (%)`, data = dat) models <- list(model1, model2) mod.names <- c('formulation', 'sans formulation') aictab(cand.set = models, modnames = mod.names) models <- list(nls_fit2, nls_fit3b, nls_fit4) mod.names <- c('2 parameter', '3 parameter', '4 parameter') aictab(cand.set = models, modnames = mod.names)