edwin <- read.csv("PynegarDataUpdated.csv") edwin$Water_System_No <- as.factor(edwin$Water_System_No) edwin$Stream_Spring_2015 <- as.factor(edwin$Stream_Spring_2015) edwin$treat <- as.numeric(as.character(edwin$Treatment_Control_Site)) edwin$treat.fac <- as.factor(ifelse(edwin$treat==0, "Control", "Treatment")) edwin$logEcoli2010 <- log(edwin$E_coli_2010 + 1) ##clean data are required as MatchIt package is very picky about ##missing data (any missing data even if not in analysis variables means analysis won't run) ##Baseline data for 2010 where there is water quality data edwin.clean.all <- edwin[edwin$In_RCT==1 & !is.na(edwin$E_coli_2010) & !is.na(edwin$treat), c("Site_Code", "Community_No", "Category_Site_Intake_Tap", "treat", "E_coli_2010", "Offset", "logEcoli2010")] length(unique(droplevels(edwin.clean.all$Community_No))) table(edwin.clean.all$Category_Site_Intake_Tap) ##Data where there are common data between 2010 and 2015 (the same sites for intakes) edwin.clean <- edwin[edwin$In_RCT==1 & edwin$Same_Site_2010_15==1 & !is.na(edwin$E_coli_2010) & !is.na(edwin$treat), c("Site_Code", "Community_No", "Category_Site_Intake_Tap", "Total_E_coli_2015", "Water_System_No", "treat", "E_coli_2010", "Offset", "logEcoli2010")] length(unique(droplevels(edwin.clean$Community_No))) table(edwin.clean$Category_Site_Intake_Tap) edwin.clean.2015 <- na.omit(edwin[edwin$In_RCT==1 & edwin$Data_Present_2015==1 & !is.na(edwin$Total_E_coli_2015) & !is.na(edwin$Category_Site_Intake_Tap) & edwin$Stream_Spring_2015!="2" & !is.na(edwin$Water_System_No) & !is.na(edwin$treat), c("Site_Code", "Community_No", "Category_Site_Intake_Tap", "Total_E_coli_2015", "Water_System_No", "treat", "Offset")]) length(unique(droplevels(edwin.clean.2015$Community_No))) table(edwin.clean.2015$Category_Site_Intake_Tap) ##Non RCT sites with data unique(edwin[edwin$In_RCT==0 & !is.na(edwin$Total_E_coli_2015), ]$Community_No) ##Scheme uptake analysis ##First select subset and average over taps/intakes to avoid duplication require(dplyr) require(ggplot2) edwin.subset <- edwin %>% filter(!is.na(E_coli_2010) & In_RCT) %>% select(Community_No, Same_Site_2010_15, treat.fac, L1_proportion_all_L, L1_proportion_eligible_L) %>% group_by(Community_No, Same_Site_2010_15, treat.fac) %>% summarize_all(mean, na.rm=TRUE) edwin.subset <- rbind(data.frame(edwin.subset, common.fac="All communities with baseline water data"), data.frame(edwin.subset[edwin.subset$Same_Site_2010_15==1, ], common.fac="Subset of communities with comparable end-line")) summary(edwin.subset) summary(edwin.subset[edwin.subset$treat.fac=="Treatment" & edwin.subset$common.fac=="All communities with baseline water data",]) ##Fig 7 ggplot(edwin.subset, aes(y=L1_proportion_eligible_L, x=treat.fac)) + geom_boxplot() + facet_wrap(~common.fac) + theme_bw() + xlab(NULL) + ylab("Eligible proportion of community in scheme") + theme(legend.position="none") + scale_y_continuous(labels = scales::percent) + theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) + theme(axis.text = element_text(size = 12)) + theme(legend.text = element_text(size = 12)) + theme(legend.title = element_text(size = 14)) ggsave("Figure 7.pdf", device="pdf") ##test for differences ##All t.test(L1_proportion_eligible_L~treat.fac, data=edwin.subset[edwin.subset$common.fac=="All communities with baseline water data",]) ##after attrition t.test(L1_proportion_eligible_L~treat.fac, data=edwin.subset[edwin.subset$common.fac!="All communities with baseline water data",]) ##Compliance analysis first at intake level require(dplyr) compliance.summary <- edwin %>% filter(In_RCT==1 & Category_Site_Intake_Tap=="A") %>% group_by(treat.fac) %>% summarize( N.compliance = sum(!is.na(Compliant_ARA_2015)), N.cattle = sum(!is.na(Cattle_Access_2015)), perc.compliance = mean(Compliant_ARA_2015, na.rm=TRUE) * 100, perc.cattle = mean(Cattle_Access_2015, na.rm=TRUE) * 100) ##Compliance summary at community level compliance.summary.comm <- edwin %>% filter(In_RCT==1 & Category_Site_Intake_Tap=="A") %>% group_by(Community_No, treat.fac) %>% summarize( N.compliance = sum(!is.na(Compliant_ARA_2015)), N.cattle = sum(!is.na(Cattle_Access_2015)), mean.compliance = mean(Compliant_ARA_2015, na.rm=TRUE), mean.cattle = mean(Cattle_Access_2015, na.rm=TRUE)) %>% group_by(treat.fac) %>% summarize( N.compliance.t = sum(!is.na(mean.compliance)), N.cattle.t = sum(!is.na(mean.compliance)), mean.compliance.t = mean(mean.compliance, na.rm=TRUE), mean.cattle.t = mean(mean.cattle, na.rm=TRUE)) ##Endline analysis ##Roof intakes excluded ##count number of communities in this analysis length(unique(edwin[edwin$In_RCT==1 & !is.na(edwin$treat) & edwin$Data_Include_2015==1 & edwin$Stream_Spring_2015!=2, ]$Community_No))-1 require(glmmTMB) mod1_2015.tmb <- glmmTMB(Total_E_coli_2015 ~ (1 | Community_No/Water_System_No) + treat + Category_Site_Intake_Tap + Stream_Spring_2015, data = edwin[edwin$In_RCT==1 & edwin$Data_Include_2015==1 & edwin$Stream_Spring_2015!=2, ], family = "nbinom1") nrow(mod1_2015.tmb$frame) length(unique(mod1_2015.tmb$frame$Community_No)) with(mod1_2015.tmb$frame, table(Category_Site_Intake_Tap)) summary(mod1_2015.tmb) mod1_2015_95int <- as.data.frame(confint(mod1_2015.tmb)) colnames(mod1_2015_95int) <- c("li", "ui", "Mean") mod1_2015_95int$Predictors <- c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (20ml equivalent)", "Endline effect of being a\n treatment community site", "Tap compared\n with intake", "Spring intake compared\n with stream intake", "Water system random effect", "Community random effect", "Sigma") mod1_2015_95int$Predictors <- factor(c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (20ml equivalent)", "Endline effect of being a\n treatment community site", "Tap compared\n with intake", "Spring intake compared\n with stream intake", "Water system random effect", "Community random effect", "Sigma"), levels=c("Endline effect of being a\n treatment community site", "Spring intake compared\n with stream intake", "Tap compared\n with intake", "Log-transformed 2015\n E. coli CFU concentration\n in control sites (20ml equivalent)", "Water system random effect", "Community random effect", "Sigma")) ##Fig 6 ggplot(mod1_2015_95int[1:4, ], aes(x = Predictors, y = Mean, ymin = li, ymax = ui)) + geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") + xlab("Predictors") + theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) + coord_flip() ggsave("Figure 6.pdf", device="pdf") ##Table ?? require(officer) require(flextable) endline.table <- mod1_2015_95int[ , c(4, 3, 1, 2)] colnames(endline.table) <- c("Model coefficient", "Value", "Lower 95% value", "Upper 95% value") endline.table <- endline.table %>% regulartable() %>% set_formatter(Value = function(x) round(x, 1)) %>% set_formatter("Lower 95% value" = function(x) round(x, 1)) %>% set_formatter("Upper 95% value" = function(x) round(x, 1)) %>% bold(part="header") %>% theme_box() endline.doc <- read_docx() endline.doc <- body_add_flextable(endline.doc, value=endline.table) print(endline.doc, target="endlinetable.docx") ##Matched analysis require(MatchIt) m.ecoli.clean <- matchit(treat ~ E_coli_2010, data=edwin.clean[edwin.clean$Category_Site_Intake_Tap=="A", ], method="genetic") m.ecoli.clean.data <- match.data(m.ecoli.clean) ##need to merge back in tap data edwin.clean$distance <- m.ecoli.clean.data$distance[match(edwin.clean$Water_System_No, m.ecoli.clean.data$Water_System_No)] edwin.clean$weights <- m.ecoli.clean.data$weights[match(edwin.clean$Water_System_No, m.ecoli.clean.data$Water_System_No)] m.ecoli.clean.data <- rbind(m.ecoli.clean.data, edwin.clean[edwin.clean$Category_Site_Intake_Tap=="B", ]) ##count communites length(unique(m.ecoli.clean.data$Community_No)) length(unique(m.ecoli.clean.data$Water_System_No)) nrow(m.ecoli.clean.data) ##Unweigted model (glmmADMB does not allow weights) require(glmmADMB) mod1_2010_15 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + treat * E_coli_2010 + offset(Offset), data = m.ecoli.clean.data, family = "nbinom") ##Weighted model with glmmTMB require(glmmTMB) mod1_2010_15.tmb <- glmmTMB(Total_E_coli_2015 ~ (1 | Water_System_No) + treat * E_coli_2010 + offset(Offset), data = m.ecoli.clean.data, family = "nbinom2", weights=weights) summary(mod1_2010_15.tmb) mod1_2010_15_95int <- as.data.frame(confint(mod1_2010_15.tmb)) colnames(mod1_2010_15_95int) <- c("li", "ui", "Mean") mod1_2010_15_95int$Predictors <- as.factor(c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (5ml equivalent)", "Baseline effect of being a\n treatment community site", "Mean difference between\n E. coli CFU concentrations\n at sites in 2010 and 2015", "Difference in differences in E. coli\n CFU concentrations between\n 2010 and 2015 between treatment\n and control community sites", "Water system random effect", "Sigma")) mod1_2010_15_95int$Predictors <- factor(c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (5ml equivalent)", "Baseline effect of being a\n treatment community site", "Mean difference between\n E. coli CFU concentrations\n at sites in 2010 and 2015", "Difference in differences in E. coli\n CFU concentrations between\n 2010 and 2015 between treatment\n and control community sites", "Water system random effect", "Sigma"), levels=c("Difference in differences in E. coli\n CFU concentrations between\n 2010 and 2015 between treatment\n and control community sites", "Baseline effect of being a\n treatment community site", "Mean difference between\n E. coli CFU concentrations\n at sites in 2010 and 2015", "Log-transformed 2015\n E. coli CFU concentration\n in control sites (5ml equivalent)", "Water system random effect", "Sigma")) ##Fig 5 ggplot(mod1_2010_15_95int[1:4, ], aes(x = Predictors, y = Mean, ymin = li, ymax = ui)) + geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") + xlab("Predictors") + theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) + coord_flip() ggsave("Figure 5.pdf", device="pdf") matched.table <- mod1_2010_15_95int[ , c(4, 3, 1, 2)] colnames(matched.table) <- c("Model coefficient", "Value", "Lower 95% value", "Upper 95% value") matched.table <- matched.table %>% regulartable() %>% set_formatter(Value = function(x) round(x, 1)) %>% set_formatter("Lower 95% value" = function(x) round(x, 1)) %>% set_formatter("Upper 95% value" = function(x) round(x, 1)) %>% bold(part="header") %>% theme_box() matched.doc <- read_docx() matched.doc <- body_add_flextable(matched.doc, value=matched.table) print(matched.doc, target="matchedtable.docx") ##Blanace chacking for baseline data require(cobalt) require(ggplot2) require(reshape2) # formula.balance <- as.formula(treat ~ E_coli_2010 + OTB_Size_Strat + Cows_Strat + Turbidity_JTU_0 + hospital_time) balance.names <- data.frame(new=c("E. coli CFU in 2010", "Number of households", "Cows per household", "Turbidity", "Time to hospital"), old=c("E_coli_2010", "OTB_Size_Strat", "Cows_Strat", "Turbidity_JTU_0", "hospital_time")) ##Estimate balance ##2010 & 2015 data bal.2015 <- bal.tab(formula.balance, data=edwin[edwin$Category_Site_Intake_Tap=="A" & !is.na(edwin$treat) & edwin$Same_Site_2010_15==1, ], disp.means=TRUE, un=TRUE, s.d.denom="pooled") ##2010 data bal.2010 <- bal.tab(formula.balance, data=edwin[edwin$Category_Site_Intake_Tap=="A" & !is.na(edwin$treat) & edwin$Data_Present_2010==1 & edwin$In_RCT==1, ], disp.means=TRUE, un=TRUE, s.d.denom="pooled") ##Extract data for plotting bal.2015.p.data <- cbind(balance.names, bal.2015$Balance[rownames(bal.2015$Balance) %in% balance.names$old, ]) bal.2010.p.data <- cbind(balance.names, bal.2010$Balance[rownames(bal.2010$Balance) %in% balance.names$old, ]) bal.all.p.data <- cbind(bal.2015.p.data[, 1:2], diff.2010 = abs(bal.2010.p.data$Diff.Un), diff.2015 = abs(bal.2015.p.data$Diff.Un)) bal.all.p.data.l <- melt(bal.all.p.data, measure.vars = c("diff.2010", "diff.2015")) bal.all.p.data.l$Balance <- rep(c("All communities\n with baseline water data", "Subset of communities\n with comparable end-line"), each=nrow(bal.all.p.data.l)/2) bal.all.p.data.l$new <- factor(bal.all.p.data.l$new, levels=c("E. coli CFU in 2010", "Turbidity", "Time to hospital", "Cows per household", "Number of households")) ##Figure 3 ##Plot balance comparison ggplot(bal.all.p.data.l, aes(x=value, y=new, shape=Balance)) + geom_vline(xintercept=0.25, lty=2, lwd=0.75) + geom_vline(xintercept=0.0, lty=1, lwd=0.75) + geom_point(size=5) + xlim(0, 0.8) + theme(legend.position="none") + theme_bw() + ggtitle(NULL) + xlab("Standardized absolute mean difference") + ylab(NULL) + theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 16)) + theme(axis.text = element_text(size = 12)) + theme(legend.text = element_text(size = 12)) + theme(legend.title = element_text(size = 14)) ggsave("Figure 3.pdf", device="pdf") ##Statistical analysis of baseline difference require(glmmTMB) mod1_2010_common.tmb <- glmmTMB(E_coli_2010 ~ (1 | Community_No) + treat, data = edwin[edwin$In_RCT==1 & edwin$Same_Site_2010_15==1 & edwin$Stream_Spring_2015!=2 & edwin$Category_Site_Intake_Tap =="A", ], family = "nbinom1") summary(mod1_2010_common.tmb) mod1_2010_all.tmb <- glmmTMB(E_coli_2010 ~ (1 | Community_No) + treat, data = edwin[edwin$In_RCT==1 & edwin$Data_Present_2010==1 & edwin$Category_Site_Intake_Tap =="A", ], family = "nbinom1") summary(mod1_2010_all.tmb) simple.common <- confint(mod1_2010_common.tmb) simple.all <- confint(mod1_2010_all.tmb) simple.merge <- as.data.frame(rbind(simple.all, simple.common)) colnames(simple.merge) <- c("li", "ui", "Mean") simple.merge$common.fac <- as.factor(rep(c("All communities with baseline water data", "Subset of communities\n with comparable end-line"), each=4)) simple.merge$Predictors <- as.factor(rep(c("Intercept", "Community difference\n at baseline", "Community random effect", "Sigma"), 2)) require(ggplot2) ##Fig for reviewers ggplot(simple.merge[c(1:2, 5:6), ], aes(x = Predictors, y = Mean, ymin = li, ymax = ui)) + geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") + xlab("Predictors") + facet_wrap(~common.fac) + theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) + coord_flip() ggsave("Fig for response.pdf", device="pdf") ##biophysical analysis require("reshape2") require("ggplot2") require("lme4") require("glmmADMB") require("MuMIn") require("dplyr") require("grid") df <- edwin df$Water_System_No <- as.factor(df$Water_System_No) df$Category_Site_Intake_Tap <- as.factor(df$Category_Site_Intake_Tap) df$Data_Present_2010 <- as.factor(df$Data_Present_2010) df$Data_Present_2015 <- as.factor(df$Data_Present_2015) df$Same_Site_2010_15 <- as.factor(df$Same_Site_2010_15) df$Data_Complete_2015 <- as.factor(df$Data_Complete_2015) df$Data_Include_2015 <- as.factor(df$Data_Include_2015) df$Data_Include_RCT_2015 <- as.factor(df$Data_Include_RCT_2015) df$In_RCT <- as.factor(df$In_RCT) df$Treatment_Control_Site <- as.factor(df$Treatment_Control_Site) df$Stream_Spring_2015 <- as.factor(df$Stream_Spring_2015) df$Disturbance_2015 <- as.factor(df$Disturbance_2015) df$Black_Mud_2015 <- as.factor(df$Black_Mud_2015) df$Faeces_Presence_2015 <- as.factor(df$Faeces_Presence_2015) df$Cattle_2015 <- as.factor(df$Cattle_2015) df$Agriculture_2015 <- as.factor(df$Agriculture_2015) df$Substrate_2015 <- as.factor(df$Substrate_2015) df$Cattle_Access_2015 <- as.factor(df$Cattle_Access_2015) df$Compliant_ARA_2015 <- as.factor(df$Compliant_ARA_2015) mod1_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 + Substrate_2015 + Temp_2015 * pH_2015 * Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod1_2015) AIC(mod1_2015) mod2_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 + Substrate_2015 + Temp_2015 + pH_2015 + Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod2_2015) AIC(mod2_2015) mod3_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 + Temp_2015 + pH_2015 + Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod3_2015) AIC(mod3_2015) mod4_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 + pH_2015 + Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod4_2015) AIC(mod4_2015) mod5_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod5_2015) AIC(mod5_2015) mod6_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod6_2015) AIC(mod6_2015) mod7_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod7_2015) AIC(mod7_2015) mod8_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod8_2015) AIC(mod8_2015) mod9_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Stream_Spring_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod9_2015) AIC(mod9_2015) mod10_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod10_2015) AIC(mod10_2015) mod11_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod11_2015) AIC(mod11_2015) round(Weights(AIC(mod1_2015, mod2_2015, mod3_2015, mod4_2015, mod5_2015, mod6_2015, mod7_2015, mod8_2015, mod9_2015, mod10_2015, mod11_2015)), 10) mod12_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Cattle_Access_2015 + Compliant_ARA_2015 + Faeces_Presence_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod12_2015) AIC(mod12_2015) mod13_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Cattle_Access_2015 + Faeces_Presence_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod13_2015) AIC(mod13_2015) mod14_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Compliant_ARA_2015 + Faeces_Presence_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod14_2015) AIC(mod14_2015) mod15_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Compliant_ARA_2015 + Cattle_Access_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod15_2015) AIC(mod15_2015) mod16_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Faeces_Presence_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod16_2015) AIC(mod16_2015) mod17_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Cattle_Access_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod17_2015) AIC(mod17_2015) mod18_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Compliant_ARA_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom") summary(mod18_2015) AIC(mod18_2015) Weights(AIC(mod7_2015, mod12_2015, mod13_2015, mod14_2015, mod15_2015, mod16_2015, mod17_2015, mod18_2015)) mod19_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Faeces_Presence_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_RCT_2015 == 1, ]), family = "nbinom") summary(mod19_2015) AIC(mod19_2015) mod20_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 + Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Faeces_Presence_2015 + Treatment_Control_Site, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_RCT_2015 == 1, ]), family = "nbinom") summary(mod20_2015) AIC(mod20_2015) Weights(AIC(mod19_2015, mod20_2015)) # Coefficient diagram for model 16 mod16_2015_95int <- confint(mod16_2015) mod16_2015_95int # Intercept represents 2015 mean count in control sites, 2010=average increase # 2010/2015 mod16_2015_df <- data.frame(parameter = names(coef(mod16_2015)), Predictors = c("Log-transformed E. coli\n CFU concentration", "No disturbance\n of sediment", "Tap compared\n with intake", "Spring intake compared\n with stream intake", "Turbidity (FAU/100)", "Faeces present\n in forest", "Faeces present\n in water"), coef = coef(mod16_2015), ymin = mod16_2015_95int[, 1], ymax = mod16_2015_95int[, 2]) target <- c("Faeces present\n in water", "Faeces present\n in forest", "Turbidity (FAU/100)", "Spring intake compared\n with stream intake", "Tap compared\n with intake", "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration") mod16_2015_df[match(target, mod16_2015_df$Predictors), ] mod16_2015_df$Pred2 <- factor(mod16_2015_df$Predictors, levels = c("Faeces present\n in water", "Faeces present\n in forest", "Turbidity (FAU/100)", "Spring intake compared\n with stream intake", "Tap compared\n with intake", "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration")) ggplot(mod16_2015_df, aes(x = Pred2, y = coef, ymin = ymin, ymax = ymax)) + geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") + xlab("Predictors") + coord_cartesian(ylim = c(-2.55, 4.25)) + theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) + coord_flip() ggsave("Figure 4.pdf", device="pdf") # Coefficient diagram for model 20 mod20_2015_95int <- confint(mod20_2015) mod20_2015_95int mod20_2015_df <- data.frame(parameter = names(coef(mod20_2015)), Predictors = c("Log-transformed E. coli\n CFU concentration at\n control community sites", "No disturbance\n of sediment", "Tap compared\n with intake", "Spring intake compared\n with stream intake", "Turbidity (FAU/100)", "Faeces present\n in forest", "Faeces present\n in water", "Treatment community site"), coef = coef(mod20_2015), ymin = mod20_2015_95int[, 1], ymax = mod20_2015_95int[, 2]) target <- c("Treatment community site", "Faeces present\n in water", "Faeces present\n in forest", "Turbidity (FAU/100)", "Spring intake compared\n with stream intake", "Tap compared\n with intake", "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration at\n control community sites") mod20_2015_df[match(target, mod20_2015_df$Predictors), ] mod20_2015_df$Pred2 <- factor(mod20_2015_df$Predictors, levels = c("Treatment community site", "Faeces present\n in water", "Faeces present\n in forest", "Turbidity (FAU/100)", "Spring intake compared\n with stream intake", "Tap compared\n with intake", "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration at\n control community sites")) ggplot(mod20_2015_df, aes(x = Pred2, y = coef, ymin = ymin, ymax = ymax)) + geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") + xlab("Predictors") + coord_cartesian(ylim = c(-2.55, 4.25)) + theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) + coord_flip() ggsave("Supplementary biophysical figure.pdf", device="pdf")