edwin <- read.csv("PynegarDataUpdated.csv")
edwin$Water_System_No <- as.factor(edwin$Water_System_No)
edwin$Stream_Spring_2015 <- as.factor(edwin$Stream_Spring_2015)
edwin$treat <- as.numeric(as.character(edwin$Treatment_Control_Site))
edwin$treat.fac <- as.factor(ifelse(edwin$treat==0, "Control", "Treatment"))
edwin$logEcoli2010 <- log(edwin$E_coli_2010 + 1)

##clean data are required as MatchIt package is very picky about
##missing data (any missing data even if not in analysis variables means analysis won't run)
##Baseline data for 2010 where there is water quality data 
edwin.clean.all <- edwin[edwin$In_RCT==1 &
                     !is.na(edwin$E_coli_2010) &
                     !is.na(edwin$treat), c("Site_Code", "Community_No", "Category_Site_Intake_Tap",
                                            "treat",
                                            "E_coli_2010", "Offset", "logEcoli2010")]
length(unique(droplevels(edwin.clean.all$Community_No)))
table(edwin.clean.all$Category_Site_Intake_Tap)
##Data where there are common data between 2010 and 2015 (the same sites for intakes)
edwin.clean <- edwin[edwin$In_RCT==1 &
                     edwin$Same_Site_2010_15==1 &
                     !is.na(edwin$E_coli_2010) &
                     !is.na(edwin$treat), c("Site_Code", "Community_No", "Category_Site_Intake_Tap", 
                                            "Total_E_coli_2015", "Water_System_No",
                                            "treat",
                                            "E_coli_2010", "Offset", "logEcoli2010")]
length(unique(droplevels(edwin.clean$Community_No)))
table(edwin.clean$Category_Site_Intake_Tap)
edwin.clean.2015 <- na.omit(edwin[edwin$In_RCT==1 &
                     edwin$Data_Present_2015==1 &
                     !is.na(edwin$Total_E_coli_2015) & !is.na(edwin$Category_Site_Intake_Tap) &
                     edwin$Stream_Spring_2015!="2" & !is.na(edwin$Water_System_No)  & 
                     !is.na(edwin$treat), c("Site_Code", "Community_No", "Category_Site_Intake_Tap", 
                                            "Total_E_coli_2015", "Water_System_No",
                                            "treat", "Offset")])
length(unique(droplevels(edwin.clean.2015$Community_No)))
table(edwin.clean.2015$Category_Site_Intake_Tap)

##Non RCT sites with data
unique(edwin[edwin$In_RCT==0 & !is.na(edwin$Total_E_coli_2015), ]$Community_No)

##Scheme uptake analysis
##First select subset and average over taps/intakes to avoid duplication
require(dplyr)
require(ggplot2)
edwin.subset <- edwin %>%
    filter(!is.na(E_coli_2010) & In_RCT) %>%
    select(Community_No, Same_Site_2010_15, treat.fac, L1_proportion_all_L, L1_proportion_eligible_L) %>%
    group_by(Community_No, Same_Site_2010_15, treat.fac) %>%
    summarize_all(mean, na.rm=TRUE)
edwin.subset <- rbind(data.frame(edwin.subset, common.fac="All communities with baseline water data"),
                      data.frame(edwin.subset[edwin.subset$Same_Site_2010_15==1, ],
                            common.fac="Subset of communities with comparable end-line"))

summary(edwin.subset)
summary(edwin.subset[edwin.subset$treat.fac=="Treatment" &
                     edwin.subset$common.fac=="All communities with baseline water data",])


##Fig 7
ggplot(edwin.subset, aes(y=L1_proportion_eligible_L, x=treat.fac)) +
    geom_boxplot() +
    facet_wrap(~common.fac) +
    theme_bw() + xlab(NULL) + ylab("Eligible proportion of community in scheme") +
    theme(legend.position="none") +
    scale_y_continuous(labels = scales::percent) +
    theme(plot.title = element_text(size = 24)) +
    theme(axis.title = element_text(size = 18)) +
    theme(axis.text = element_text(size = 12)) +
    theme(legend.text = element_text(size = 12)) +
    theme(legend.title = element_text(size = 14))
ggsave("Figure 7.pdf", device="pdf")

##test for differences
##All
t.test(L1_proportion_eligible_L~treat.fac,
       data=edwin.subset[edwin.subset$common.fac=="All communities with baseline water data",])
##after attrition
t.test(L1_proportion_eligible_L~treat.fac,
       data=edwin.subset[edwin.subset$common.fac!="All communities with baseline water data",])

##Compliance analysis first at intake level
require(dplyr)
compliance.summary <- edwin %>%
    filter(In_RCT==1 & Category_Site_Intake_Tap=="A") %>%
    group_by(treat.fac) %>%
    summarize(
        N.compliance = sum(!is.na(Compliant_ARA_2015)),
        N.cattle = sum(!is.na(Cattle_Access_2015)),
        perc.compliance = mean(Compliant_ARA_2015, na.rm=TRUE) * 100,
        perc.cattle = mean(Cattle_Access_2015, na.rm=TRUE) * 100)

##Compliance summary at community level
compliance.summary.comm <- edwin %>%
    filter(In_RCT==1 & Category_Site_Intake_Tap=="A") %>%
    group_by(Community_No, treat.fac) %>%
    summarize(
        N.compliance = sum(!is.na(Compliant_ARA_2015)),
        N.cattle = sum(!is.na(Cattle_Access_2015)),
        mean.compliance = mean(Compliant_ARA_2015, na.rm=TRUE),
        mean.cattle = mean(Cattle_Access_2015, na.rm=TRUE)) %>%
    group_by(treat.fac) %>%
    summarize(
        N.compliance.t = sum(!is.na(mean.compliance)),
        N.cattle.t = sum(!is.na(mean.compliance)),
        mean.compliance.t = mean(mean.compliance, na.rm=TRUE),
        mean.cattle.t = mean(mean.cattle, na.rm=TRUE))


##Endline analysis
##Roof intakes excluded

##count number of communities in this analysis
length(unique(edwin[edwin$In_RCT==1 &
      !is.na(edwin$treat) &
      edwin$Data_Include_2015==1 &
      edwin$Stream_Spring_2015!=2, ]$Community_No))-1

require(glmmTMB)
mod1_2015.tmb <- glmmTMB(Total_E_coli_2015 ~ (1 | Community_No/Water_System_No) +
                             treat + Category_Site_Intake_Tap + Stream_Spring_2015,
                         data = edwin[edwin$In_RCT==1 & edwin$Data_Include_2015==1 & edwin$Stream_Spring_2015!=2,
                                      ], family = "nbinom1")
nrow(mod1_2015.tmb$frame)
length(unique(mod1_2015.tmb$frame$Community_No))
with(mod1_2015.tmb$frame, table(Category_Site_Intake_Tap))
summary(mod1_2015.tmb)
mod1_2015_95int <- as.data.frame(confint(mod1_2015.tmb))
colnames(mod1_2015_95int) <- c("li", "ui", "Mean")
mod1_2015_95int$Predictors <- c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (20ml equivalent)",
                                "Endline effect of being a\n treatment community site",
                                "Tap compared\n with intake",
                                "Spring intake compared\n with stream intake",
                                "Water system random effect",
                                "Community random effect",
                                "Sigma")
mod1_2015_95int$Predictors <- factor(c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (20ml equivalent)",
                                "Endline effect of being a\n treatment community site",
                                "Tap compared\n with intake",
                                "Spring intake compared\n with stream intake",
                                "Water system random effect",
                                 "Community random effect",
                                "Sigma"),
                                levels=c("Endline effect of being a\n treatment community site",
                                         "Spring intake compared\n with stream intake",
                                         "Tap compared\n with intake",
                                         "Log-transformed 2015\n E. coli CFU concentration\n in control sites (20ml equivalent)",
                                         "Water system random effect",
                                          "Community random effect",
                                         "Sigma"))

                                         
##Fig 6
ggplot(mod1_2015_95int[1:4, ], aes(x = Predictors, y = Mean, ymin = li, ymax = ui)) +
    geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") +
    xlab("Predictors") + theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) +
    theme(axis.title = element_text(size = 18)) + coord_flip()
ggsave("Figure 6.pdf", device="pdf")

##Table ??
require(officer)
require(flextable)

endline.table <- mod1_2015_95int[ , c(4, 3, 1, 2)]
colnames(endline.table) <- c("Model coefficient", "Value", "Lower 95% value", "Upper 95% value")
endline.table <-  endline.table %>%
    regulartable() %>%
    set_formatter(Value = function(x) round(x, 1)) %>%
    set_formatter("Lower 95% value" = function(x) round(x, 1)) %>%
    set_formatter("Upper 95% value" = function(x) round(x, 1)) %>%
    bold(part="header") %>%
    theme_box()

endline.doc <- read_docx()
endline.doc <- body_add_flextable(endline.doc, value=endline.table)
print(endline.doc, target="endlinetable.docx")


##Matched analysis
require(MatchIt)


m.ecoli.clean <- matchit(treat ~ E_coli_2010, 
                         data=edwin.clean[edwin.clean$Category_Site_Intake_Tap=="A", ],
                         method="genetic")
m.ecoli.clean.data <- match.data(m.ecoli.clean)

##need to merge back in tap data
edwin.clean$distance <- m.ecoli.clean.data$distance[match(edwin.clean$Water_System_No,
                                                          m.ecoli.clean.data$Water_System_No)]
edwin.clean$weights <- m.ecoli.clean.data$weights[match(edwin.clean$Water_System_No,
                                                        m.ecoli.clean.data$Water_System_No)]

m.ecoli.clean.data <- rbind(m.ecoli.clean.data,
                            edwin.clean[edwin.clean$Category_Site_Intake_Tap=="B", ])

##count communites
length(unique(m.ecoli.clean.data$Community_No))
length(unique(m.ecoli.clean.data$Water_System_No))
nrow(m.ecoli.clean.data)

##Unweigted model (glmmADMB does not allow weights)
require(glmmADMB)
mod1_2010_15 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + treat *
                             E_coli_2010 + offset(Offset),
                         data = m.ecoli.clean.data, family = "nbinom")

##Weighted model with glmmTMB
require(glmmTMB)
mod1_2010_15.tmb <- glmmTMB(Total_E_coli_2015 ~ (1 | Water_System_No) + treat *
                                E_coli_2010 + offset(Offset),
                            data = m.ecoli.clean.data, family = "nbinom2", weights=weights)
summary(mod1_2010_15.tmb)
mod1_2010_15_95int <- as.data.frame(confint(mod1_2010_15.tmb))

colnames(mod1_2010_15_95int) <- c("li", "ui", "Mean")
mod1_2010_15_95int$Predictors <- as.factor(c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (5ml equivalent)",
                                   "Baseline effect of being a\n treatment community site",
                                   "Mean difference between\n E. coli CFU concentrations\n at sites in 2010 and 2015",
                                   "Difference in differences in E. coli\n CFU concentrations between\n 2010 and 2015 between treatment\n and control community sites",
                                   "Water system random effect",
                                   "Sigma"))

mod1_2010_15_95int$Predictors <- factor(c("Log-transformed 2015\n E. coli CFU concentration\n in control sites (5ml equivalent)",
                                   "Baseline effect of being a\n treatment community site",
                                   "Mean difference between\n E. coli CFU concentrations\n at sites in 2010 and 2015",
                                   "Difference in differences in E. coli\n CFU concentrations between\n 2010 and 2015 between treatment\n and control community sites",
                                   "Water system random effect",
                                   "Sigma"),
                                   levels=c("Difference in differences in E. coli\n CFU concentrations between\n 2010 and 2015 between treatment\n and control community sites",
                                            "Baseline effect of being a\n treatment community site",
                                            "Mean difference between\n E. coli CFU concentrations\n at sites in 2010 and 2015",
                                            "Log-transformed 2015\n E. coli CFU concentration\n in control sites (5ml equivalent)",
                                            "Water system random effect",
                                            "Sigma"))

##Fig 5
ggplot(mod1_2010_15_95int[1:4, ], aes(x = Predictors, y = Mean, ymin = li, ymax = ui)) +
    geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") +
    xlab("Predictors") + theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) +
    theme(axis.title = element_text(size = 18)) + coord_flip()
ggsave("Figure 5.pdf", device="pdf")


matched.table <- mod1_2010_15_95int[ , c(4, 3, 1, 2)]
colnames(matched.table) <- c("Model coefficient", "Value", "Lower 95% value", "Upper 95% value")
matched.table <-  matched.table %>%
    regulartable() %>%
    set_formatter(Value = function(x) round(x, 1)) %>%
    set_formatter("Lower 95% value" = function(x) round(x, 1)) %>%
    set_formatter("Upper 95% value" = function(x) round(x, 1)) %>%
    bold(part="header") %>%
    theme_box()

matched.doc <- read_docx()
matched.doc <- body_add_flextable(matched.doc, value=matched.table)
print(matched.doc, target="matchedtable.docx")


##Blanace chacking for baseline data
require(cobalt)
require(ggplot2)
require(reshape2)
#

formula.balance <- as.formula(treat ~ E_coli_2010 + OTB_Size_Strat + Cows_Strat +
                 Turbidity_JTU_0 +
                 hospital_time)
balance.names <- data.frame(new=c("E. coli CFU in 2010", "Number of households",
                                  "Cows per household",
                                  "Turbidity", "Time to hospital"),
                            old=c("E_coli_2010", "OTB_Size_Strat", "Cows_Strat",
                                  "Turbidity_JTU_0", "hospital_time"))

##Estimate balance
##2010 & 2015 data
bal.2015 <- bal.tab(formula.balance,
              data=edwin[edwin$Category_Site_Intake_Tap=="A" &
                         !is.na(edwin$treat) &
                       edwin$Same_Site_2010_15==1, ],
             disp.means=TRUE, un=TRUE, s.d.denom="pooled")

##2010 data
bal.2010 <- bal.tab(formula.balance,
              data=edwin[edwin$Category_Site_Intake_Tap=="A" &
                         !is.na(edwin$treat) & 
                       edwin$Data_Present_2010==1 & edwin$In_RCT==1, ],
              disp.means=TRUE, un=TRUE, s.d.denom="pooled")


##Extract data for plotting
bal.2015.p.data <- cbind(balance.names, bal.2015$Balance[rownames(bal.2015$Balance) %in% balance.names$old, ])
bal.2010.p.data <- cbind(balance.names, bal.2010$Balance[rownames(bal.2010$Balance) %in% balance.names$old, ])
bal.all.p.data <- cbind(bal.2015.p.data[, 1:2], diff.2010 = abs(bal.2010.p.data$Diff.Un),
                        diff.2015 = abs(bal.2015.p.data$Diff.Un))
bal.all.p.data.l <- melt(bal.all.p.data, measure.vars = c("diff.2010", "diff.2015"))
bal.all.p.data.l$Balance <- rep(c("All communities\n with baseline water data", "Subset of communities\n with comparable end-line"),
                                each=nrow(bal.all.p.data.l)/2)
bal.all.p.data.l$new <- factor(bal.all.p.data.l$new, levels=c("E. coli CFU in 2010", "Turbidity",
                                                           "Time to hospital", "Cows per household",
                                                           "Number of households"))


##Figure 3
##Plot balance comparison
ggplot(bal.all.p.data.l, aes(x=value, y=new, shape=Balance)) +
    geom_vline(xintercept=0.25, lty=2, lwd=0.75) +
    geom_vline(xintercept=0.0, lty=1, lwd=0.75) +
    geom_point(size=5) +
    xlim(0, 0.8) + theme(legend.position="none") +
    theme_bw() + ggtitle(NULL) +
    xlab("Standardized absolute mean difference") +
    ylab(NULL) +
    theme(plot.title = element_text(size = 24)) +
    theme(axis.title = element_text(size = 16)) +
    theme(axis.text = element_text(size = 12)) +
    theme(legend.text = element_text(size = 12)) +
    theme(legend.title = element_text(size = 14))
ggsave("Figure 3.pdf", device="pdf")


##Statistical analysis of baseline difference
require(glmmTMB)
mod1_2010_common.tmb <- glmmTMB(E_coli_2010 ~ (1 | Community_No) +
                             treat,
                             data = edwin[edwin$In_RCT==1 &
                                          edwin$Same_Site_2010_15==1 &
                                          edwin$Stream_Spring_2015!=2 &
                                          edwin$Category_Site_Intake_Tap =="A", ],
                             family = "nbinom1")
summary(mod1_2010_common.tmb)
mod1_2010_all.tmb <- glmmTMB(E_coli_2010 ~ (1 | Community_No) +
                             treat,
                             data = edwin[edwin$In_RCT==1 &
                                          edwin$Data_Present_2010==1 &
                                          edwin$Category_Site_Intake_Tap =="A",
                                      ], family = "nbinom1")
summary(mod1_2010_all.tmb)

simple.common <- confint(mod1_2010_common.tmb)
simple.all <- confint(mod1_2010_all.tmb)

simple.merge <- as.data.frame(rbind(simple.all, simple.common))
colnames(simple.merge) <- c("li", "ui", "Mean")
simple.merge$common.fac <- as.factor(rep(c("All communities with baseline water data",
                                           "Subset of communities\n with comparable end-line"), each=4))
simple.merge$Predictors <- as.factor(rep(c("Intercept", "Community difference\n at baseline", "Community random effect", "Sigma"), 2)) 

require(ggplot2)
##Fig for reviewers
ggplot(simple.merge[c(1:2, 5:6), ], aes(x = Predictors, y = Mean, ymin = li, ymax = ui)) +
    geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") +
    xlab("Predictors") + facet_wrap(~common.fac) +
    theme_bw() + theme(axis.ticks.y = element_blank()) + theme(plot.title = element_text(size = 24)) +
    theme(axis.title = element_text(size = 18)) + coord_flip()
ggsave("Fig for response.pdf", device="pdf")


##biophysical analysis
require("reshape2")
require("ggplot2")
require("lme4")
require("glmmADMB")
require("MuMIn")
require("dplyr")
require("grid")
df <- edwin

df$Water_System_No <- as.factor(df$Water_System_No)
df$Category_Site_Intake_Tap <- as.factor(df$Category_Site_Intake_Tap)
df$Data_Present_2010 <- as.factor(df$Data_Present_2010)
df$Data_Present_2015 <- as.factor(df$Data_Present_2015)
df$Same_Site_2010_15 <- as.factor(df$Same_Site_2010_15)
df$Data_Complete_2015 <- as.factor(df$Data_Complete_2015)
df$Data_Include_2015 <- as.factor(df$Data_Include_2015)
df$Data_Include_RCT_2015 <- as.factor(df$Data_Include_RCT_2015)
df$In_RCT <- as.factor(df$In_RCT)
df$Treatment_Control_Site <- as.factor(df$Treatment_Control_Site)
df$Stream_Spring_2015 <- as.factor(df$Stream_Spring_2015)
df$Disturbance_2015 <- as.factor(df$Disturbance_2015)
df$Black_Mud_2015 <- as.factor(df$Black_Mud_2015)
df$Faeces_Presence_2015 <- as.factor(df$Faeces_Presence_2015)
df$Cattle_2015 <- as.factor(df$Cattle_2015)
df$Agriculture_2015 <- as.factor(df$Agriculture_2015)
df$Substrate_2015 <- as.factor(df$Substrate_2015)
df$Cattle_Access_2015 <- as.factor(df$Cattle_Access_2015)
df$Compliant_ARA_2015 <- as.factor(df$Compliant_ARA_2015)

mod1_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 +
    Substrate_2015 + Temp_2015 * pH_2015 * Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod1_2015)
AIC(mod1_2015)

mod2_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 +
    Substrate_2015 + Temp_2015 + pH_2015 + Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod2_2015)
AIC(mod2_2015)

mod3_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 +
    Temp_2015 + pH_2015 + Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod3_2015)
AIC(mod3_2015)

mod4_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 +
    pH_2015 + Salinity_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod4_2015)
AIC(mod4_2015)

mod5_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Agriculture_2015 +
    Turbidity_2015_100, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 ==
    1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod5_2015)
AIC(mod5_2015)

mod6_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Cattle_2015 + Turbidity_2015_100,
    data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 ==
        1, ]), family = "nbinom")
summary(mod6_2015)
AIC(mod6_2015)

mod7_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod7_2015)
AIC(mod7_2015)

mod8_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Category_Site_Intake_Tap +
    Stream_Spring_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod8_2015)
AIC(mod8_2015)

mod9_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Stream_Spring_2015 + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod9_2015)
AIC(mod9_2015)

mod10_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Turbidity_2015_100, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod10_2015)
AIC(mod10_2015)

mod11_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod11_2015)
AIC(mod11_2015)

round(Weights(AIC(mod1_2015, mod2_2015, mod3_2015, mod4_2015, mod5_2015, mod6_2015,
    mod7_2015, mod8_2015, mod9_2015, mod10_2015, mod11_2015)), 10)

mod12_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Cattle_Access_2015 +
    Compliant_ARA_2015 + Faeces_Presence_2015, data = (df[df$Data_Complete_2015 ==
    1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod12_2015)
AIC(mod12_2015)

mod13_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Cattle_Access_2015 +
    Faeces_Presence_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 ==
    1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod13_2015)
AIC(mod13_2015)

mod14_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Compliant_ARA_2015 +
    Faeces_Presence_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 ==
    1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod14_2015)
AIC(mod14_2015)

mod15_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Compliant_ARA_2015 +
    Cattle_Access_2015, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 ==
    1 & df$Data_Include_2015 == 1, ]), family = "nbinom")
summary(mod15_2015)
AIC(mod15_2015)

mod16_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Faeces_Presence_2015,
    data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 ==
        1, ]), family = "nbinom")
summary(mod16_2015)
AIC(mod16_2015)

mod17_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Cattle_Access_2015,
    data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 ==
        1, ]), family = "nbinom")
summary(mod17_2015)
AIC(mod17_2015)

mod18_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Compliant_ARA_2015,
    data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_2015 ==
        1, ]), family = "nbinom")
summary(mod18_2015)
AIC(mod18_2015)

Weights(AIC(mod7_2015, mod12_2015, mod13_2015, mod14_2015, mod15_2015, mod16_2015,
    mod17_2015, mod18_2015))

mod19_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Faeces_Presence_2015,
    data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 == 1 & df$Data_Include_RCT_2015 ==
        1, ]), family = "nbinom")
summary(mod19_2015)
AIC(mod19_2015)

mod20_2015 <- glmmadmb(Total_E_coli_2015 ~ (1 | Water_System_No) + Disturbance_2015 +
    Category_Site_Intake_Tap + Stream_Spring_2015 + Turbidity_2015_100 + Faeces_Presence_2015 +
    Treatment_Control_Site, data = (df[df$Data_Complete_2015 == 1 & df$Data_Present_2015 ==
    1 & df$Data_Include_RCT_2015 == 1, ]), family = "nbinom")
summary(mod20_2015)
AIC(mod20_2015)

Weights(AIC(mod19_2015, mod20_2015))

# Coefficient diagram for model 16
mod16_2015_95int <- confint(mod16_2015)
mod16_2015_95int
# Intercept represents 2015 mean count in control sites, 2010=average increase
# 2010/2015
mod16_2015_df <- data.frame(parameter = names(coef(mod16_2015)), Predictors = c("Log-transformed E. coli\n CFU concentration",
    "No disturbance\n of sediment", "Tap compared\n with intake", "Spring intake compared\n with stream intake",
    "Turbidity (FAU/100)", "Faeces present\n in forest", "Faeces present\n in water"),
    coef = coef(mod16_2015), ymin = mod16_2015_95int[, 1], ymax = mod16_2015_95int[,
        2])
target <- c("Faeces present\n in water", "Faeces present\n in forest", "Turbidity (FAU/100)",
    "Spring intake compared\n with stream intake", "Tap compared\n with intake",
    "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration")
mod16_2015_df[match(target, mod16_2015_df$Predictors), ]
mod16_2015_df$Pred2 <- factor(mod16_2015_df$Predictors, levels = c("Faeces present\n in water",
    "Faeces present\n in forest", "Turbidity (FAU/100)", "Spring intake compared\n with stream intake",
    "Tap compared\n with intake", "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration"))
ggplot(mod16_2015_df, aes(x = Pred2, y = coef, ymin = ymin, ymax = ymax)) +
    geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") +
    xlab("Predictors") + coord_cartesian(ylim = c(-2.55, 4.25)) + theme_bw() + theme(axis.ticks.y = element_blank()) +
    theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) +
    coord_flip()
ggsave("Figure 4.pdf", device="pdf")

# Coefficient diagram for model 20
mod20_2015_95int <- confint(mod20_2015)
mod20_2015_95int
mod20_2015_df <- data.frame(parameter = names(coef(mod20_2015)), Predictors = c("Log-transformed E. coli\n CFU concentration at\n control community sites",
    "No disturbance\n of sediment", "Tap compared\n with intake", "Spring intake compared\n with stream intake",
    "Turbidity (FAU/100)", "Faeces present\n in forest", "Faeces present\n in water",
    "Treatment community site"), coef = coef(mod20_2015), ymin = mod20_2015_95int[,
    1], ymax = mod20_2015_95int[, 2])
target <- c("Treatment community site", "Faeces present\n in water", "Faeces present\n in forest",
    "Turbidity (FAU/100)", "Spring intake compared\n with stream intake", "Tap compared\n with intake",
    "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration at\n control community sites")
mod20_2015_df[match(target, mod20_2015_df$Predictors), ]
mod20_2015_df$Pred2 <- factor(mod20_2015_df$Predictors, levels = c("Treatment community site",
    "Faeces present\n in water", "Faeces present\n in forest", "Turbidity (FAU/100)",
    "Spring intake compared\n with stream intake", "Tap compared\n with intake",
    "No disturbance\n of sediment", "Log-transformed E. coli\n CFU concentration at\n control community sites"))
ggplot(mod20_2015_df, aes(x = Pred2, y = coef, ymin = ymin, ymax = ymax)) +
    geom_hline(yintercept = 0) + geom_pointrange(colour = "black") + ylab("Predictor variable coefficient value") +
    xlab("Predictors") + coord_cartesian(ylim = c(-2.55, 4.25)) + theme_bw() + theme(axis.ticks.y = element_blank()) +
    theme(plot.title = element_text(size = 24)) + theme(axis.title = element_text(size = 18)) +
    coord_flip()
ggsave("Supplementary biophysical figure.pdf", device="pdf")