library(metafor)

#### Import data & prepare dataset

data <- read.csv("Supplemental Data S1.Pyrethroid.csv")

### Prepare the dataset by removing unnecessary observations, etc.
data1<-subset(data, 
                exclude==0 &
                (nontarget_functional_group=="omnivore"|
                   nontarget_functional_group=="mixed"|
                   nontarget_functional_group=="predator"|
                   nontarget_functional_group=="parasitoid") &
                (response_variable_abbrev=="activity-density"|
                   response_variable_abbrev=="abundance"|
                   response_variable_abbrev=="parasitism") &
                (insecticide_app_site=="soil"|
                   insecticide_app_site=="foliar"))

# Because of fewer studies, divided crop group into only maize and other
library(plyr)
data1$crop_group <- revalue(data1$crop_group, c("maize"="maize", 
                                               "soybean"="other",
                                               "other"="other"))

### Create new dataset with fewer variables
myvars<-c("id","study","peer_reviewed","continent","crop_species",
          "rate_g_kg_seed", "rate_mg_seed","variety","planting_date",
          "crop_group","neonic_active_ingr","neonic_group","site_yr_ID",
          "plot_size","true_control_sample_size","true_expmtl_sample_size",
          "nontarget_location","nontarget_phylum","nontarget_class",
          "nontarget_insect","nontarget_order","nontarget_family",
          "nontarget_genus","nontarget_species","nontarget_finest_grouping",
          "nontarget_functional_group","nontarget_stage","control_mean",
          "expmtl_mean","control_std_dev","expmtl_std_dev","control_RSD",
          "early_samp_prop", "nontarget_taxa","response_variable_abbrev",
          "insecticide_method_app","insecticide_app_site")
newdata <- data1[myvars]

### Make sure all continuous variables are stored as numbers
newdata$plot_size<-as.numeric(as.character(newdata$plot_size))
newdata$true_control_sample_size<-as.numeric(as.character(newdata$true_control_sample_size))
newdata$expmtl_sample_size<-as.numeric(as.character(newdata$true_expmtl_sample_size))
newdata$control_std_dev<-as.numeric(as.character(newdata$control_std_dev))
newdata$expmtl_std_dev<-as.numeric(as.character(newdata$expmtl_std_dev))
newdata$control_RSD<-as.numeric(as.character(newdata$control_RSD))
newdata$early_samp_prop<-as.numeric(as.character(newdata$early_samp_prop))
str(newdata)

### Calculate effect sizes and variance
newdataES<-escalc(measure="SMD", n1i=true_expmtl_sample_size, 
                  n2i=true_control_sample_size,
                  m1i=expmtl_mean, m2i=control_mean, sd1i=expmtl_std_dev, 
                  sd2i=control_std_dev, data = newdata, vtype = "UB")

### Set up variables for the model
insect<-factor(newdataES[,"nontarget_insect"],levels=c("Insect","Non-insect"))
habitat<-factor(newdataES[,"nontarget_location"],levels=c("soil-associated","aboveground"))
fxn_grp<-factor(newdataES[,"nontarget_functional_group"],levels=c("mixed","omnivore","predator","parasitoid"))
crop<-factor(newdataES[,"crop_group"],levels=c("maize","other"))
ai<-factor(newdataES[,"neonic_group"],levels=c("IMI","THX/CLO"))
review<-factor(newdataES[,"peer_reviewed"],levels=c("yes","no"))
plotsize<-newdataES[,"plot_size"]
early<-newdataES[,"early_samp_prop"]
taxa<-newdataES[,"nontarget_taxa"]
hist(plotsize)
hist(log(plotsize))
plotsize<-scale(plotsize, scale = FALSE) # center plot size
hist(early)
early<-scale(early, scale=FALSE)
app<-factor(newdataES[,"insecticide_app_site"],levels=c("soil","foliar"))

#### Assess potential collinearity among predictors

### Examine collinearity among predictors via GVIF
library(nlme)
mod<-lme(yi ~ insect+habitat+ai+plotsize+early+app, 
         random=~1|study/site_yr_ID, data=newdataES)
anova(mod)
library(car)
vif(mod)

### Examine collinearity among predictors via correlations
design<-model.matrix(~insect+habitat+ai+plotsize+early+app)
design <- design[,c(-1)]
design<-scale(design)
S<-cov(design)
S.vec<-c(S)
S.vec<-S.vec[S.vec<.999] # exclude self-correlations (=1)
hist(S.vec, breaks=10, main="Correlations among moderators")
summary(S.vec)
small<-S.vec<0.2
table(small)

### Look at correlation structure of predictors using PCA/varimax rotation
S.Eig<-eigen(S)
E_S<-S.Eig$vectors
Lamb_S<-S.Eig$values
par(mfrow=c(1,2))
plot(Lamb_S,main="Scree Plot")
plot(cumsum(Lamb_S)/sum(Lamb_S),main="Explained Variance")
abline(h=0.85,col="red")
abline(v=1:12,col="grey",lty=2)
L<-S.Eig$vectors[,1:6]%*%diag(sqrt(S.Eig$values[1:6])) # take e-vectors*sqrt(e-values)-> L matrix
L_PC<-varimax(L)$loadings # rotate L to be more interpretable
barplot(-L_PC[,1],names=names(design),col=c(1,2,3,4,5,6,7))
barplot(-L_PC[,2],names=names(design),col=c(1,2,3,4,5,6,7))
barplot(-L_PC[,3],names=names(design),col=c(1,2,3,4,5,6,7))
barplot(-L_PC[,4],names=names(design),col=c(1,2,3,4,5,6,7))

#### Fit meta-analysis models

### Fit moderator-free models for overall effect & heterogeneity
## Simple random-effects model
null_RE<-rma.mv(yi, vi, data=newdataES, method="REML")

## Model that accounts for clustering of observations w/in studies
null_SS <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML")

## Examine likelihood plots for the random effects
# This takes a long time!
# http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011
profile(null_SS,sigma2=1)
profile(null_SS,sigma2=2)
# It looks like study explains most of the clustering

## Examine overall effect size and heterogeneity tests
summary(null_RE)
summary(null_SS)

## Calculate I-squared (same for these two models)
# See Higgins et al. 2003
I2<-((369.6241-383)/369.6241)*100 # < 0 reported as zero by convention

## Leave one out analysis, study-site-yr model (null_SS)
res2Ahm <- rma.mv(yi, vi,  
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="Ahmad20052006"))
res2AlD <- rma.mv(yi, vi, 
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="AlDeeb2003")) 
res2Bh <- rma.mv(yi, vi, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="Bhatti2005")) 
res2Ech <- rma.mv(yi, vi, 
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="Echegaray2009")) 
res2Hal <- rma.mv(yi, vi, 
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="Hallett2014"))
res2Ohn <- rma.mv(yi, vi,
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="Ohnesorg2009"))
res2Bab <- rma.mv(yi, vi, 
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="Babendreier2015"))
res2Bak <- rma.mv(yi, vi, 
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="Baker2002"))

print(null_SS) # full model for comparison
print(res2Ahm,digits=3)
print(res2AlD,digits=3)
print(res2Bh,digits=3) 
print(res2Ech,digits=3)
print(res2Hal,digits=3)
print(res2Ohn,digits=3) # big change in effect size & conf. intervals
print(res2Bab,digits=3)
print(res2Bak,digits=3)

## Other diagnostics for site-yr/study model
res<-rstandard(null_SS)
fit<-fitted(null_SS)
library(car)
qqPlot(res$resid) # some pretty extreme outliers
hat<-hatvalues(null_SS)
plot(res$resid~hat) # here too, but outliers have low leverage
abline(h=0)
outlier<-as.data.frame(abs(res$resid) > 3)
names(outlier)[1]<-paste("res")
which(outlier$res==TRUE)
newdataESout<-newdataES[-c(133,157,159,169,171,268,285,383), ] #remove outliers
hist(newdataESout$yi) 
null_SS_out<- rma.mv(yi, vi, 
                     random = ~ 1 | study/site_yr_ID, data=newdataESout) # model w/o outliers
summary(null_SS_out)
# results similar w/ outliers excluded

### Refit moderator-free models, excluding Ohnesorg et al 2009
null_RE_ohn<-rma.mv(yi, vi, data=newdataES, method="REML",
                    subset=(study!="Ohnesorg2009"))
null_SS_ohn <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML",
                      subset=(study!="Ohnesorg2009"))

## Examine likelihood plots for the random effects
profile(null_SS_ohn,sigma2=1)
profile(null_SS_ohn,sigma2=2)

## Examine overall effect size and heterogeneity tests
summary(null_RE_ohn)
summary(null_SS_ohn)

## Calculate I-squared
I2_ohn<-((316.9-375)/316.9)*100 # < 0 reported as zero by convention

## Other diagnostics for site-yr/study model, excluding Ohnesorg
res<-rstandard(null_SS_ohn)
fit<-fitted(null_SS_ohn)
library(car)
qqPlot(res$resid) # still some outliers
hat<-hatvalues(null_SS_ohn)
plot(res$resid~hat) # outliers w/ low leverage
abline(h=0)
outlier<-as.data.frame(abs(res$resid) > 3)
names(outlier)[1]<-paste("res")
which(outlier$res==TRUE)
newdataESout<-newdataES[-c(133,157,159,169,171,199, 268,285), ] #remove outliers
hist(newdataESout$yi) 
null_SS_ohn_out<- rma.mv(yi, vi, 
                     random = ~ 1 | study/site_yr_ID, data=newdataESout,
                     subset=(study!="Ohnesorg2009")) # model w/o outliers
summary(null_SS_ohn_out)
# results very similar w/ outliers excluded

### Fit moderator model to test the influence of other variables on effect size

## Create design matrix with effects coding and fit study-level model

design2<-model.matrix(~insect+habitat+ai+plotsize+early+app,
                      contrasts=list(habitat="contr.sum",
                                     insect="contr.sum",
                                     ai="contr.sum",
                                     app="contr.sum"))
design2 <- design2[,c(-1)]

mod <- rma.mv(yi, vi, mods= ~design2, 
              random = ~ 1 | study/site_yr_ID, data=newdataES)

## Examine effect size and heterogeneity tests
summary(mod)

## Calculate I-squared
# See Higgins et al. 2003
I2<-((360.8055-377)/360.8055)*100 # by convention < 0 set to 0

## Examine likelihood plots for each of the random effects
# Takes a long time!
# http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011
profile(mod,sigma2=1)
profile(mod,sigma2=2)

## Leave-one-out analysis, moderator analysis
resAhm <- rma.mv(yi, vi, mods= ~design2, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="Ahmad20052006"))
resAlD <- rma.mv(yi, vi, mods= ~design2, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="AlDeeb2003")) 
resBh <- rma.mv(yi, vi, mods= ~design2, 
                random = ~ 1 | study/site_yr_ID, data=newdataES,
                subset=(study!="Bhatti2005")) 
resEch <- rma.mv(yi, vi, mods= ~design2, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="Echegaray2009")) 
resHal <- rma.mv(yi, vi, mods= ~design2, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="Hallett2014"))
resOhn <- rma.mv(yi, vi, mods= ~design2, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="Ohnesorg2009"))
resBab <- rma.mv(yi, vi, mods= ~design2, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="Babendreier2015"))
resBak <- rma.mv(yi, vi, mods= ~design2, 
                 random = ~ 1 | study/site_yr_ID, data=newdataES,
                 subset=(study!="Baker2002"))

print(mod) # full model for comparison
print(resAhm,digits=3)
print(resAlD,digits=3)
print(resBh,digits=3)
print(resEch,digits=3)
print(resHal,digits=3)
print(resOhn,digits=3) # intercept quite different
print(resBab,digits=3)
print(resBak,digits=3)

## Other diagnostics for the moderator model
res<-rstandard(mod)
fit<-fitted(mod)
hat<-hatvalues(mod)
plot(res$resid~fit)
plot(res$resid~hat)
abline(h=0)
library(car)
qqPlot(res$resid) # again big outliers
plot(res$resid~insect)
plot(res$resid~habitat)
plot(res$resid~fxn_grp)
plot(res$resid~crop)
plot(res$resid~ai)
plot(res$resid~review)
plot(res$resid~plotsize)
plot(res$resid~early)
outlier<-as.data.frame(abs(res$resid) > 3)
ex.outlier<-as.data.frame(abs(res$resid) > 8) # category for just extreme outliers
names(outlier)[1]<-paste("res")
which(outlier$res==TRUE)
names(ex.outlier)[1]<-paste("res")
which(ex.outlier$res==TRUE)
newdataESout<-newdataES[-c(157,159,171,268,285), ]
design3<-design2[-c(157,159,171,268,285),]
newdataESexout<-newdataES[-c(157,159), ]
design4<-design2[-c(157,159),]
hist(newdataESout$yi)
hist(newdataESexout$yi)
modout <- rma.mv(yi, vi, mods= ~design3, 
                 random = ~ 1 | study/site_yr_ID, data=newdataESout) # overall model
modexout <- rma.mv(yi, vi, mods= ~design4, 
                 random = ~ 1 | study/site_yr_ID, data=newdataESexout) 
print(modout)
print(modexout)
# results very similar w/ outliers excluded

### Refit moderator model, excluding Ohnesorg et al. 2009
mod_ohn <- rma.mv(yi, vi, mods= ~design2, 
                  random = ~ 1 | study/site_yr_ID, data=newdataES,
                  subset=(study!="Ohnesorg2009")) 

## Examine effect size and heterogeneity tests
summary(mod_ohn)

## Calculate I-squared 
# See Higgins et al. 2003
I2<-((309.4376-369)/309.4376)*100 # by convention < 0 set to 0

## Diagnostics for moderator model w/o Ohnesorg et al. 2009
res<-rstandard(mod_ohn)
fit<-fitted(mod_ohn)
hat<-hatvalues(mod_ohn)
plot(res$resid~fit)
plot(res$resid~hat)
abline(h=0)
library(car)
qqPlot(res$resid) # again big outliers
outlier<-as.data.frame(abs(res$resid) > 3)
ex.outlier<-as.data.frame(abs(res$resid) > 8)
names(outlier)[1]<-paste("res")
which(outlier$res==TRUE)
names(ex.outlier)[1]<-paste("res")
which(ex.outlier$res==TRUE)
newdataESout<-newdataES[-c(133,157,159,169,171,268,285), ]
design3<-design2[-c(133,157,159,169,171,268,285),]
newdataESexout<-newdataES[-c(157,159), ]
design4<-design2[-c(157,159),]
hist(newdataESout$yi)
hist(newdataESexout$yi)
mod_ohnout <- rma.mv(yi, vi, mods= ~design3, 
                 random = ~ 1 | study/site_yr_ID, data=newdataESout,
                 subset=(study!="Ohnesorg2009")) # overall model
mod_ohnexout <- rma.mv(yi, vi, mods= ~design4, 
                   random = ~ 1 | study/site_yr_ID, data=newdataESexout,
                   subset=(study!="Ohnesorg2009")) 
print(mod_ohnout)
print(mod_ohnexout)
# results very similar w/ outliers excluded

#### Publication bias - these tools only seem to be available for rma command
### Unclear how to test this with clustered/hierarchical data structure
### Also not very relevant here b/c CI encloses zero in all models
fsn(yi, vi, data=newdataES, type="Rosenberg") #Rosenberg's fail-safe N
null<-rma(yi, vi, data=newdataES, method="REML")
trimfill(null) #suggests no missing observations

### Weighted histogram of effect sizes

library(ggplot2)

ggplot(newdataES, aes(x=yi, fill=study, weight=(1/newdataES$vi))) +
  geom_histogram(binwidth=0.3, closed="right") +
  geom_vline(xintercept = 0) +
  theme_bw(base_size=22) +
  xlab(expression(paste("Effect size (", italic("d"),")"))) +
  ylab("") +
  scale_fill_manual(values = 
                      c("dodgerblue2","#E31A1C", # red
                        "green4",
                        "#6A3D9A", # purple
                        "#FF7F00", # orange
                        "brown","gold1",
                        "skyblue2","#FB9A99", # lt pink
                        "palegreen2",
                        "#CAB2D6", # lt purple
                        "#FDBF6F", # lt orange
                        "gray70", "khaki2",
                        "maroon","orchid1","deeppink1","blue1","steelblue4",
                        "darkturquoise"))