library(metafor)

#### Import data & prepare dataset
data <- read.csv("Supplemental Data S1.NoInsecticide.csv")

### Prepare the dataset by removing unnecessary observations, etc.
data1 <- subset(data, exclude==0 &
                  (nontarget_functional_group=="omnivore" |
                   nontarget_functional_group=="mixed" |
                   nontarget_functional_group=="predator" |
                   nontarget_functional_group=="parasitoid") &
                  (response_variable_abbrev=="activity-density" |
                   response_variable_abbrev=="abundance" |
                   response_variable_abbrev=="parasitism"))

### Create new dataset with fewer variables
myvars <- c("id","study","peer_reviewed","continent","crop_species",
            "rate_g_kg_seed","rate_mg_seed","variety","planting_date",
            "crop_group","neonic_active_ingr","neonic_group","site_yr_ID",
            "plot_size","true_control_sample_size","true_expmtl_sample_size",
            "nontarget_location","nontarget_phylum","nontarget_class",
            "nontarget_insect","nontarget_order","nontarget_family",
            "nontarget_genus","nontarget_species","nontarget_finest_grouping",
            "nontarget_functional_group","nontarget_stage","control_mean",
            "expmtl_mean","control_std_dev","expmtl_std_dev","control_RSD",
            "early_samp_prop","nontarget_taxa","response_variable_abbrev")
newdata <- data1[myvars]

### Make sure all continuous variables are stored as numbers
newdata$plot_size <- as.numeric(as.character(newdata$plot_size))
newdata$true_control_sample_size <- as.numeric(as.character(newdata$true_control_sample_size))
newdata$true_expmtl_sample_size <- as.numeric(as.character(newdata$true_expmtl_sample_size))
newdata$control_std_dev <- as.numeric(as.character(newdata$control_std_dev))
newdata$expmtl_std_dev <- as.numeric(as.character(newdata$expmtl_std_dev))
newdata$control_RSD <- as.numeric(as.character(newdata$control_RSD))
newdata$early_samp_prop <- as.numeric(as.character(newdata$early_samp_prop))
str(newdata)

### Calculate effect sizes (standardized mean differences) and sampling variances
newdataES <- escalc(measure="SMD", n1i=true_expmtl_sample_size, n2i=true_control_sample_size,
                    m1i=expmtl_mean, m2i=control_mean,
                    sd1i=expmtl_std_dev, sd2i=control_std_dev,
                    data=newdata, vtype="UB")

### Set up variables for the model
insect <- factor(newdataES[,"nontarget_insect"], levels=c("Insect","Non-insect"))
habitat <- factor(newdataES[,"nontarget_location"], levels=c("soil-associated","aboveground"))
fxn_grp <- factor(newdataES[,"nontarget_functional_group"], levels=c("mixed","omnivore","predator","parasitoid"))
crop <- factor(newdataES[,"crop_group"], levels=c("maize","soybean","other"))
ai <- factor(newdataES[,"neonic_group"], levels=c("IMI","THX/CLO"))
review <- factor(newdataES[,"peer_reviewed"], levels=c("yes","no"))
plotsize <- newdataES[,"plot_size"]
early <- newdataES[,"early_samp_prop"]
taxa <- newdataES[,"nontarget_taxa"]

hist(plotsize)
hist(log(plotsize))
plotsize <- log(plotsize)                 # log-transform because of the strongly skewed distribution
plotsize <- scale(plotsize, scale=FALSE)  # center log plot size
hist(early)
hist(log(early+.1))
early <- log(early+.1)                    # log-transform to make the distribution more nearly normal
early <- scale(early, scale=FALSE)        # center proportion of early-season samples

#### Assess potential collinearity among predictors
### Examine collinearity among predictors via GVIF
library(nlme)
mod_vif <- lme(yi ~ insect+habitat+fxn_grp+crop+ai+review+plotsize+early,
               random=~1|study/site_yr_ID, data=newdataES)
library(car)
vif(mod_vif)

### Examine collinearity among predictors via correlations
design <- model.matrix(~insect+habitat+fxn_grp+crop+ai+review+plotsize+early)
design <- design[,-1]    # drop the intercept column
design <- scale(design)  # standardize so that cov() returns correlations
S <- cov(design)
S.vec <- c(S)
S.vec <- S.vec[S.vec < .999] # exclude self-correlations (=1)
hist(S.vec, breaks=10, main="Correlations among moderators")
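## Added sketch (not part of the original analysis): list the specific pairs of
## moderator columns with the largest correlations, rather than reading them off
## the histogram. Uses the scaled design matrix and its correlation matrix `S`
## created above; the 0.5 cutoff is an arbitrary illustration.
high_cor <- which(abs(S) > 0.5 & abs(S) < 0.999, arr.ind = TRUE)
high_cor <- high_cor[high_cor[, 1] < high_cor[, 2], , drop = FALSE] # keep each pair once
data.frame(var1 = colnames(S)[high_cor[, 1]],
           var2 = colnames(S)[high_cor[, 2]],
           r = round(S[high_cor], 3))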
summary(S.vec)
small <- S.vec < 0.2
table(small)

### Look at correlation structure of predictors using PCA/varimax rotation
S.Eig <- eigen(S)
E_S <- S.Eig$vectors
Lamb_S <- S.Eig$values
par(mfrow=c(1,2))
plot(Lamb_S, main="Scree Plot")
plot(cumsum(Lamb_S)/sum(Lamb_S), main="Explained Variance")
abline(h=0.85, col="red")
abline(v=1:12, col="grey", lty=2)
L <- S.Eig$vectors[,1:11] %*% diag(sqrt(S.Eig$values[1:11])) # eigenvectors * sqrt(eigenvalues) -> loading matrix L
L_PC <- varimax(L)$loadings # rotate L to be more interpretable
barplot(-L_PC[,1], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
barplot(-L_PC[,2], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
barplot(-L_PC[,3], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
barplot(-L_PC[,4], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
# looks reasonable; no strong correlations between moderators

#### Fit meta-analysis models
### Fit moderator-free models for overall effect size & heterogeneity
## Simple random-effects model
null_RE <- rma.mv(yi, vi, data=newdataES, method="REML")
## Model that accounts for clustering of observations within studies
null_SS <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML")

## Examine likelihood plots for the random effects
# This takes a long time!
# http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011
profile(null_SS, sigma2=1)
profile(null_SS, sigma2=2)
# It looks like site-year explains most of the clustering within studies

## Examine overall effect size and heterogeneity tests
summary(null_RE)
summary(null_SS)

## Calculate I-squared (same for these two models)
# See Higgins et al. 2003
I2 <- ((622.5037-606)/622.5037)*100

## Leave-one-out analysis for the study/site-year model (null_SS)
res2Ahm <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ahmad20052006"))
res2AlD <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="AlDeeb2003"))
res2Bh <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Bhatti2005"))
res2Ch <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Charlet2007"))
res2Dg <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Douglas2015"))
res2Ech <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Echegaray2009"))
res2Hal <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Hallett2014"))
res2Har <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Harmon2006"))
res2HB <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="HeidelBaker2012Carter2013"))
res2Kr <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Krauter2001"))
res2Ohn <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ohnesorg2009"))
res2Sg <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Seagraves2012"))
res2Tin <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Tinsley2012"))
res2Alb <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Albajes2003"))
res2Bab <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Babendreier2015"))
res2Bak <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Baker2002"))
res2delaP <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="delaPoza2005Farinos2008"))
res2Schm <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Schmidt2002"))
res2SC <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="SoteloCardona2010"))

print(null_SS) # full model for comparison
print(res2Ahm, digits=3)
print(res2AlD, digits=3)
print(res2Bh, digits=3) # Q test becomes marginal (P = 0.06)
print(res2Ch, digits=3)
print(res2Dg, digits=3)
print(res2Ech, digits=3)
print(res2Hal, digits=3)
print(res2Har, digits=3)
print(res2HB, digits=3)
print(res2Kr, digits=3)
print(res2Ohn, digits=3)
print(res2Sg, digits=3)
print(res2SC, digits=3)
print(res2Tin, digits=3)
print(res2Alb, digits=3)
print(res2Bab, digits=3)
print(res2Bak, digits=3)
print(res2delaP, digits=3)
print(res2Schm, digits=3)

## Other diagnostics for the study/site-year model
res <- rstandard(null_SS)
fit <- fitted(null_SS)
library(car)
qqPlot(res$resid) # residuals have long tails, but not too bad
hat <- hatvalues(null_SS)
plot(res$resid ~ hat)
abline(h=0)
outlier <- as.data.frame(abs(res$resid) > 3)
names(outlier)[1] <- "res"
which(outlier$res==TRUE)
newdataESout <- newdataES[-c(118,561), ] # remove outliers (standardized residual > 3 or < -3)
hist(newdataESout$yi)
null_SS_out <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataESout) # model w/o outliers
summary(null_SS_out) # results very similar with outliers excluded

### Fit moderator model to test the influence of other variables on effect size
## Create design matrix with effects coding for moderators
design2 <- model.matrix(~insect+habitat+fxn_grp+crop+ai+review+plotsize+early,
                        contrasts.arg=list(crop="contr.sum", habitat="contr.sum",
                                           fxn_grp="contr.sum", insect="contr.sum",
                                           ai="contr.sum", review="contr.sum"))
design2 <- design2[,-1] # drop the intercept column

## Fit model
mod <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML")

## Examine effect size and heterogeneity tests
summary(mod)

## Calculate I-squared
# See Higgins et al. 2003
I2 <- ((584.4791-595)/584.4791)*100 # negative values are set to 0 by convention
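## Added helper (not in the original script): the hard-coded I-squared values
## above follow Higgins et al. 2003, I2 = (Q - df)/Q * 100, with Q and df = k - p
## taken from the printed model output. The same quantities can be pulled
## directly from the fitted rma.mv objects, which avoids retyping them if the
## data change.
I2_higgins <- function(fit) {
  I2 <- (fit$QE - (fit$k - fit$p)) / fit$QE * 100
  max(I2, 0) # negative values are reported as 0 by convention
}
I2_higgins(null_SS) # null study/site-year model
I2_higgins(mod)     # moderator model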
## Examine likelihood plots for each of the random effects
# This takes a long time!
# http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011
profile(mod, sigma2=1) # zero estimate; does not seem to be identifiable
profile(mod, sigma2=2) # this one seems identifiable

## Test significance of moderators
# http://www.metafor-project.org/doku.php/tips:testing_factors_lincoms
anova(mod, btt=c(2))     # insect, p = .0032
anova(mod, btt=c(3))     # habitat, p = .234
anova(mod, btt=c(4,5,6)) # functional group, p = .132
anova(mod, btt=c(7,8))   # crop, p = .674
anova(mod, btt=c(9))     # active ingredient, p = .320
anova(mod, btt=c(10))    # peer review, p = .477
anova(mod, btt=c(11))    # plot size, p = .559
anova(mod, btt=c(12))    # early season, p = .435

## Generate CIs from the fitted moderator model
# Store mean values of the moderator variables in the dataset
predmeans <- as.data.frame(colMeans(design2))
names(predmeans)[names(predmeans)=="colMeans(design2)"] <- "mean"
# Generate CIs weighted similarly to moderator representation in the dataset:
# predictions for insects vs. non-insects, with all other moderators held at
# their mean values across the dataset
CI_ins <- predict(mod, newmods=c(1, predmeans[2:11,]), addx=TRUE)
CI_nonins <- predict(mod, newmods=c(-1, predmeans[2:11,]), addx=TRUE)
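## Added sketch (not in the original script): a quick base-graphics comparison
## of the two predicted means generated above; predict.rma returns the estimate
## and confidence bounds as $pred, $ci.lb, and $ci.ub.
est <- c(CI_ins$pred, CI_nonins$pred)
lo  <- c(CI_ins$ci.lb, CI_nonins$ci.lb)
hi  <- c(CI_ins$ci.ub, CI_nonins$ci.ub)
plot(est, 1:2, xlim = range(c(lo, hi, 0)), ylim = c(0.5, 2.5), pch = 16,
     yaxt = "n", xlab = "Effect size (d)", ylab = "")
axis(2, at = 1:2, labels = c("Insects", "Non-insects"), las = 1)
segments(lo, 1:2, hi, 1:2) # 95% confidence intervals
abline(v = 0, lty = 2)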
## Leave-one-out analysis, moderator model
resAhm <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ahmad20052006"))
resAlD <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="AlDeeb2003"))
resBh <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Bhatti2005"))
resCh <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Charlet2007"))
resDg <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Douglas2015"))
resEch <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Echegaray2009"))
resHal <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Hallett2014"))
resHar <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Harmon2006"))
resHB <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="HeidelBaker2012Carter2013"))
resKr <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Krauter2001"))
resOhn <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ohnesorg2009"))
resSg <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Seagraves2012"))
resTin <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Tinsley2012"))
resAlb <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Albajes2003"))
resBab <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Babendreier2015"))
resBak <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Baker2002"))
resdelaP <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="delaPoza2005Farinos2008"))
resSchm <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Schmidt2002"))
resSC <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="SoteloCardona2010"))

print(mod) # full model for comparison
print(resAhm, digits=3)
print(resAlD, digits=3)
print(resBh, digits=3)
print(resCh, digits=3)
print(resDg, digits=3)
print(resEch, digits=3)
print(resHal, digits=3)
print(resHar, digits=3)
print(resHB, digits=3)
print(resKr, digits=3)
print(resOhn, digits=3) # Q-test P-value becomes 0.057
print(resSg, digits=3)
print(resSC, digits=3)  # Q-test P-value becomes 0.338
print(resTin, digits=3)
print(resAlb, digits=3)
print(resBab, digits=3)
print(resBak, digits=3)
print(resdelaP, digits=3)
print(resSchm, digits=3)

## Other diagnostics for the moderator model
res <- rstandard(mod)
fit <- fitted(mod)
library(car)
qqPlot(res$resid)
hat <- hatvalues(mod)
plot(res$resid ~ fit)
plot(res$resid ~ hat)
abline(h=0)
plot(res$resid ~ insect)
plot(res$resid ~ habitat)
plot(res$resid ~ fxn_grp)
plot(res$resid ~ crop)
plot(res$resid ~ ai)
plot(res$resid ~ review)
plot(res$resid ~ plotsize)
plot(res$resid ~ early)
outlier <- as.data.frame(abs(res$resid) > 3)
names(outlier)[1] <- "res"
which(outlier$res==TRUE)
newdataESout <- newdataES[-c(118,561), ] # remove outliers
design3 <- design2[-c(118,561),]         # design matrix w/o the outlier rows
hist(newdataESout$yi)
modout <- rma.mv(yi, vi, mods= ~design3, random = ~ 1 | study/site_yr_ID, data=newdataESout) # refit model w/o outliers
summary(modout)
anova(modout, btt=c(2))     # insect, p = .0038
anova(modout, btt=c(3))     # habitat, p = .271
anova(modout, btt=c(4,5,6)) # functional group, p = .156
anova(modout, btt=c(7,8))   # crop, p = .696
anova(modout, btt=c(9))     # active ingredient, p = .312
anova(modout, btt=c(10))    # peer review, p = .488
anova(modout, btt=c(11))    # plot size, p = .547
anova(modout, btt=c(12))    # early season, p = .437
# results very similar with outliers excluded

#### Publication bias
### These tools only seem to be available for rma() models; it is unclear how to
### test publication bias with a clustered/hierarchical data structure
fsn(yi, vi, data=newdataES, type="Rosenberg") # Rosenberg's fail-safe N
null <- rma(yi, vi, data=newdataES, method="REML")
trimfill(null) # suggests no missing observations

### Weighted histogram of effect sizes
library(ggplot2)
ggplot(newdataES, aes(x=yi, fill=study, weight=1/vi)) +
  geom_histogram(binwidth=0.35, closed="right") +
  geom_vline(xintercept = 0) +
  theme_bw(base_size=22) +
  xlab(expression(paste("Effect size (", italic("d"), ")"))) +
  ylab("") +
  scale_fill_manual(values = c("dodgerblue2", "#E31A1C", # red
                               "green4", "#6A3D9A",      # purple
                               "#FF7F00",                # orange
                               "brown", "gold1",
                               "skyblue2", "#FB9A99",    # light pink
                               "palegreen2", "#CAB2D6",  # light purple
                               "#FDBF6F",                # light orange
                               "gray70", "khaki2",
                               "maroon", "orchid1", "deeppink1", "blue1",
                               "steelblue4", "darkturquoise"))
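## Added sketch (not in the original script): two further small-study checks on
## the simple (non-hierarchical) rma model fitted above; like fsn() and
## trimfill(), these ignore the clustering of observations within studies.
funnel(null, xlab="Effect size (d)")
regtest(null) # Egger-type regression test for funnel plot asymmetry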