library(metafor) #### Import data & prepare dataset data <- read.csv("Supplemental Data S1.Pyrethroid.csv") ### Prepare the dataset by removing unnecessary observations, etc. data1<-subset(data, exclude==0 & (nontarget_functional_group=="omnivore"| nontarget_functional_group=="mixed"| nontarget_functional_group=="predator"| nontarget_functional_group=="parasitoid") & (response_variable_abbrev=="activity-density"| response_variable_abbrev=="abundance"| response_variable_abbrev=="parasitism") & (insecticide_app_site=="soil"| insecticide_app_site=="foliar")) # Because of fewer studies, divided crop group into only maize and other library(plyr) data1$crop_group <- revalue(data1$crop_group, c("maize"="maize", "soybean"="other", "other"="other")) ### Create new dataset with fewer variables myvars<-c("id","study","peer_reviewed","continent","crop_species", "rate_g_kg_seed", "rate_mg_seed","variety","planting_date", "crop_group","neonic_active_ingr","neonic_group","site_yr_ID", "plot_size","true_control_sample_size","true_expmtl_sample_size", "nontarget_location","nontarget_phylum","nontarget_class", "nontarget_insect","nontarget_order","nontarget_family", "nontarget_genus","nontarget_species","nontarget_finest_grouping", "nontarget_functional_group","nontarget_stage","control_mean", "expmtl_mean","control_std_dev","expmtl_std_dev","control_RSD", "early_samp_prop", "nontarget_taxa","response_variable_abbrev", "insecticide_method_app","insecticide_app_site") newdata <- data1[myvars] ### Make sure all continuous variables are stored as numbers newdata$plot_size<-as.numeric(as.character(newdata$plot_size)) newdata$true_control_sample_size<-as.numeric(as.character(newdata$true_control_sample_size)) newdata$expmtl_sample_size<-as.numeric(as.character(newdata$true_expmtl_sample_size)) newdata$control_std_dev<-as.numeric(as.character(newdata$control_std_dev)) newdata$expmtl_std_dev<-as.numeric(as.character(newdata$expmtl_std_dev)) newdata$control_RSD<-as.numeric(as.character(newdata$control_RSD)) newdata$early_samp_prop<-as.numeric(as.character(newdata$early_samp_prop)) str(newdata) ### Calculate effect sizes and variance newdataES<-escalc(measure="SMD", n1i=true_expmtl_sample_size, n2i=true_control_sample_size, m1i=expmtl_mean, m2i=control_mean, sd1i=expmtl_std_dev, sd2i=control_std_dev, data = newdata, vtype = "UB") ### Set up variables for the model insect<-factor(newdataES[,"nontarget_insect"],levels=c("Insect","Non-insect")) habitat<-factor(newdataES[,"nontarget_location"],levels=c("soil-associated","aboveground")) fxn_grp<-factor(newdataES[,"nontarget_functional_group"],levels=c("mixed","omnivore","predator","parasitoid")) crop<-factor(newdataES[,"crop_group"],levels=c("maize","other")) ai<-factor(newdataES[,"neonic_group"],levels=c("IMI","THX/CLO")) review<-factor(newdataES[,"peer_reviewed"],levels=c("yes","no")) plotsize<-newdataES[,"plot_size"] early<-newdataES[,"early_samp_prop"] taxa<-newdataES[,"nontarget_taxa"] hist(plotsize) hist(log(plotsize)) plotsize<-scale(plotsize, scale = FALSE) # center plot size hist(early) early<-scale(early, scale=FALSE) app<-factor(newdataES[,"insecticide_app_site"],levels=c("soil","foliar")) #### Assess potential collinearity among predictors ### Examine collinearity among predictors via GVIF library(nlme) mod<-lme(yi ~ insect+habitat+ai+plotsize+early+app, random=~1|study/site_yr_ID, data=newdataES) anova(mod) library(car) vif(mod) ### Examine collinearity among predictors via correlations design<-model.matrix(~insect+habitat+ai+plotsize+early+app) design <- design[,c(-1)] design<-scale(design) S<-cov(design) S.vec<-c(S) S.vec<-S.vec[S.vec<.999] # exclude self-correlations (=1) hist(S.vec, breaks=10, main="Correlations among moderators") summary(S.vec) small<-S.vec<0.2 table(small) ### Look at correlation structure of predictors using PCA/varimax rotation S.Eig<-eigen(S) E_S<-S.Eig$vectors Lamb_S<-S.Eig$values par(mfrow=c(1,2)) plot(Lamb_S,main="Scree Plot") plot(cumsum(Lamb_S)/sum(Lamb_S),main="Explained Variance") abline(h=0.85,col="red") abline(v=1:12,col="grey",lty=2) L<-S.Eig$vectors[,1:6]%*%diag(sqrt(S.Eig$values[1:6])) # take e-vectors*sqrt(e-values)-> L matrix L_PC<-varimax(L)$loadings # rotate L to be more interpretable barplot(-L_PC[,1],names=names(design),col=c(1,2,3,4,5,6,7)) barplot(-L_PC[,2],names=names(design),col=c(1,2,3,4,5,6,7)) barplot(-L_PC[,3],names=names(design),col=c(1,2,3,4,5,6,7)) barplot(-L_PC[,4],names=names(design),col=c(1,2,3,4,5,6,7)) #### Fit meta-analysis models ### Fit moderator-free models for overall effect & heterogeneity ## Simple random-effects model null_RE<-rma.mv(yi, vi, data=newdataES, method="REML") ## Model that accounts for clustering of observations w/in studies null_SS <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML") ## Examine likelihood plots for the random effects # This takes a long time! # http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011 profile(null_SS,sigma2=1) profile(null_SS,sigma2=2) # It looks like study explains most of the clustering ## Examine overall effect size and heterogeneity tests summary(null_RE) summary(null_SS) ## Calculate I-squared (same for these two models) # See Higgins et al. 2003 I2<-((369.6241-383)/369.6241)*100 # < 0 reported as zero by convention ## Leave one out analysis, study-site-yr model (null_SS) res2Ahm <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ahmad20052006")) res2AlD <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="AlDeeb2003")) res2Bh <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Bhatti2005")) res2Ech <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Echegaray2009")) res2Hal <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Hallett2014")) res2Ohn <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ohnesorg2009")) res2Bab <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Babendreier2015")) res2Bak <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Baker2002")) print(null_SS) # full model for comparison print(res2Ahm,digits=3) print(res2AlD,digits=3) print(res2Bh,digits=3) print(res2Ech,digits=3) print(res2Hal,digits=3) print(res2Ohn,digits=3) # big change in effect size & conf. intervals print(res2Bab,digits=3) print(res2Bak,digits=3) ## Other diagnostics for site-yr/study model res<-rstandard(null_SS) fit<-fitted(null_SS) library(car) qqPlot(res$resid) # some pretty extreme outliers hat<-hatvalues(null_SS) plot(res$resid~hat) # here too, but outliers have low leverage abline(h=0) outlier<-as.data.frame(abs(res$resid) > 3) names(outlier)[1]<-paste("res") which(outlier$res==TRUE) newdataESout<-newdataES[-c(133,157,159,169,171,268,285,383), ] #remove outliers hist(newdataESout$yi) null_SS_out<- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataESout) # model w/o outliers summary(null_SS_out) # results similar w/ outliers excluded ### Refit moderator-free models, excluding Ohnesorg et al 2009 null_RE_ohn<-rma.mv(yi, vi, data=newdataES, method="REML", subset=(study!="Ohnesorg2009")) null_SS_ohn <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML", subset=(study!="Ohnesorg2009")) ## Examine likelihood plots for the random effects profile(null_SS_ohn,sigma2=1) profile(null_SS_ohn,sigma2=2) ## Examine overall effect size and heterogeneity tests summary(null_RE_ohn) summary(null_SS_ohn) ## Calculate I-squared I2_ohn<-((316.9-375)/316.9)*100 # < 0 reported as zero by convention ## Other diagnostics for site-yr/study model, excluding Ohnesorg res<-rstandard(null_SS_ohn) fit<-fitted(null_SS_ohn) library(car) qqPlot(res$resid) # still some outliers hat<-hatvalues(null_SS_ohn) plot(res$resid~hat) # outliers w/ low leverage abline(h=0) outlier<-as.data.frame(abs(res$resid) > 3) names(outlier)[1]<-paste("res") which(outlier$res==TRUE) newdataESout<-newdataES[-c(133,157,159,169,171,199, 268,285), ] #remove outliers hist(newdataESout$yi) null_SS_ohn_out<- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataESout, subset=(study!="Ohnesorg2009")) # model w/o outliers summary(null_SS_ohn_out) # results very similar w/ outliers excluded ### Fit moderator model to test the influence of other variables on effect size ## Create design matrix with effects coding and fit study-level model design2<-model.matrix(~insect+habitat+ai+plotsize+early+app, contrasts=list(habitat="contr.sum", insect="contr.sum", ai="contr.sum", app="contr.sum")) design2 <- design2[,c(-1)] mod <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES) ## Examine effect size and heterogeneity tests summary(mod) ## Calculate I-squared # See Higgins et al. 2003 I2<-((360.8055-377)/360.8055)*100 # by convention < 0 set to 0 ## Examine likelihood plots for each of the random effects # Takes a long time! # http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011 profile(mod,sigma2=1) profile(mod,sigma2=2) ## Leave-one-out analysis, moderator analysis resAhm <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ahmad20052006")) resAlD <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="AlDeeb2003")) resBh <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Bhatti2005")) resEch <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Echegaray2009")) resHal <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Hallett2014")) resOhn <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ohnesorg2009")) resBab <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Babendreier2015")) resBak <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Baker2002")) print(mod) # full model for comparison print(resAhm,digits=3) print(resAlD,digits=3) print(resBh,digits=3) print(resEch,digits=3) print(resHal,digits=3) print(resOhn,digits=3) # intercept quite different print(resBab,digits=3) print(resBak,digits=3) ## Other diagnostics for the moderator model res<-rstandard(mod) fit<-fitted(mod) hat<-hatvalues(mod) plot(res$resid~fit) plot(res$resid~hat) abline(h=0) library(car) qqPlot(res$resid) # again big outliers plot(res$resid~insect) plot(res$resid~habitat) plot(res$resid~fxn_grp) plot(res$resid~crop) plot(res$resid~ai) plot(res$resid~review) plot(res$resid~plotsize) plot(res$resid~early) outlier<-as.data.frame(abs(res$resid) > 3) ex.outlier<-as.data.frame(abs(res$resid) > 8) # category for just extreme outliers names(outlier)[1]<-paste("res") which(outlier$res==TRUE) names(ex.outlier)[1]<-paste("res") which(ex.outlier$res==TRUE) newdataESout<-newdataES[-c(157,159,171,268,285), ] design3<-design2[-c(157,159,171,268,285),] newdataESexout<-newdataES[-c(157,159), ] design4<-design2[-c(157,159),] hist(newdataESout$yi) hist(newdataESexout$yi) modout <- rma.mv(yi, vi, mods= ~design3, random = ~ 1 | study/site_yr_ID, data=newdataESout) # overall model modexout <- rma.mv(yi, vi, mods= ~design4, random = ~ 1 | study/site_yr_ID, data=newdataESexout) print(modout) print(modexout) # results very similar w/ outliers excluded ### Refit moderator model, excluding Ohnesorg et al. 2009 mod_ohn <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ohnesorg2009")) ## Examine effect size and heterogeneity tests summary(mod_ohn) ## Calculate I-squared # See Higgins et al. 2003 I2<-((309.4376-369)/309.4376)*100 # by convention < 0 set to 0 ## Diagnostics for moderator model w/o Ohnesorg et al. 2009 res<-rstandard(mod_ohn) fit<-fitted(mod_ohn) hat<-hatvalues(mod_ohn) plot(res$resid~fit) plot(res$resid~hat) abline(h=0) library(car) qqPlot(res$resid) # again big outliers outlier<-as.data.frame(abs(res$resid) > 3) ex.outlier<-as.data.frame(abs(res$resid) > 8) names(outlier)[1]<-paste("res") which(outlier$res==TRUE) names(ex.outlier)[1]<-paste("res") which(ex.outlier$res==TRUE) newdataESout<-newdataES[-c(133,157,159,169,171,268,285), ] design3<-design2[-c(133,157,159,169,171,268,285),] newdataESexout<-newdataES[-c(157,159), ] design4<-design2[-c(157,159),] hist(newdataESout$yi) hist(newdataESexout$yi) mod_ohnout <- rma.mv(yi, vi, mods= ~design3, random = ~ 1 | study/site_yr_ID, data=newdataESout, subset=(study!="Ohnesorg2009")) # overall model mod_ohnexout <- rma.mv(yi, vi, mods= ~design4, random = ~ 1 | study/site_yr_ID, data=newdataESexout, subset=(study!="Ohnesorg2009")) print(mod_ohnout) print(mod_ohnexout) # results very similar w/ outliers excluded #### Publication bias - these tools only seem to be available for rma command ### Unclear how to test this with clustered/hierarchical data structure ### Also not very relevant here b/c CI encloses zero in all models fsn(yi, vi, data=newdataES, type="Rosenberg") #Rosenberg's fail-safe N null<-rma(yi, vi, data=newdataES, method="REML") trimfill(null) #suggests no missing observations ### Weighted histogram of effect sizes library(ggplot2) ggplot(newdataES, aes(x=yi, fill=study, weight=(1/newdataES$vi))) + geom_histogram(binwidth=0.3, closed="right") + geom_vline(xintercept = 0) + theme_bw(base_size=22) + xlab(expression(paste("Effect size (", italic("d"),")"))) + ylab("") + scale_fill_manual(values = c("dodgerblue2","#E31A1C", # red "green4", "#6A3D9A", # purple "#FF7F00", # orange "brown","gold1", "skyblue2","#FB9A99", # lt pink "palegreen2", "#CAB2D6", # lt purple "#FDBF6F", # lt orange "gray70", "khaki2", "maroon","orchid1","deeppink1","blue1","steelblue4", "darkturquoise"))