library(metafor)

#### Import data & prepare dataset
data <- read.csv("Supplemental Data S1.NoInsecticide.csv")

### Prepare the dataset by removing unnecessary observations, etc.
data1 <- subset(data, exclude==0 &
                  (nontarget_functional_group=="omnivore" |
                   nontarget_functional_group=="mixed" |
                   nontarget_functional_group=="predator" |
                   nontarget_functional_group=="parasitoid") &
                  (response_variable_abbrev=="activity-density" |
                   response_variable_abbrev=="abundance" |
                   response_variable_abbrev=="parasitism"))

### Create new dataset with fewer variables
myvars <- c("id","study","peer_reviewed","continent","crop_species",
            "rate_g_kg_seed","rate_mg_seed","variety","planting_date",
            "crop_group","neonic_active_ingr","neonic_group","site_yr_ID",
            "plot_size","true_control_sample_size","true_expmtl_sample_size",
            "nontarget_location","nontarget_phylum","nontarget_class",
            "nontarget_insect","nontarget_order","nontarget_family",
            "nontarget_genus","nontarget_species","nontarget_finest_grouping",
            "nontarget_functional_group","nontarget_stage","control_mean",
            "expmtl_mean","control_std_dev","expmtl_std_dev","control_RSD",
            "early_samp_prop","nontarget_taxa","response_variable_abbrev")
newdata <- data1[myvars]

### Make sure all continuous variables are stored as numbers
newdata$plot_size <- as.numeric(as.character(newdata$plot_size))
newdata$true_control_sample_size <- as.numeric(as.character(newdata$true_control_sample_size))
newdata$true_expmtl_sample_size <- as.numeric(as.character(newdata$true_expmtl_sample_size))
newdata$control_std_dev <- as.numeric(as.character(newdata$control_std_dev))
newdata$expmtl_std_dev <- as.numeric(as.character(newdata$expmtl_std_dev))
newdata$control_RSD <- as.numeric(as.character(newdata$control_RSD))
newdata$early_samp_prop <- as.numeric(as.character(newdata$early_samp_prop))
str(newdata)

### Calculate effect sizes (standardized mean differences) and sampling variances
newdataES <- escalc(measure="SMD", n1i=true_expmtl_sample_size, n2i=true_control_sample_size,
                    m1i=expmtl_mean, m2i=control_mean,
                    sd1i=expmtl_std_dev, sd2i=control_std_dev,
                    data=newdata, vtype="UB")

### Set up variables for the model
insect <- factor(newdataES[,"nontarget_insect"], levels=c("Insect","Non-insect"))
habitat <- factor(newdataES[,"nontarget_location"], levels=c("soil-associated","aboveground"))
fxn_grp <- factor(newdataES[,"nontarget_functional_group"], levels=c("mixed","omnivore","predator","parasitoid"))
crop <- factor(newdataES[,"crop_group"], levels=c("maize","soybean","other"))
ai <- factor(newdataES[,"neonic_group"], levels=c("IMI","THX/CLO"))
review <- factor(newdataES[,"peer_reviewed"], levels=c("yes","no"))
plotsize <- newdataES[,"plot_size"]
early <- newdataES[,"early_samp_prop"]
taxa <- newdataES[,"nontarget_taxa"]

hist(plotsize)
hist(log(plotsize))
plotsize <- log(plotsize)                 # log-transform because of the strongly skewed distribution
plotsize <- scale(plotsize, scale=FALSE)  # center log plot size
hist(early)
hist(log(early+.1))
early <- log(early+.1)                    # log-transform to make the distribution more nearly normal
early <- scale(early, scale=FALSE)        # center proportion of early-season samples

#### Assess potential collinearity among predictors
### Examine collinearity among predictors via GVIF
library(nlme)
mod_vif <- lme(yi ~ insect+habitat+fxn_grp+crop+ai+review+plotsize+early,
               random=~1|study/site_yr_ID, data=newdataES)
library(car)
vif(mod_vif)

### Examine collinearity among predictors via correlations
design <- model.matrix(~insect+habitat+fxn_grp+crop+ai+review+plotsize+early)
design <- design[,-1]    # drop the intercept column
design <- scale(design)  # standardize so that cov() returns correlations
S <- cov(design)
S.vec <- c(S)
S.vec <- S.vec[S.vec < .999] # exclude self-correlations (=1)
hist(S.vec, breaks=10, main="Correlations among moderators")
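## Added sketch (not part of the original analysis): list the specific pairs of
## moderator columns with the largest correlations, rather than reading them off
## the histogram. Uses the scaled design matrix and its correlation matrix `S`
## created above; the 0.5 cutoff is an arbitrary illustration.
high_cor <- which(abs(S) > 0.5 & abs(S) < 0.999, arr.ind = TRUE)
high_cor <- high_cor[high_cor[, 1] < high_cor[, 2], , drop = FALSE] # keep each pair once
data.frame(var1 = colnames(S)[high_cor[, 1]],
           var2 = colnames(S)[high_cor[, 2]],
           r = round(S[high_cor], 3))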
summary(S.vec)
small <- S.vec < 0.2
table(small)

### Look at correlation structure of predictors using PCA/varimax rotation
S.Eig <- eigen(S)
E_S <- S.Eig$vectors
Lamb_S <- S.Eig$values
par(mfrow=c(1,2))
plot(Lamb_S, main="Scree Plot")
plot(cumsum(Lamb_S)/sum(Lamb_S), main="Explained Variance")
abline(h=0.85, col="red")
abline(v=1:12, col="grey", lty=2)
L <- S.Eig$vectors[,1:11] %*% diag(sqrt(S.Eig$values[1:11])) # eigenvectors * sqrt(eigenvalues) -> loading matrix L
L_PC <- varimax(L)$loadings # rotate L to be more interpretable
barplot(-L_PC[,1], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
barplot(-L_PC[,2], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
barplot(-L_PC[,3], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
barplot(-L_PC[,4], names.arg=colnames(design), col=c(1,2,3,3,3,4,4,5,6,7,8))
# looks reasonable; no strong correlations between moderators

#### Fit meta-analysis models
### Fit moderator-free models for overall effect size & heterogeneity
## Simple random-effects model
null_RE <- rma.mv(yi, vi, data=newdataES, method="REML")
## Model that accounts for clustering of observations within studies
null_SS <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML")

## Examine likelihood plots for the random effects
# This takes a long time!
# http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011
profile(null_SS, sigma2=1)
profile(null_SS, sigma2=2)
# It looks like site-year explains most of the clustering within studies

## Examine overall effect size and heterogeneity tests
summary(null_RE)
summary(null_SS)

## Calculate I-squared (same for these two models)
# See Higgins et al. 2003
I2 <- ((622.5037-606)/622.5037)*100

## Leave-one-out analysis for the study/site-year model (null_SS)
res2Ahm <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ahmad20052006"))
res2AlD <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="AlDeeb2003"))
res2Bh <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Bhatti2005"))
res2Ch <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Charlet2007"))
res2Dg <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Douglas2015"))
res2Ech <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Echegaray2009"))
res2Hal <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Hallett2014"))
res2Har <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Harmon2006"))
res2HB <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="HeidelBaker2012Carter2013"))
res2Kr <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Krauter2001"))
res2Ohn <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ohnesorg2009"))
res2Sg <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Seagraves2012"))
res2Tin <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Tinsley2012"))
res2Alb <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Albajes2003"))
res2Bab <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Babendreier2015"))
res2Bak <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Baker2002"))
res2delaP <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="delaPoza2005Farinos2008"))
res2Schm <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Schmidt2002"))
res2SC <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="SoteloCardona2010"))

print(null_SS) # full model for comparison
print(res2Ahm, digits=3)
print(res2AlD, digits=3)
print(res2Bh, digits=3) # Q test becomes marginal (P = 0.06)
print(res2Ch, digits=3)
print(res2Dg, digits=3)
print(res2Ech, digits=3)
print(res2Hal, digits=3)
print(res2Har, digits=3)
print(res2HB, digits=3)
print(res2Kr, digits=3)
print(res2Ohn, digits=3)
print(res2Sg, digits=3)
print(res2SC, digits=3)
print(res2Tin, digits=3)
print(res2Alb, digits=3)
print(res2Bab, digits=3)
print(res2Bak, digits=3)
print(res2delaP, digits=3)
print(res2Schm, digits=3)

## Other diagnostics for the study/site-year model
res <- rstandard(null_SS)
fit <- fitted(null_SS)
library(car)
qqPlot(res$resid) # residuals have long tails, but not too bad
hat <- hatvalues(null_SS)
plot(res$resid ~ hat)
abline(h=0)
outlier <- as.data.frame(abs(res$resid) > 3)
names(outlier)[1] <- "res"
which(outlier$res==TRUE)
newdataESout <- newdataES[-c(118,561), ] # remove outliers (standardized residual > 3 or < -3)
hist(newdataESout$yi)
null_SS_out <- rma.mv(yi, vi, random = ~ 1 | study/site_yr_ID, data=newdataESout) # model w/o outliers
summary(null_SS_out) # results very similar with outliers excluded

### Fit moderator model to test the influence of other variables on effect size
## Create design matrix with effects coding for moderators
design2 <- model.matrix(~insect+habitat+fxn_grp+crop+ai+review+plotsize+early,
                        contrasts.arg=list(crop="contr.sum", habitat="contr.sum",
                                           fxn_grp="contr.sum", insect="contr.sum",
                                           ai="contr.sum", review="contr.sum"))
design2 <- design2[,-1] # drop the intercept column

## Fit model
mod <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, method="REML")

## Examine effect size and heterogeneity tests
summary(mod)

## Calculate I-squared
# See Higgins et al. 2003
I2 <- ((584.4791-595)/584.4791)*100 # negative values are set to 0 by convention
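## Added helper (not in the original script): the hard-coded I-squared values
## above follow Higgins et al. 2003, I2 = (Q - df)/Q * 100, with Q and df = k - p
## taken from the printed model output. The same quantities can be pulled
## directly from the fitted rma.mv objects, which avoids retyping them if the
## data change.
I2_higgins <- function(fit) {
  I2 <- (fit$QE - (fit$k - fit$p)) / fit$QE * 100
  max(I2, 0) # negative values are reported as 0 by convention
}
I2_higgins(null_SS) # null study/site-year model
I2_higgins(mod)     # moderator model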
## Examine likelihood plots for each of the random effects
# This takes a long time!
# http://www.metafor-project.org/doku.php/analyses:konstantopoulos2011
profile(mod, sigma2=1) # zero estimate; does not seem to be identifiable
profile(mod, sigma2=2) # this one seems identifiable

## Test significance of moderators
# http://www.metafor-project.org/doku.php/tips:testing_factors_lincoms
anova(mod, btt=c(2))     # insect, p = .0032
anova(mod, btt=c(3))     # habitat, p = .234
anova(mod, btt=c(4,5,6)) # functional group, p = .132
anova(mod, btt=c(7,8))   # crop, p = .674
anova(mod, btt=c(9))     # active ingredient, p = .320
anova(mod, btt=c(10))    # peer review, p = .477
anova(mod, btt=c(11))    # plot size, p = .559
anova(mod, btt=c(12))    # early season, p = .435

## Generate CIs from the fitted moderator model
# Store mean values of the moderator variables in the dataset
predmeans <- as.data.frame(colMeans(design2))
names(predmeans)[names(predmeans)=="colMeans(design2)"] <- "mean"
# Generate CIs weighted similarly to moderator representation in the dataset:
# predictions for insects vs. non-insects, with all other moderators held at
# their mean values across the dataset
CI_ins <- predict(mod, newmods=c(1, predmeans[2:11,]), addx=TRUE)
CI_nonins <- predict(mod, newmods=c(-1, predmeans[2:11,]), addx=TRUE)
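## Added sketch (not in the original script): a quick base-graphics comparison
## of the two predicted means generated above; predict.rma returns the estimate
## and confidence bounds as $pred, $ci.lb, and $ci.ub.
est <- c(CI_ins$pred, CI_nonins$pred)
lo  <- c(CI_ins$ci.lb, CI_nonins$ci.lb)
hi  <- c(CI_ins$ci.ub, CI_nonins$ci.ub)
plot(est, 1:2, xlim = range(c(lo, hi, 0)), ylim = c(0.5, 2.5), pch = 16,
     yaxt = "n", xlab = "Effect size (d)", ylab = "")
axis(2, at = 1:2, labels = c("Insects", "Non-insects"), las = 1)
segments(lo, 1:2, hi, 1:2) # 95% confidence intervals
abline(v = 0, lty = 2)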
## Leave-one-out analysis, moderator model
resAhm <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ahmad20052006"))
resAlD <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="AlDeeb2003"))
resBh <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Bhatti2005"))
resCh <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Charlet2007"))
resDg <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Douglas2015"))
resEch <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Echegaray2009"))
resHal <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Hallett2014"))
resHar <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Harmon2006"))
resHB <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="HeidelBaker2012Carter2013"))
resKr <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Krauter2001"))
resOhn <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Ohnesorg2009"))
resSg <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Seagraves2012"))
resTin <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Tinsley2012"))
resAlb <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Albajes2003"))
resBab <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Babendreier2015"))
resBak <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Baker2002"))
resdelaP <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="delaPoza2005Farinos2008"))
resSchm <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="Schmidt2002"))
resSC <- rma.mv(yi, vi, mods= ~design2, random = ~ 1 | study/site_yr_ID, data=newdataES, subset=(study!="SoteloCardona2010"))

print(mod) # full model for comparison
print(resAhm, digits=3)
print(resAlD, digits=3)
print(resBh, digits=3)
print(resCh, digits=3)
print(resDg, digits=3)
print(resEch, digits=3)
print(resHal, digits=3)
print(resHar, digits=3)
print(resHB, digits=3)
print(resKr, digits=3)
print(resOhn, digits=3) # Q-test P-value becomes 0.057
print(resSg, digits=3)
print(resSC, digits=3)  # Q-test P-value becomes 0.338
print(resTin, digits=3)
print(resAlb, digits=3)
print(resBab, digits=3)
print(resBak, digits=3)
print(resdelaP, digits=3)
print(resSchm, digits=3)

## Other diagnostics for the moderator model
res <- rstandard(mod)
fit <- fitted(mod)
library(car)
qqPlot(res$resid)
hat <- hatvalues(mod)
plot(res$resid ~ fit)
plot(res$resid ~ hat)
abline(h=0)
plot(res$resid ~ insect)
plot(res$resid ~ habitat)
plot(res$resid ~ fxn_grp)
plot(res$resid ~ crop)
plot(res$resid ~ ai)
plot(res$resid ~ review)
plot(res$resid ~ plotsize)
plot(res$resid ~ early)
outlier <- as.data.frame(abs(res$resid) > 3)
names(outlier)[1] <- "res"
which(outlier$res==TRUE)
newdataESout <- newdataES[-c(118,561), ] # remove outliers
design3 <- design2[-c(118,561),]         # design matrix w/o the outlier rows
hist(newdataESout$yi)
modout <- rma.mv(yi, vi, mods= ~design3, random = ~ 1 | study/site_yr_ID, data=newdataESout) # refit model w/o outliers
summary(modout)
anova(modout, btt=c(2))     # insect, p = .0038
anova(modout, btt=c(3))     # habitat, p = .271
anova(modout, btt=c(4,5,6)) # functional group, p = .156
anova(modout, btt=c(7,8))   # crop, p = .696
anova(modout, btt=c(9))     # active ingredient, p = .312
anova(modout, btt=c(10))    # peer review, p = .488
anova(modout, btt=c(11))    # plot size, p = .547
anova(modout, btt=c(12))    # early season, p = .437
# results very similar with outliers excluded

#### Publication bias
### These tools only seem to be available for rma() models; it is unclear how to
### test publication bias with a clustered/hierarchical data structure
fsn(yi, vi, data=newdataES, type="Rosenberg") # Rosenberg's fail-safe N
null <- rma(yi, vi, data=newdataES, method="REML")
trimfill(null) # suggests no missing observations

### Weighted histogram of effect sizes
library(ggplot2)
ggplot(newdataES, aes(x=yi, fill=study, weight=1/vi)) +
  geom_histogram(binwidth=0.35, closed="right") +
  geom_vline(xintercept = 0) +
  theme_bw(base_size=22) +
  xlab(expression(paste("Effect size (", italic("d"), ")"))) +
  ylab("") +
  scale_fill_manual(values = c("dodgerblue2", "#E31A1C", # red
                               "green4", "#6A3D9A",      # purple
                               "#FF7F00",                # orange
                               "brown", "gold1",
                               "skyblue2", "#FB9A99",    # light pink
                               "palegreen2", "#CAB2D6",  # light purple
                               "#FDBF6F",                # light orange
                               "gray70", "khaki2",
                               "maroon", "orchid1", "deeppink1", "blue1",
                               "steelblue4", "darkturquoise"))
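## Added sketch (not in the original script): two further small-study checks on
## the simple (non-hierarchical) rma model fitted above; like fsn() and
## trimfill(), these ignore the clustering of observations within studies.
funnel(null, xlab="Effect size (d)")
regtest(null) # Egger-type regression test for funnel plot asymmetry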