# Change this file path to your own preferred working directory, which holds the data setwd("./") # REQUIRED LIBRARIES #library(devtools) #devtools::install_github("singmann/afex@master") #devtools::install_github("davidgohel/ReporteRsjars") #devtools::install_github("davidgohel/ReporteRs") #devtools::install_github("tomwenseleers/export") library(afex) # also loads lme4 library(export) library(car) library(effects) library(doBy) library(lattice) library(lsmeans) library(multcomp) library(coxme) library(ggplot2) library(ggthemes) library(scales) library(mgcv) library(brms) # for Bayesian analyses, for installation info see https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started library(caret) library(coda) library(survival) library(spBayesSurv) library(ordinal) library(MASS) # LOAD DATA # Individual-level data data <- read.csv("Original individual data.csv") data$cage_id <- factor(data$cage_id) data$treatment <- factor(data$treatment, levels=c("hexane", "c23_high","c23_low","c25_high","c25_low","c27_high","c27_low")) head(data) # We first expand the data to include additional, derived variables. # We added the following variables: # "developed" (0/1): this describes whether the focal worker has a terminal oocyte >2 mm or not (described by Amsalem et al as indicating "ready-to-lay eggs"), # and ignores the presence/absence of oocyte resorption in that worker. # "developed.non.resorbed" (0/1): describes whether the focal worker has a terminal oocyte >2 mm AND has # non-resorbed ovaries or no data regarding resorption. # This is the metric most comparable to that used in Van Oystaeyen et al. 2014, which was based on the ordinal scoring scale developed by Duchateau & Velthuis (1989). # Specifically, in Van Oystaeyen et al. 2014, having resorbed ovaries (type IV-R of Duchateau & Velthuis 1989) and having ovaries with a mature, # nonresorbed terminal oocyte (type IV of Duchateau & Velthuis 1989) are mutually exclusive possibilities. # In Amsalem et al.'s scoring system this is not the case, as workers with resorbed ovaries could there also be scored as having fully developed ovaries. # "bodysizec": mean-centered body size data$developed <- (data$avg_terminal_oocyte_diam_mm>2)*1 data$developed.non.resorbed <- (data$developed==1 & (data$resorbed==0|is.na(data$resorbed)))*1 data$bodysizec <- data$body_size_mm - mean(data$body_size_mm) # Group-level data (per treatment cage) group.data <- read.csv("Original group data.csv") group.data$cage_id <- factor(group.data$cage_id) group.data$treatment <- factor(group.data$treatment, levels=c("hexane", "c23_high","c23_low","c25_high","c25_low","c27_high","c27_low")) head(group.data) # Again, we first expand the data to include additional, derived variables. # We added the following variables: # "event": describes if egg laying was observed in a particular cage, allowing for right censored survival analysis. # "meanbodysizec": average body size of individuals in cage, mean-centered, taken from the individual data. group.data$event <- (group.data$time_to_egglaying_days<11)*1 meanvals <- summaryBy(body_size_mm + avg_terminal_oocyte_diam_mm ~ cage_id, data=data, FUN="mean", na.rm=T) colnames(meanvals) <- c("cage_id", "meanbodysize", "meantermoocytesize") group.data$meanbodysize <- meanvals$meanbodysize[match(group.data$cage_id, meanvals$cage_id)] group.data$meanbodysizec <- scale(meanvals$meanbodysize[match(group.data$cage_id, meanvals$cage_id)], scale=F) # SAMPLE SIZES PER WORKER TYPE, TREATMENT AND COLONY (TABLE 1) group.data.exp=group.data[group.data$worker_type=="experienced",] group.data.exp$colony_id=droplevels(group.data.exp$colony_id) group.data.naiv=group.data[group.data$worker_type=="naive",] group.data.naiv$colony_id=droplevels(group.data.naiv$colony_id) table(group.data.exp$treatment,group.data.exp$colony_id) # a b c d e f g # hexane 8 2 0 3 1 3 6 # c23_high 8 0 0 2 4 2 4 # c23_low 0 0 0 0 5 1 4 # c25_high 10 0 0 4 4 2 3 # c25_low 8 1 1 1 4 1 5 # c27_high 7 0 0 3 4 2 4 # c27_low 0 0 0 0 4 2 4 table(group.data.naiv$treatment,group.data.naiv$colony_id) table(group.data$treatment,group.data$colony_id) table(data$treatment) # mix # hexane 16 # c23_high 6 # c23_low 4 # c25_high 13 # c25_low 12 # c27_high 6 # c27_low 6 # S1. ANALAYSES SHOWING THAT BODY SIZE HAD AN IMPORTANT CONFOUNDING EFFECT (TABLE S1) set_sum_contrasts() # because data are imbalance we use Type III tests below and use sum coding # Body size differs between colonies (Table S1a): # (we only use the data from known colonies here) Anova(lm(body_size_mm ~ colony_id, data=data[data$colony_id != "mix", ]), type="III") # Analysis of Variance Table # # Response: body_size_mm # Df Sum Sq Mean Sq F value Pr(>F) # colony_id 6 3.689 0.61487 7.0377 4.134e-07 *** # Residuals 373 32.589 0.08737 # Body size also differs between CHC treatments (Table S1b) : # This contrasts with the claim of Amsalem et al that "body size did not differ in the different treatments" Anova(lm(body_size_mm ~ treatment + worker_type, data=data), type="III") # Anova Table (Type III tests) # # Response: body_size_mm # Sum Sq Df F value Pr(>F) # (Intercept) 5698.8 1 56328.5962 < 2.2e-16 *** # treatment 1.6 6 2.6489 0.015295 * # worker_type 0.8 1 8.1854 0.004381 ** # Residuals 56.6 559 # S3. ANALYSIS OF OVARY MEASURE THAT BEST PREDICTS EGG LAYING USING POISSON GLMs # We first expanded the group-dataset to include, per treatment cage, the proportion of workers with # developed and/or resorbed ovaries. The new variables are: # "prop.non.developed.non.resorbed": the proportion of workers with non-developed and non-resorbed ovaries. # "prop.non.developed.resorbed": the proportion of workers with non-developed and resorbed ovaries. # "prop.developed.non.resorbed": the proportion of workers with developed and non-resorbed ovaries. # "prop.developed.resorbed": the proportion of workers with developed but resorbed ovaries. # "num.days.egg.laid": the number of days that eggs were laid in a particular cage data$developed <- (data$avg_terminal_oocyte_diam_mm>2)*1 # data$resorbed[is.na(data$resorbed)] <- 0 # You may count missing/ambiguous data as non-RESORBED, or not, depending on your definition data$non.developed.non.resorbed <- (data$developed==0 & data$resorbed==0)*1 data$non.developed.resorbed <- (data$developed==0 & data$resorbed==1)*1 data$developed.non.resorbed <- (data$developed==1 & data$resorbed==0)*1 data$developed.resorbed <- (data$developed==1 & data$resorbed==1)*1 data$ovdevmultinom <- NA # ovary development rescored as multinomial variable with all 4 categories data$ovdevmultinom[data$non.developed.non.resorbed==1] = "nondevnonresorbed" data$ovdevmultinom[data$non.developed.resorbed==1] = "nondevresorbed" data$ovdevmultinom[data$developed.non.resorbed==1] = "devnonresorbed" data$ovdevmultinom[data$developed.resorbed==1] = "devresorbed" data$ovdevmultinom = factor(data$ovdevmultinom,levels=c("nondevnonresorbed","devnonresorbed","devresorbed","nondevresorbed")) data$ovdevordered <- ordered(data$ovdevmultinom,levels=c("nondevnonresorbed","nondevresorbed","devresorbed","devnonresorbed")) # ovary development scored as ordinal variable ordered from low to high ovary development numcatgs <- summaryBy(non.developed.non.resorbed + non.developed.resorbed + developed.non.resorbed + developed.resorbed ~ cage_id, data=data, FUN="sum", na.rm=T) colnames(numcatgs) <- c("cage_id", "non.developed.non.resorbed", "non.developed.resorbed", "developed.non.resorbed", "developed.resorbed") numcatgs$num.workers <- rowSums(numcatgs[,2:5], na.rm=T) group.data$num.non.developed.non.resorbed <- numcatgs$non.developed.non.resorbed[match(group.data$cage_id, numcatgs$cage_id)] group.data$num.non.developed.resorbed <- numcatgs$non.developed.resorbed[match(group.data$cage_id, numcatgs$cage_id)] group.data$num.developed.non.resorbed <- numcatgs$developed.non.resorbed[match(group.data$cage_id, numcatgs$cage_id)] group.data$num.developed.resorbed <- numcatgs$developed.resorbed[match(group.data$cage_id, numcatgs$cage_id)] group.data$num.workers <- numcatgs$num.workers[match(group.data$cage_id, numcatgs$cage_id)] group.data$prop.non.developed.non.resorbed <- group.data$num.non.developed.non.resorbed/group.data$num.workers group.data$prop.non.developed.resorbed <- group.data$num.non.developed.resorbed/group.data$num.workers group.data$prop.developed.non.resorbed <- group.data$num.developed.non.resorbed/group.data$num.workers group.data$prop.developed.resorbed <- group.data$num.developed.resorbed/group.data$num.workers group.data$num.days.egg.laid <- (11-group.data$time_to_egglaying_days) # We then used the proportions of workers with each of the different types of ovary activation as well as # the number of days that eggs were laid, centered mean body size and worker type # as possible predictors of worker egg laying: # S3a. ANALYSIS OF OVARY MEASURE THAT BEST PREDICTS EGG LAYING USING QUASIPOISSON GLM # here we code colony_id as a fixed factor and take into account possible overdispersion # at the cage level by using a quasipoisson family set_treatment_contrasts() fit <- glm(total_eggs ~ prop.non.developed.resorbed + prop.developed.non.resorbed + prop.developed.resorbed + num.days.egg.laid + colony_id, data=group.data[complete.cases(group.data),], family=quasipoisson) summary.predsegglaying.glm <- summary(fit)$coefficients summary.predsegglaying.glm <- data.frame(summary.predsegglaying.glm,confint(fit)) summary.predsegglaying.glm <- summary.predsegglaying.glm[,c(1,5,6,2,3,4)] colnames(summary.predsegglaying.glm) <- c("Estimate","Lower95CL","Upper95CL","SE","t.value","p.value") write.csv(summary.predsegglaying.glm, file="summary.predsegglaying.glm.csv") # check importance of different variables in the model using formal effect size estimates varimp=data.frame(varImp(fit)) colnames(varimp)=c("z_value") varimp$r=varimp$z_value/sqrt(varimp$z_value^2+fit$df.residual) # partial correlation coefficient (Nakagawa & Cuthill 2007) varimp # after the nr of days over which eggs were laid, the prop of workers with developed and nonresorbed eggs had the highest importance in predicting the total nr of eggs laid (based on the absolute t value as a measure of the effect size) write.csv(varimp,file="varimp.predsegglaying.glm.csv") # check if multicollinearity was a problem vifs <- as.data.frame(car::vif(fit)) # all variance inflation factors were <<5, hence multicollinearity was not a problem write.csv(vifs,file="vifs.predsegglaying.glm.csv") summary(fit) # S3b. ANALYSIS OF OVARY MEASURE THAT BEST PREDICTS EGG LAYING USING POISSON GLMM # here we include colony_id and cage_id as random intercepts instead set_treatment_contrasts() fit <- glmer(total_eggs ~ (1|cage_id) + (1|colony_id) + prop.non.developed.resorbed + prop.developed.non.resorbed + prop.developed.resorbed + num.days.egg.laid, data=group.data, family=poisson, control=glmerControl(optimizer="Nelder_Mead", optCtrl=list(maxfun=2e4))) AIC(fit) # 1257.673 (lowest Aikaike Information Criterion, best model of any alternatives we tried, # note that excluding covariate num.days.egg.laid gave qualitatively identical results, as did # the inclusion of mean-centered mean body size as an additional covariate; either of these models # had a worse AIC though) summary(fit) conf <- as.data.frame(confint(fit)) # confidence intervals on fixed & random coefs write.csv(conf, file="confints.predsegglaying.glmm.csv") summary.predsegglaying <- summary(fit)$coefficients write.csv(summary.predsegglaying, file="summary.predsegglaying.glmm.csv") # The proportion of workers with developed and non-resorbed ovaries comes out as best predictor # of worker egg laying in all analyses based on formal effect size estimates # S4a-c. ANALYSIS OF TOTAL NUMBER OF EGGS LAID USING POISSON MIXED MODEL (Tables S4a-c) # model & results table set_sum_contrasts() egg.number.model <- glmer(total_eggs ~ (1|cage_id) + (1|colony_id) + treatment * worker_type + meanbodysizec, data=group.data, family=poisson, control=glmerControl(optimizer="bobyqa")) Anova(egg.number.model, type="III") set_treatment_contrasts() egg.number.model <- glmer(total_eggs ~ (1|cage_id) + (1|colony_id) + treatment * worker_type + meanbodysizec, data=group.data, family=poisson, control=glmerControl(optimizer="bobyqa")) AIC(egg.number.model) summary(egg.number.model) summary.egg.number <- data.frame(summary(egg.number.model)$coefficients) write.csv(summary.egg.number, file="summary.egg.number.glmm.csv") conf <- confint(egg.number.model) write.csv(conf, file="confints.egg.number.glmm.csv") # test for multicollinearity vifs <- as.data.frame(car::vif(glm(total_eggs ~ worker_type*treatment + meanbodysizec, data=group.data))) # al generalized variance inflation factors corrected for d.f. are less than 5, the recommended cutoff for collinearity (Fox and Monette 1992) write.csv(vifs, file="vifs.egg.number.glmm.csv") plot(effect(egg.number.model, term="treatment*worker_type"), multiline=T, ci.style="bars") egg.number.df <- data.frame(confint(lsmeans(egg.number.model, list(~treatment|worker_type)), type="response")) write.csv(egg.number.df, file="lsmeans.egg.number.csv") plot(effect(egg.number.model, term="meanbodysizec"), type="response") # cages with larger workers produced more eggs padj <- data.frame(summary(contrast(lsmeans(egg.number.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="fdr"), type="response")) pvals <- data.frame(summary(contrast(lsmeans(egg.number.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="none"), type="response")) pvals$p.FDRadj <- padj$p.value write.csv(pvals, file="contrasts.egg.number.glmm.csv") pvals$treatment <- gsub(" - hexane", "", pvals$contrast) pvals$treatmworkertype <- interaction(pvals$treatment, pvals$worker_type) egg.number.df$treatmworkertype <- interaction(egg.number.df$treatment, egg.number.df$worker_type) egg.number.df$p_value[match(pvals$treatmworkertype, egg.number.df$treatmworkertype)] <- pvals$p.value egg.number.df write.csv(egg.number.df, file="lsmeans.egg.number.glmm.csv") # plot colors <- c("brown3", "black", "green4") pvals$cols <- colors[(pvals$p.value<=0.05)*(pvals[,3]<1)*1+(pvals$p.value<=0.05)*(pvals[,3]>1)*3+(pvals$p.value>0.05)*2] egg.number.df$cols[match(pvals$treatmworkertype, egg.number.df$treatmworkertype)] <- pvals$cols egg.number.df$cols[is.na(egg.number.df$cols)] <- "black" ggplot(egg.number.df, aes(x=treatment, y=rate, colour=cols, group=worker_type, ymin=asymp.LCL, ymax=asymp.UCL)) + geom_errorbar(aes(ymin=asymp.LCL, ymax=asymp.UCL), width=0, size=1) + geom_point(stat="identity", size=5) + facet_wrap(~ worker_type) + theme_few() + theme(legend.position="none", axis.line=element_line(colour="black"), panel.background=element_rect(fill="white"), axis.text=element_text(colour="black")) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(y = "Total number of eggs laid", x="") + scale_y_continuous(breaks=seq(5,35,5)) + #, trans=log10_trans()) + scale_x_discrete(breaks=c("hexane", "c23_high", "c23_low", "c25_high", "c25_low", "c27_high", "c27_low"), labels=c("Hexane control", "C23 high", "C23 low", "C25 high", "C25 low", "C27 high", "C27 low")) + ggtitle("") + scale_colour_manual(values=c("black", "brown3", "green4")) graph2ppt(file="figure 1A.pptx") # S4d-e. ANALYSIS OF TOTAL NUMBER OF EGGS LAID USING FIXED EFFECT POISSON GLM (Tables S4d-e) # we ran the data on experienced and naive workers separately due to the unbalanced allocation of colonies to treatments) # possible overdispersion at the cage level is taken care of by using a quasipoisson model set_treatment_contrasts() egg.number.model.exp <- glm(total_eggs ~ colony_id + treatment + meanbodysizec, data=group.data[group.data$worker_type=="experienced",], family=quasipoisson) sum.exp <- summary(egg.number.model.exp)$coefficients sum.exp <- data.frame(model="experienced workers",sum.exp) sum.exp confint.exp <- confint(egg.number.model.exp) confint.exp <- data.frame(model="experienced workers",confint.exp) confint.exp set_sum_contrasts() egg.number.model.exp <- glm(total_eggs ~ colony_id + treatment + meanbodysizec, data=group.data[group.data$worker_type=="experienced",], family=quasipoisson) lsmeans.exp <- as.data.frame(summary(lsmeans(egg.number.model.exp, list(~treatment), type="response"))[[1]])[,-4] lsmeans.exp <- data.frame(model="experienced workers",lsmeans.exp) lsmeans.exp contr.exp <- data.frame(summary(contrast(lsmeans(egg.number.model.exp, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response"))[,-4] contr.exp <- data.frame(model="experienced workers",contr.exp) contr.exp set_treatment_contrasts() egg.number.model.naiv <- glm(total_eggs ~ treatment + meanbodysizec, data=group.data[group.data$worker_type=="naive",], family=quasipoisson) sum.naiv <- summary(egg.number.model.naiv)$coefficients sum.naiv <- data.frame(model="naive workers",sum.naiv) sum.naiv confint.naiv <- confint(egg.number.model.naiv) confint.naiv <- data.frame(model="naive workers",confint.naiv) confint.naiv set_sum_contrasts() egg.number.model.naiv <- glm(total_eggs ~ treatment + meanbodysizec, data=group.data[group.data$worker_type=="naive",], family=quasipoisson) lsmeans.naiv <- as.data.frame(summary(lsmeans(egg.number.model.naiv, list(~treatment), type="response"))[[1]])[,-4] lsmeans.naiv <- data.frame(model="naive workers",lsmeans.naiv) lsmeans.naiv contr.naiv <- data.frame(summary(contrast(lsmeans(egg.number.model.naiv, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response"))[,-4] contr.naiv <- data.frame(model="naive workers",contr.naiv) contr.naiv sum.all <- rbind(sum.exp,sum.naiv) write.csv(sum.all,"summary.egg.number.glm.csv") confint.all <- rbind(confint.exp,confint.naiv) write.csv(confint.all,"confints.egg.number.glm.csv") lsmeans.all <- rbind(lsmeans.exp,lsmeans.naiv) write.csv(lsmeans.all,"lsmeans.egg.number.glm.csv") contr.all <- rbind(contr.exp,contr.naiv) contr.all$p.fdradj <- p.adjust(contr.all$p.value,method="fdr") contr.all write.csv(contr.all,"contrasts.egg.number.glm.csv") # S4f. ANALYSIS OF TOTAL NUMBER OF EGGS LAID USING BAYESIAN POISSON MIXED MODEL (Table S4f) # we use to separate models for naive and experienced workers to facilitate comparison with the relevant control # we use the default weakly informative/flat prior, but results are not sensitive to the exact choice of prior egg.number.model.b1 <- brm(total_eggs ~ -1 + (1|cage_id) + (1|colony_id) + treatment + meanbodysizec, data=group.data[(group.data$worker_type=="experienced"),], family=poisson, control = list(adapt_delta = 0.99, max_treedepth = 20)) plot(egg.number.model.b1) # trace and density plots for the MCMC samples look OK, so default priors OK summary(egg.number.model.b1) summary(egg.number.model.b1)$fixed # 95% prediction credible intervals on log link scale summary(egg.number.model.b1)$random # confidence intervals on random effect SDs # $cage_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.6441505 0.06395403 0.5309795 0.7811823 781.8235 1.000363 # # $colony_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.4360621 0.1913451 0.1866526 0.9230201 840.9519 1.000055 preds1 <- exp(summary(egg.number.model.b1)$fixed[,c(1,3,4)]) # 95% prediction credible intervals on original backtransformed scale preds1 <- data.frame(Model="experienced workers",preds1) colnames(preds1) <- c("Model","Estimate","lower95CI","upper95CI") preds1 stanplot(egg.number.model.b1) ht1 <- hypothesis(egg.number.model.b1,c("treatmentc23_high = treatmenthexane", "treatmentc23_low = treatmenthexane", "treatmentc25_high = treatmenthexane", "treatmentc25_low = treatmenthexane", "treatmentc27_high = treatmenthexane", "treatmentc27_low = treatmenthexane"))$hypothesis[,-5] # for C25_high experienced the expected value under the hypothesis lies outside the 95% CI for the hexane experienced group ht1 <- data.frame(Model="experienced workers",ht1) colnames(ht1) <- c("Model","Rel.rate","SE","lower95CI","upper95CI","") ht1 <- ht1[,-3] ht1[,2:4] <- exp(ht1[,2:4]) ht1 egg.number.model.b2 <- brm(total_eggs ~ -1 + (1|cage_id) + treatment + meanbodysizec, data=group.data[(group.data$worker_type=="naive"),], family=poisson, control = list(adapt_delta = 0.99, max_treedepth = 20)) plot(egg.number.model.b2) # trace and density plots for the MCMC samples look OK, so default priors OK summary(egg.number.model.b2) summary(egg.number.model.b2)$fixed # 95% prediction credible intervals on log link scale summary(egg.number.model.b2)$random # confidence intervals on random effect SDs # $cage_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.4469352 0.06756455 0.3287768 0.591964 849.5219 1.002741 preds2 <- exp(summary(egg.number.model.b2)$fixed[,c(1,3,4)]) # 95% prediction credible intervals on original backtransformed scale preds2 <- data.frame(Model="naive workers",preds2) colnames(preds2) <- c("Model","Estimate","lower95CI","upper95CI") preds2 stanplot(egg.number.model.b2) ht2 <- hypothesis(egg.number.model.b2,c("treatmentc23_high = treatmenthexane", "treatmentc23_low = treatmenthexane", "treatmentc25_high = treatmenthexane", "treatmentc25_low = treatmenthexane", "treatmentc27_high = treatmenthexane", "treatmentc27_low = treatmenthexane"))$hypothesis[,-5] # for C25_low naive the expected value under the hypothesis lies outside the 95% CI for the hexane experienced group ht2 <- data.frame(Model="naive workers",ht2) colnames(ht2) <- c("Model","Rel.rate","SE","lower95CI","upper95CI","") ht2 <- ht2[,-3] ht2[,2:4] <- exp(ht2[,2:4]) ht2 preds <- rbind(preds1,preds2) preds <- preds[!grepl("mean",rownames(preds)),] # only keep diff treatments write.csv(preds,"preds.egg.number.bayesianmm.csv") hts <- rbind(ht1,ht2) write.csv(hts,"contrasts.egg.number.bayesianmm.csv") # Table S3f # S5a-c. ANALYSIS OF TIME TO EGG-LAYING USING RIGHT-CENSORED FRAILTY MODEL (MIXED EFFECT SURVIVAL MODEL) # model & results table set_treatment_contrasts() egg.latency.model <- survreg(Surv(time_to_egglaying_days, event, type="right") ~ frailty(colony_id, distribution="gaussian") + worker_type * treatment + meanbodysizec, data=group.data, dist="weibull", na.action="na.omit") egg.latency.model AIC(egg.latency.model) summary.egg.latency <- summary(egg.latency.model)$table write.csv(summary.egg.latency, file="summary.egg.latency.glmm.csv") confints.egg.latency <- confint(egg.latency.model) write.csv(confints.egg.latency, file="confints.egg.latency.glmm.csv") set_sum_contrasts() egg.latency.model <- survreg(Surv(time_to_egglaying_days, event, type="right") ~ frailty(colony_id, distribution="gaussian") + worker_type * treatment + meanbodysizec, data=group.data, dist="weibull", na.action="na.omit") Anova(egg.latency.model, type="III") egg.latency.df <- data.frame(confint(lsmeans(egg.latency.model, list(~treatment|worker_type)), type="response")) write.csv(egg.latency.df, file="lsmeans.egg.latency.glmm.csv") padj <- data.frame(summary(contrast(lsmeans(egg.latency.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="fdr"), type="response")) pvals <- data.frame(summary(contrast(lsmeans(egg.latency.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="none"), type="response")) pvals$p.FDRadj <- padj$p.value write.csv(pvals, file="contrasts.egg.latency.glmm.csv") pvals$treatment <- gsub(" - hexane", "", pvals$contrast) pvals$treatmworkertype <- interaction(pvals$treatment, pvals$worker_type) egg.latency.df$treatmworkertype <- interaction(egg.latency.df$treatment, egg.latency.df$worker_type) egg.latency.df$p_value[match(pvals$treatmworkertype, egg.latency.df$treatmworkertype)] <- pvals$p.value egg.latency.df # plot colors <- c("brown3", "black", "green4") pvals$cols <- colors[(pvals$p.value<=0.05)*(pvals[,3]<1)*1+(pvals$p.value<=0.05)*(pvals[,3]>1)*3+(pvals$p.value>0.05)*2] egg.latency.df$cols[match(pvals$treatmworkertype, egg.latency.df$treatmworkertype)] <- pvals$cols egg.latency.df$cols[is.na(egg.latency.df$cols)] <- "black" ggplot(egg.latency.df, aes(x=treatment, y=response, col=cols, ymin=lower.CL, ymax=upper.CL)) + guides(col=FALSE) + geom_errorbar(aes(ymin=lower.CL, ymax=upper.CL, group=worker_type), width=0, size=1, alpha=I(1)) + geom_point(stat="identity", size=5) + facet_wrap(~ worker_type) + theme_few() + theme(legend.position="none", axis.line=element_line(colour="black"), panel.background=element_rect(fill="white"), axis.text=element_text(colour="black")) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(y = "Onset of egg laying (days)", x="") + scale_x_discrete(breaks=c("hexane", "c23_high", "c23_low", "c25_high", "c25_low", "c27_high", "c27_low"), labels=c("Hexane control", "C23 high", "C23 low", "C25 high", "C25 low", "C27 high", "C27 low")) + ggtitle("") + scale_colour_manual(values=c("black", "brown3", "green4")) graph2ppt(file="figure 1B.pptx", append=F) # S5d-e. ANALYSIS OF TIME TO EGG-LAYING USING FIXED EFFECT RIGHT-CENSORED WEIBULL SURVIVAL REGRESSION MODEL # to cope with incomplete design we do two separate analyses for the experienced & naive workers # we include colony as a fixed factor here datasubs <- group.data[group.data$worker_type=="experienced",] datasubs$colony_id <- droplevels(datasubs$colony_id) set_treatment_contrasts() egg.latency.model.exp <- survreg(Surv(time_to_egglaying_days, event, type="right") ~ colony_id +treatment + meanbodysizec, data=datasubs, dist="weibull", na.action="na.omit") AIC(egg.latency.model.exp) sum.exp <- summary(egg.latency.model.exp)$table sum.exp <- data.frame(model="experienced workers",sum.exp) sum.exp confint.exp <- confint(egg.latency.model.exp) confint.exp <- data.frame(model="experienced workers",confint.exp) confint.exp set_sum_contrasts() egg.latency.model.exp <- survreg(Surv(time_to_egglaying_days, event, type="right") ~ colony_id + treatment + meanbodysizec, data=datasubs, dist="weibull", na.action="na.omit") Anova(egg.latency.model.exp, type="III") lsmeans.exp <- data.frame(confint(lsmeans(egg.latency.model.exp, list(~treatment)), type="response")) lsmeans.exp <- data.frame(model="experienced workers",lsmeans.exp) lsmeans.exp contr.exp <- data.frame(summary(contrast(lsmeans(egg.latency.model.exp, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response")) contr.exp <- data.frame(model="experienced workers",contr.exp) datasubs <- group.data[group.data$worker_type=="naive",] datasubs$colony_id <- droplevels(datasubs$colony_id) set_treatment_contrasts() egg.latency.model.naiv <- survreg(Surv(time_to_egglaying_days, event, type="right") ~ +treatment + meanbodysizec, data=datasubs, dist="weibull", na.action="na.omit") AIC(egg.latency.model.naiv) sum.naiv <- summary(egg.latency.model.naiv)$table sum.naiv <- data.frame(model="naive workers",sum.naiv) sum.naiv confint.naiv <- confint(egg.latency.model.naiv) confint.naiv <- data.frame(model="naive workers",confint.naiv) confint.naiv set_sum_contrasts() egg.latency.model.naiv <- survreg(Surv(time_to_egglaying_days, event, type="right") ~ treatment + meanbodysizec, data=datasubs, dist="weibull", na.action="na.omit") Anova(egg.latency.model.naiv, type="III") lsmeans.naiv <- data.frame(confint(lsmeans(egg.latency.model.naiv, list(~treatment)), type="response")) lsmeans.naiv <- data.frame(model="naive workers",lsmeans.naiv) lsmeans.naiv contr.naiv <- data.frame(summary(contrast(lsmeans(egg.latency.model.naiv, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response")) contr.naiv <- data.frame(model="naive workers",contr.naiv) sum.all <- rbind(sum.exp,sum.naiv) write.csv(sum.all,"summary.egg.latency.glm.csv") confint.all <- rbind(confint.exp,confint.naiv) write.csv(confint.all,"confints.egg.latency.glm.csv") lsmeans.all <- rbind(lsmeans.exp,lsmeans.naiv) write.csv(lsmeans.all,"lsmeans.egg.latency.glm.csv") contr.all <- rbind(contr.exp,contr.naiv) contr.all$p.fdradj <- p.adjust(contr.all$p.value,method="fdr") write.csv(contr.all,"contrasts.egg.latency.glm.csv") # S5f. ANALYSIS OF TIME TO EGG-LAYING USING BAYESIAN PROPORTIONAL HAZARDS SURVIVAL MODEL set_treatment_contrasts() egg.latency.model.b1 <- survregbayes(Surv(time_to_egglaying_days, event, type="right") ~ frailty(colony_id, distribution="gaussian") + treatment + meanbodysizec, data=group.data[group.data$worker_type=="experienced",]) sum.exp <- summary(egg.latency.model.b1)$coeff sum.exp <- data.frame(model="experienced workers",sum.exp) sum.exp$Relhazard <- exp(sum.exp$Mean) sum.exp$Relhazard.upper95CL <- exp(sum.exp$X95.HPD.Low) sum.exp$Relhazard.lower95CL <- exp(sum.exp$X95.HPD.Upp) sum.exp # CIs for coefficients for c25_high and c27_high do not include 0, i.e. 'significant' in Bayesian sense # (or analogously, relative hazard to start laying eggs is lower than 1) egg.latency.model.b2 <- survregbayes(Surv(time_to_egglaying_days, event, type="right") ~ -1 + treatment + meanbodysizec, data=group.data[group.data$worker_type=="naive",]) sum.naiv <- summary(egg.latency.model.b2)$coeff sum.naiv <- data.frame(model="naive workers",sum.naiv) sum.naiv$Relhazard <- exp(sum.naiv$Mean) sum.naiv$Relhazard.upper95CL <- exp(sum.naiv$X95.HPD.Low) sum.naiv$Relhazard.lower95CL <- exp(sum.naiv$X95.HPD.Upp) sum.naiv # CIs for all coefficients include 0 / relative hazard to start laying relative to control not different from 1 sum.all <- rbind(sum.exp,sum.naiv) sum.all write.csv(sum.all,'summary.egglatency.bayesian.csv') # S6a-c. ANALYSIS OF PROPORTION OF WORKERS WITH ACTIVE OVARIES (DEVELOPED & NON-RESORBED) USING BINOMIAL MIXED MODEL # here we include cage and colony as random intercepts # model & results table set_sum_contrasts() developed.model <- glmer(developed.non.resorbed ~ (1|cage_id) + (1|colony_id) + treatment * worker_type + bodysizec, data=data, family=binomial, control=glmerControl(optimizer="bobyqa")) Anova(developed.model, type="III") set_treatment_contrasts() developed.model <- glmer(developed.non.resorbed ~ (1|cage_id) + (1|colony_id) + treatment * worker_type + bodysizec, data=data, family=binomial, control=glmerControl(optimizer="bobyqa")) AIC(developed.model) summary(developed.model) summary.developed <- data.frame(summary(developed.model)$coefficients) write.csv(summary.developed, file="summary.developed.glmm.csv") confint.developed <- confint(developed.model) write.csv(confint.developed, file="confints.developed.glmm.csv") plot(effect(developed.model, term="treatment*worker_type"), type="response") developed.df <- data.frame(confint(lsmeans(developed.model, list(~treatment|worker_type)), type="response")) write.csv(developed.df, file="lsmeans.developed.glmm.csv") plot(effect(developed.model, term="bodysizec"), type="response") # larger workers had a higher probability to lay eggs padj <- summary(contrast(lsmeans(developed.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="fdr"), type="response") pvals <- summary(contrast(lsmeans(developed.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="none"), type="response") pvals$p.FDRadj <- padj$p.value write.csv(pvals, file="contrasts.developed.glmm.csv") pvals$treatment <- gsub(" - hexane", "", pvals$contrast) pvals$treatmworkertype <- interaction(pvals$treatment, pvals$worker_type) developed.df$treatmworkertype <- interaction(developed.df$treatment, developed.df$worker_type) developed.df$p_value[match(pvals$treatmworkertype, developed.df$treatmworkertype)] <- pvals$p.value developed.df #for effect size table summary(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") effect.sizes <- confint(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="none")) effect.sizes[,2:6] <- 1/exp(effect.sizes[,2:6]) # plot colors <- c("brown3", "black", "green4") pvals$cols <- colors[(pvals$p.value<=0.05)*(pvals[,3]<1)*1+(pvals$p.value<=0.05)*(pvals[,3]>1)*3+(pvals$p.value>0.05)*2] developed.df$cols[match(pvals$treatmworkertype, developed.df$treatmworkertype)] <- pvals$cols developed.df$cols[is.na(developed.df$cols)] <- "black" ggplot(developed.df, aes(x=treatment, y=prob*100, colour=cols, group=worker_type, ymin=asymp.LCL, ymax=asymp.UCL)) + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), width=0, size=1) + geom_point(stat="identity", size=5) + facet_wrap(~ worker_type) + theme_few() + theme(legend.position="none", axis.line=element_line(colour="black"), panel.background=element_rect(fill="white"), axis.text=element_text(colour="black")) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(y = "Workers with active ovaries (%)", x="") + ylim(c(0,100)) + scale_x_discrete(breaks=c("hexane", "c23_high", "c23_low", "c25_high", "c25_low", "c27_high", "c27_low"), labels=c("Hexane control", "C23-high", "C23-low", "C25-high", "C25-low", "C27-high", "C27-low")) + ggtitle("") + scale_colour_manual(values=c("black", "brown3", "green4")) graph2ppt(file="figure 1C.pptx", append=T) # S6d-e. ANALYSIS OF PROPORTION OF WORKERS WITH ACTIVE OVARIES (DEVELOPED & NON-RESORBED) # USING FIXED EFFECT QUASIBINOMIAL GLM # here we code colony_id as a fixed factor, and take into account possible overdispersion # by using a quasibinomial model set_treatment_contrasts() developed.model.exp <- glm(developed.non.resorbed ~ colony_id + treatment + bodysizec, data=data[data$worker_type=="experienced",], family=quasibinomial) sum.exp <- summary(developed.model.exp)$coefficients sum.exp <- data.frame(model="experienced workers",sum.exp) sum.exp confint.exp <- confint(developed.model.exp) confint.exp <- data.frame(model="experienced workers",confint.exp) confint.exp set_sum_contrasts() developed.model.exp <- glm(developed.non.resorbed ~ colony_id + treatment + bodysizec, data=data[data$worker_type=="experienced",], family=quasibinomial) lsmeans.exp <- as.data.frame(summary(lsmeans(developed.model.exp, list(~treatment), type="response"))[[1]])[,-4] lsmeans.exp <- data.frame(model="experienced workers",lsmeans.exp) lsmeans.exp contr.exp <- data.frame(summary(contrast(lsmeans(developed.model.exp, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response"))[,-4] contr.exp <- data.frame(model="experienced workers",contr.exp) contr.exp set_treatment_contrasts() developed.model.naiv <- glm(developed.non.resorbed ~ treatment + bodysizec, data=data[data$worker_type=="naive",], family=quasibinomial) sum.naiv <- summary(developed.model.naiv)$coefficients sum.naiv <- data.frame(model="naive workers",sum.naiv) sum.naiv confint.naiv <- confint(developed.model.naiv) confint.naiv <- data.frame(model="naive workers",confint.naiv) confint.naiv set_sum_contrasts() developed.model.naiv <- glm(developed.non.resorbed ~ treatment + bodysizec, data=data[data$worker_type=="naive",], family=quasibinomial) lsmeans.naiv <- as.data.frame(summary(lsmeans(developed.model.naiv, list(~treatment), type="response"))[[1]])[,-4] lsmeans.naiv <- data.frame(model="naive workers",lsmeans.naiv) lsmeans.naiv contr.naiv <- data.frame(summary(contrast(lsmeans(developed.model.naiv, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response"))[,-4] contr.naiv <- data.frame(model="naive workers",contr.naiv) contr.naiv sum.all <- rbind(sum.exp,sum.naiv) write.csv(sum.all,"summary.developed.glm.csv") confints.all <- rbind(confint.exp,confint.naiv) write.csv(confints.all,"confints.developed.glm.csv") lsmeans.all <- rbind(lsmeans.exp,lsmeans.naiv) write.csv(lsmeans.all,"lsmeans.developed.glm.csv") contr.all <- rbind(contr.exp,contr.naiv) contr.all$p.fdradj <- p.adjust(contr.all$p.value,method="fdr") contr.all write.csv(contr.all,"contrasts.developed.glm.csv") # S6f. ANALYSIS OF PROPORTION OF WORKERS WITH ACTIVE OVARIES (DEVELOPED & NON-RESORBED) # USING BAYESIAN BINOMIAL MIXED MODEL # we use to separate models for naive and experienced workers to facilitate comparison with the relevant control # we use the default weakly informative/flat prior, but results are not sensitive to the exact choice of prior developed.model.b1 <- brm(developed.non.resorbed ~ -1 + (1|cage_id) + (1|colony_id) + treatment + bodysizec, data=data[(data$worker_type=="experienced"),], family=bernoulli, control = list(adapt_delta = 0.99, max_treedepth = 20)) plot(developed.model.b1) # trace and density plots for the MCMC samples look OK, so default priors OK summary(developed.model.b1) summary(developed.model.b1)$fixed # 95% prediction credible intervals on logit link scale summary(developed.model.b1)$random # confidence intervals on random effect SDs # $cage_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.5834307 0.299175 0.03616769 1.139406 463.4276 0.9998504 # # $colony_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.6724583 0.4701028 0.05396088 1.820399 743.6222 1.002459 preds1 <- plogis(summary(developed.model.b1)$fixed[,c(1,3,4)]) # 95% prediction credible intervals on original backtransformed scale preds1 <- data.frame(worker_type="experienced",preds1) colnames(preds1) <- c("worker_type","Estimate","lower95CI","upper95CI") preds1 stanplot(developed.model.b1) ht1 <- hypothesis(developed.model.b1,c("treatmentc23_high = treatmenthexane", "treatmentc23_low = treatmenthexane", "treatmentc25_high = treatmenthexane", "treatmentc25_low = treatmenthexane", "treatmentc27_high = treatmenthexane", "treatmentc27_low = treatmenthexane"))$hypothesis[,-5] ht1 <- data.frame(worker_type="experienced",ht1) colnames(ht1) <- c("worker_type","Estimate","SE","lower95CI","upper95CI","") ht1 <- ht1[,-3] ht1$Estimate <- exp(ht1$Estimate) ht1$lower95CI <- exp(ht1$lower95CI) ht1$upper95CI <- exp(ht1$upper95CI) ht1 developed.model.b2 <- brm(developed.non.resorbed ~ -1 + (1|cage_id) + treatment + bodysizec, data=data[(data$worker_type=="naive"),], family=bernoulli, control = list(adapt_delta = 0.99, max_treedepth = 20)) plot(developed.model.b2) # trace and density plots for the MCMC samples look OK, so default priors OK summary(developed.model.b2) summary(developed.model.b2)$fixed # 95% prediction credible intervals on log link scale summary(developed.model.b2)$random # confidence intervals on random effect SDs # $cage_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.7776296 0.4031787 0.05867218 1.59105 448.8731 1.009475 preds2 <- plogis(summary(developed.model.b2)$fixed[,c(1,3,4)]) # 95% prediction credible intervals on original backtransformed scale preds2 <- data.frame(worker_type="naive",preds2) colnames(preds2) <- c("worker_type","Estimate","lower95CI","upper95CI") preds2 stanplot(developed.model.b2) ht2 <- hypothesis(developed.model.b2,c("treatmentc23_high = treatmenthexane", "treatmentc23_low = treatmenthexane", "treatmentc25_high = treatmenthexane", "treatmentc25_low = treatmenthexane", "treatmentc27_high = treatmenthexane", "treatmentc27_low = treatmenthexane"))$hypothesis[,-5] ht2 <- data.frame(worker_type="naive",ht2) colnames(ht2) <- c("worker_type","Estimate","SE","lower95CI","upper95CI","") ht2 <- ht2[,-3] ht2$Estimate <- exp(ht2$Estimate) ht2$lower95CI <- exp(ht2$lower95CI) ht2$upper95CI <- exp(ht2$upper95CI) ht2 # c23_high, c25_high and C27_high lower than control preds <- rbind(preds1,preds2) write.csv(preds,"preds.developed.bayesianmm.csv") hts <- rbind(ht1,ht2) write.csv(hts,"contrasts.developed.bayesianmm.csv") # Table S5f # S6g-h. ANALYSIS OF OVARY DEVELOPMENT USING PROPORTIONAL ODDS LOGISTIC REGRESSION # WITH COLONY INCLUDED AS FIXED FACTOR (Tables S8) # here we analyse the ovary development data using an ordinal model by ordering the 4 ovary development categories from the lowest to the highest ovary development # we include colony_id as a fixed factor in the analysis # and carry out 2 separate analyses for experienced & naive workers to facilitate comparison with the right control datasub <- data[data$colony_id!="c", ] # we exclude colony c here as it has only one replicate and that otherwise we get spurious confidence intervals for one of the parameter coefficients pal <- c("grey",colorRampPalette(c("red3","blue"))(n=3)) datasubs <- datasub[datasub$worker_type=="experienced",] datasubs <- datasubs[complete.cases(datasubs),] datasubs$colony_id <- droplevels(datasubs$colony_id) set_treatment_contrasts() developed.model.exp <- polr(ovdevordered ~ treatment + bodysizec + colony_id, data=datasubs, Hess=TRUE) sum.exp <- summary(developed.model.exp)$coefficients sum.exp <- data.frame(Model="experienced workers",sum.exp) sum.exp confint.exp <- confint(developed.model.exp) confint.exp <- data.frame(Model="experienced workers",confint.exp) confint.exp set_sum_contrasts() developed.model.exp <- polr(ovdevordered ~ treatment + bodysizec + colony_id, data=datasubs, Hess=TRUE) an.exp <- Anova(developed.model.exp, type="III") # treatment not sign, p=0.30 an.exp <- data.frame(Model="experienced workers",an.exp) an.exp plot(effect(developed.model.exp,term="treatment"),ylab="Ovary development",type="probability",style="stacked",colors=pal) preds.exp <- summary(lsmeans(developed.model.exp, pairwise ~ treatment, mode = "mean"),type="response")$lsmeans # mean ovary development preds.exp <- data.frame(Model="experienced workers",preds.exp) preds.exp # test for overall difference in different treatments compared to hexane control lsm <- lsmeans(developed.model.exp, ~ treatment, mode = "latent") contr.exp <- summary(contrast(lsm, method="trt.vs.ctrl", ref=1, adjust="none")) contr.exp <- data.frame(Model="experienced workers",contr.exp) contr.exp # nothing sign here datasubs <- data[data$worker_type=="naive",] datasubs <- datasubs[complete.cases(datasubs),] set_treatment_contrasts() developed.model.naiv <- polr(ovdevordered ~ treatment + bodysizec, data=datasubs, Hess=TRUE) sum.naiv <- summary(developed.model.naiv)$coefficients sum.naiv <- data.frame(Model="naive workers",sum.naiv) sum.naiv confint.naiv <- confint(developed.model.naiv) confint.naiv <- data.frame(Model="naive workers",confint.naiv) confint.naiv set_sum_contrasts() developed.model.naiv <- polr(ovdevordered ~ treatment + bodysizec, data=datasubs, Hess=TRUE) an.naiv <- Anova(developed.model.naiv, type="III") # treatment sign, p=0.01 an.naiv <- data.frame(Model="naive workers",an.naiv) an.naiv plot(effect(developed.model.naiv,term="treatment"),ylab="Ovary development",type="probability",style="stacked",colors=pal) preds.naiv <- summary(lsmeans(developed.model.naiv, pairwise ~ treatment, mode = "mean"),type="response")$lsmeans # mean ovary development preds.naiv <- data.frame(Model="naive workers",preds.naiv) preds.naiv # test for overall difference compared to hexane control lsm <- lsmeans(developed.model.naiv, ~ treatment, mode = "latent") contr.naiv <- summary(contrast(lsm, method="trt.vs.ctrl", ref=1, adjust="none")) contr.naiv <- data.frame(Model="naive workers",contr.naiv) contr.naiv # many treatments sign here, especially the high conc ones sum.all <- rbind(sum.exp,sum.naiv) write.csv(sum.all,"summary.ovdevelopment.ordinal.csv") confint.all <- rbind(confint.exp, confint.naiv) write.csv(confint.all,"confint.ovdevelopment.ordinal.csv") preds <- rbind(preds.exp,preds.naiv)[,-5] write.csv(preds,"preds.ovdevelopment.ordinal.csv") contrasts <- rbind(contr.exp,contr.naiv)[,-c(4,5)] contrasts$estimate <- exp(contrasts$estimate) contrasts$p.fdradj <- p.adjust(contrasts$p.value,method="fdr") write.csv(contrasts,"contrasts.ovdevelopment.ordinal.csv") # S6i-k. ANALYSIS OF OVARY DEVELOPMENT USING CUMULATIVE LOGIT LINK ORDINAL MIXED MODEL # here we analyse the ovary development data using an ordinal model by ordering the 4 ovary development categories from the lowest to the highest ovary development # bot now include colony and cage_id as random factors set_treatment_contrasts() developed.model <- clmm(ovdevordered ~ treatment*worker_type + bodysizec + (1|colony_id) + (1|cage_id), data=data) summary(developed.model) sum <- summary(developed.model)$coefficients sum confints <- confint(developed.model) confints set_sum_contrasts() developed.model <- clmm(ovdevordered ~ treatment*worker_type + bodysizec + (1|colony_id) + (1|cage_id), data=data) preds <- data.frame(summary(lsmeans(developed.model, pairwise ~ treatment|worker_type, mode = "mean"),type="response")$lsmeans) # mean ovary development preds # test for overall difference in different treatments compared to hexane control lsm <- lsmeans(developed.model, ~ treatment|worker_type, mode = "latent") contrasts <- data.frame(summary(contrast(lsm, method="trt.vs.ctrl", ref=1, adjust="none"))) contrasts$p.fdradj <- p.adjust(contrasts$p.value,method="fdr") contrasts write.csv(sum,"summary.ovdevelopment.ordinalclmm.csv") write.csv(confints,"confints.ovdevelopment.ordinalclmm.csv") write.csv(preds[,-5],"preds.ovdevelopment.ordinalclmm.csv") contrasts$estimate <- exp(contrasts$estimate) contrasts$p.fdradj <- p.adjust(contrasts$p.value,method="fdr") write.csv(contrasts[,-c(4,5)],"contrasts.ovdevelopment.ordinalclmm.csv") # S7a-c. ANALYSIS OF AVERAGE TERMINAL OOCYTE SIZE USING BETA DISTRIBUTED GENERALIZED ADDITIVE MIXED MODEL (Tables S6a-c) # here a model in which treatment and worker_type acted additively had the best AIC # note that such a model cannot be fit using a fixed effect model due to the incomplete design used # so here we do not present any alternative model # The average oocyte diameter data show strong truncation and bimodality. To take this into account, # we scale the oocyte diameter data to lie between 0 and 1 for the lower and higher end of the data # and fit a beta distributed generalized additive mixed model. hist(data$avg_terminal_oocyte_diam_mm, breaks=30, col="blue", xlab="Average terminal oocyte diameter (mm)", main="", xlim=range(data$avg_terminal_oocyte_diam_mm)) maxv <- max(data$avg_terminal_oocyte_diam_mm) minv <- min(data$avg_terminal_oocyte_diam_mm) transf <- function(data, eps=0.01) (data-(min(data)-eps))*1/(max(data)-min(data)+2*eps) backtransf <- function(data, maxv, minv, eps=0.01) minv-eps+(maxv-minv+2*eps)*data data$avg_terminal_oocyte_diam_norm <- transf(data$avg_terminal_oocyte_diam_mm) par(mfrow=c(1,1)) hist(data$avg_terminal_oocyte_diam_norm, breaks=30, col="blue", xlab="Average terminal oocyte diameter (normalized between 0 and 1)", main="") # model & results table set_treatment_contrasts() oocyte.model <- gam(avg_terminal_oocyte_diam_norm ~ treatment + worker_type + bodysizec + s(cage_id, bs="re") + s(colony_id,bs="re"), family=betar(link="logit"), data=data) AIC(oocyte.model) summary.oocyte <- as.data.frame(summary(oocyte.model)$p.table) colnames(summary.oocyte)[4] <- "p.value" summary.oocyte$p.FDRadj <- p.adjust(summary.oocyte$p.value,method="fdr") write.csv(summary.oocyte, file="summary.oocyte.mmgam.csv") confint.oocyte <- confint.lm(oocyte.model)[c(1:9),] write.csv(confint.oocyte, file="confint.oocyte.mmgam.csv") anova.oocyte <- anova(oocyte.model)$pTerms.table write.csv(anova.oocyte, file="anova.oocyte.mmgam.csv") oocyte.model.fixed <- gam(avg_terminal_oocyte_diam_norm ~ treatment + worker_type + bodysizec, family=betar(link="logit"), data=data) oocyte.df <- expand.grid(treatment=c("hexane", "c23_high", "c23_low", "c25_high", "c25_low", "c27_high", "c27_low"), worker_type=c("experienced", "naive"), bodysizec=0) preds <- predict(oocyte.model.fixed, newdata=oocyte.df, type="response", se.fit=TRUE) oocyte.df <- cbind(oocyte.df[,1:2], lsmean=preds$fit, lower.CL=preds$fit-1.96*preds$se.fit, upper.CL=preds$fit+1.96*preds$se.fit) oocyte.df[,3:5] <- sapply(oocyte.df[,3:5], function(x) backtransf(x, maxv=maxv, minv=minv, eps=0.01)) write.csv(oocyte.df, file="lsmeans.oocyte.mmgam.csv") oocyte.df # plot oocyte.df$cols <- rep(c("black", "black", "black", "brown3", "black", "black", "green4"), 2) ggplot(oocyte.df, aes(x=treatment, y=lsmean, colour=cols, ymin=lower.CL, ymax=upper.CL)) + #guides(col=FALSE) + geom_errorbar(aes(ymin=lower.CL, ymax=upper.CL, group=worker_type), width=0, size=1, alpha=I(1)) + geom_point(stat="identity", size=5) + facet_wrap(~ worker_type) + theme_few() + theme(legend.position="none", axis.line=element_line(colour="black"), panel.background=element_rect(fill="white"), axis.text=element_text(colour="black")) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(y = "Average terminal oocyte size (mm)", x="") + scale_x_discrete(breaks=c("hexane", "c23_high", "c23_low", "c25_high", "c25_low", "c27_high", "c27_low"), labels=c("Hexane control", "C23 high", "C23 low", "C25 high", "C25 low", "C27 high", "C27 low")) + ggtitle("") + scale_colour_manual(values=c("black", "brown3", "green4")) graph2ppt(file="figure 1D.pptx", append=F) # S8a-c ANALYSIS OF PROPORTIONS OF WORKERS WITH RESORBED OVARIES USING BINOMIAL MIXED MODEL (Tables S7c,d,e) # here we include cage and colony as random intercepts # model & results table set_sum_contrasts() resorption.model <- glmer(resorbed ~ (1|cage_id) + (1|colony_id) + treatment * worker_type + bodysizec, data=data, family=binomial, control=glmerControl(optimizer="bobyqa")) Anova(resorption.model, type="III") set_treatment_contrasts() resorption.model <- glmer(resorbed ~ (1|cage_id) + (1|colony_id) + treatment * worker_type + bodysizec, data=data, family=binomial, control=glmerControl(optimizer="bobyqa")) AIC(resorption.model) summary(resorption.model) summary.resorption <- data.frame(summary(resorption.model)$coefficients) write.csv(summary.resorption, file="summary.resorption.gmm.csv") confint.resorption <- confint(resorption.model) write.csv(confint.resorption, file="confint.resorption.gmm.csv") plot(effect(resorption.model, term="treatment*worker_type"), type="response") resorption.df <- data.frame(confint(lsmeans(resorption.model, list(~treatment|worker_type)), type="response")) write.csv(resorption.df, file="lsmeans.resorption.gmm.csv") plot(effect(resorption.model, term="bodysizec"), type="response") # larger workers had a higher probability to have resorbed ovaries padj <- summary(contrast(lsmeans(resorption.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="fdr"), type="response") pvals <- summary(contrast(lsmeans(resorption.model, list(~treatment|worker_type)), method="trt.vs.ctrl", adjust="none"), type="response") pvals$p.FDRadj <- padj$p.value write.csv(pvals, file="contrasts.resorption.gmm.csv") pvals$treatment <- gsub(" - hexane", "", pvals$contrast) pvals$treatmworkertype <- interaction(pvals$treatment, pvals$worker_type) resorption.df$treatmworkertype <- interaction(resorption.df$treatment, resorption.df$worker_type) resorption.df$p_value[match(pvals$treatmworkertype, resorption.df$treatmworkertype)] <- pvals$p.value; resorption.df #for effect size table summary(contrast(lsmeans(resorption.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") effect.sizes <- confint(contrast(lsmeans(resorption.model, list(~treatment)), method="trt.vs.ctrl", adjust="none")) effect.sizes[,2:6] <- exp(effect.sizes[,2:6]) # plot colors <- c("brown3", "black", "green4") pvals$cols <- colors[(pvals$p.value<=0.05)*(pvals[,3]<1)*1+(pvals$p.value<=0.05)*(pvals[,3]>1)*3+(pvals$p.value>0.05)*2] resorption.df$cols[match(pvals$treatmworkertype, resorption.df$treatmworkertype)] <- pvals$cols resorption.df$cols[is.na(resorption.df$cols)] <- "black" ggplot(resorption.df, aes(x=treatment, y=prob*100, colour=cols, group=worker_type, ymin=asymp.LCL, ymax=asymp.UCL)) + geom_errorbar(aes(ymin=asymp.LCL*100, ymax=asymp.UCL*100), width=0, size=1) + geom_point(stat="identity", size=5) + facet_wrap(~ worker_type) + theme_few() + theme(legend.position="none", axis.line=element_line(colour="black"), panel.background=element_rect(fill="white"), axis.text=element_text(colour="black")) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(y = "Workers with resorbed ovaries (%)", x="") + ylim(c(0,100)) + scale_x_discrete(breaks=c("hexane", "c23_high", "c23_low", "c25_high", "c25_low", "c27_high", "c27_low"), labels=c("Hexane control", "C23-high", "C23-low", "C25-high", "C25-low", "C27-high", "C27-low")) + ggtitle("") + scale_colour_manual(values=c("black", "brown3", "green4")) # S8d-e ANALYSIS OF PROPORTION OF WORKERS WITH RESORBED OVARIES # USING FIXED EFFECT BINOMIAL GLM # here we code colony_id as a fixed factor, and take into account possible overdispersion # by using a quasibinomial model datasubs <- data[data$colony_id!="c", ] # we exclude colony c here as it has only one replicate and that otherwise we get spurious confidence intervals for one of the parameter coefficients datasubs$colony_id <- droplevels(datasubs$colony_id) set_treatment_contrasts() resorption.model.exp <- glm(resorbed ~ colony_id + treatment + bodysizec, data=datasubs[datasubs$worker_type=="experienced",], family=quasibinomial) sum.exp <- summary(resorption.model.exp)$coefficients sum.exp <- data.frame(model="experienced workers",sum.exp) sum.exp confint.exp <- confint(resorption.model.exp) confint.exp <- data.frame(model="experienced workers",confint.exp) confint.exp set_sum_contrasts() resorption.model.exp <- glm(resorbed ~ colony_id + treatment + bodysizec, data=datasubs[datasubs$worker_type=="experienced",], family=quasibinomial) lsmeans.exp <- as.data.frame(summary(lsmeans(resorption.model.exp, list(~treatment), type="response"))[[1]])[,-4] lsmeans.exp <- data.frame(model="experienced workers",lsmeans.exp) lsmeans.exp contr.exp <- data.frame(summary(contrast(lsmeans(resorption.model.exp, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response"))[,-4] contr.exp <- data.frame(model="experienced workers",contr.exp) contr.exp set_treatment_contrasts() resorption.model.naiv <- glm(resorbed ~ treatment + bodysizec, data=datasubs[datasubs$worker_type=="naive",], family=quasibinomial) sum.naiv <- summary(resorption.model.naiv)$coefficients sum.naiv <- data.frame(model="naive workers",sum.naiv) sum.naiv confint.naiv <- confint(resorption.model.naiv, method="Wald") confint.naiv <- data.frame(model="naive workers",confint.naiv) confint.naiv set_sum_contrasts() resorption.model.naiv <- glm(resorbed ~ treatment + bodysizec, data=datasubs[datasubs$worker_type=="naive",], family=quasibinomial) lsmeans.naiv <- as.data.frame(summary(lsmeans(resorption.model.naiv, list(~treatment), type="response"))[[1]])[,-4] lsmeans.naiv <- data.frame(model="naive workers",lsmeans.naiv) lsmeans.naiv contr.naiv <- data.frame(summary(contrast(lsmeans(resorption.model.naiv, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response"))[,-4] contr.naiv <- data.frame(model="naive workers",contr.naiv) contr.naiv sum.all <- rbind(sum.exp,sum.naiv) write.csv(sum.all,"summary.resorption.glm.csv") confint.all <- rbind(confint.exp,confint.naiv) write.csv(confint.all,"confint.resorption.glm.csv") lsmeans.all <- rbind(lsmeans.exp,lsmeans.naiv) write.csv(lsmeans.all,"lsmeans.resorption.glm.csv") contr.all <- rbind(contr.exp,contr.naiv) contr.all$p.fdradj <- p.adjust(contr.all$p.value,method="fdr") contr.all write.csv(contr.all,"contrasts.resorption.glm.csv") # S8f. ANALYSIS OF PROPORTION OF WORKERS WITH RESORBED OVARIES # USING BAYESIAN BINOMIAL MIXED MODEL # we use to separate models for naive and experienced workers to facilitate comparison with the relevant control # we use the default weakly informative/flat prior, but results are not sensitive to the exact choice of prior resorption.model.b1 <- brm(resorbed ~ -1 + (1|cage_id) + (1|colony_id) + treatment + bodysizec, data=data[(data$worker_type=="experienced"),], family=bernoulli, control = list(adapt_delta = 0.999, max_treedepth = 20)) plot(resorption.model.b1) # trace and density plots for the MCMC samples look OK, so default priors OK summary(resorption.model.b1) summary(resorption.model.b1)$fixed # 95% prediction credible intervals on logit link scale summary(resorption.model.b1)$random # confidence intervals on random effect SDs # $cage_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.5438163 0.2766138 0.04766236 1.070159 652.1404 0.9995151 # # $colony_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 0.7707552 0.5798107 0.05216058 2.251463 573.4797 1.001045 preds1 <- plogis(summary(resorption.model.b1)$fixed[,c(1,3,4)]) # 95% prediction credible intervals on original backtransformed scale preds1 <- data.frame(worker_type="experienced",preds1) colnames(preds1) <- c("worker_type","Estimate","lower95CI","upper95CI") preds1 stanplot(resorption.model.b1) ht1 <- hypothesis(resorption.model.b1,c("treatmentc23_high = treatmenthexane", "treatmentc23_low = treatmenthexane", "treatmentc25_high = treatmenthexane", "treatmentc25_low = treatmenthexane", "treatmentc27_high = treatmenthexane", "treatmentc27_low = treatmenthexane"))$hypothesis[,-5] ht1 <- data.frame(worker_type="experienced",ht1) colnames(ht1) <- c("worker_type","Estimate","SE","lower95CI","upper95CI","") ht1 <- ht1[,-3] ht1$Estimate <- exp(ht1$Estimate) ht1$lower95CI <- exp(ht1$lower95CI) ht1$upper95CI <- exp(ht1$upper95CI) ht1 resorption.model.b2 <- brm(resorbed ~ -1 + (1|cage_id) + treatment + bodysizec, data=data[(data$worker_type=="naive"),], family=bernoulli, control = list(adapt_delta = 0.999, max_treedepth = 20)) plot(resorption.model.b2) # trace and density plots for the MCMC samples look OK, so default priors OK summary(resorption.model.b2) summary(resorption.model.b2)$fixed # 95% prediction credible intervals on log link scale summary(resorption.model.b2)$random # confidence intervals on random effect SDs # $cage_id # Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat # sd(Intercept) 1.085657 0.4601807 0.1580355 2.02792 564.11 0.9999446 preds2 <- plogis(summary(resorption.model.b2)$fixed[,c(1,3,4)]) # 95% prediction credible intervals on original backtransformed scale preds2 <- data.frame(worker_type="naive",preds2) colnames(preds2) <- c("worker_type","Estimate","lower95CI","upper95CI") preds2 stanplot(resorption.model.b2) ht2 <- hypothesis(resorption.model.b2,c("treatmentc23_high = treatmenthexane", "treatmentc23_low = treatmenthexane", "treatmentc25_high = treatmenthexane", "treatmentc25_low = treatmenthexane", "treatmentc27_high = treatmenthexane", "treatmentc27_low = treatmenthexane"))$hypothesis[,-5] ht2 <- data.frame(worker_type="naive",ht2) colnames(ht2) <- c("worker_type","Estimate","SE","lower95CI","upper95CI","") ht2 <- ht2[,-3] ht2$Estimate <- exp(ht2$Estimate) ht2$lower95CI <- exp(ht2$lower95CI) ht2$upper95CI <- exp(ht2$upper95CI) ht2 preds <- rbind(preds1,preds2) write.csv(preds,"preds.resorption.bayesianmm.csv") hts <- rbind(ht1,ht2) write.csv(hts,"contrasts.resorption.bayesianmm.csv") # S9. RE-ANALYSIS OF B. TERRESTRIS DATA REPORTED BY VAN OYSTAEYEN ET AL. (Science, 2014) WITH INCLUSION OF BODY SIZE # read in data, we have here also included a column with data on the thorax width of workers as a measure of body size, # which we have measured since and had not considered in our earlier Science study d<-read.csv("Van Oystaeyen et al 2014 data with body size.csv",header=T) names(d) d$treatment<-factor(d$treatment,levels=c("Control","C20_oleate","C22_oleate","C24_oleate","C26_oleate","C25","Q"),ordered=F) d$treatment<-relevel(d$treatment,ref="Control") # C25 is pentacosane, C20_oleate,C22_oleate,C24_oleate and C26_oleate are 4 queen-characteristic esters, Q=colony with a queen, Control=queenless solvent only control d$colony_id<-as.factor(d$colony_id) d$ovary_development_score<-as.factor(d$ovary_development_score) d$resorbed<-(d$ovary_development_score==5)*1 # workers with resorbed terminal oocyte (ovary development scale IV-R of Duchateau & Velthuis 1989), same as column "regressed" in Dryad file d$developed.non.resorbed<-(d$ovary_development_score==4)*1 # workers with a mature, non-resorbed terminal oocyte (ovary development scale IV of Duchateau & Velthuis 1989), same as column "reproductive" in Dryad file d$colony_sizec<-scale(d$colony_size, center=TRUE, scale=FALSE) # mean centered colony size d$bodysizec<-scale(d$thorax_width, center=TRUE, scale=FALSE) # mean centered body size (thorax width) # sample sizes table(d$treatment, d$colony_id) # S9a. BINOMIAL MODEL FOR OOCYTE RESORPTION set_sum_contrasts() resorption.model <- glmer(resorbed ~ (1|colony_id) + treatment + colony_sizec + bodysizec, data=d, family=binomial, control=glmerControl(optimizer="bobyqa")) Anova(resorption.model, type="III") # significant effects of treatment, colony size and body size set_treatment_contrasts() resorption.model <- glmer(resorbed ~ (1|colony_id) + treatment + colony_sizec + bodysizec, data=d, family=binomial, control=glmerControl(optimizer="bobyqa")) AIC(resorption.model) summary(resorption.model) summary.resorbed <- data.frame(summary(resorption.model)$coefficients) write.csv(summary.resorbed, file="vanoystaeyen.summary.resorbed.glmm.csv") plot(effect(resorption.model, term="treatment"), type="response") # C25 causes significantly more workers to have resorbed ovaries plot(effect(resorption.model, term="bodysizec"), type="response") # larger workers were less likely to have resorbed ovaries resorbed.df <- data.frame(confint(lsmeans(resorption.model, list(~treatment)), type="response")) write.csv(resorbed.df, file="vanoystaeyen.lsmeans.resorbed.glmm.csv") padj <- summary(contrast(lsmeans(resorption.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") pvals <- summary(contrast(lsmeans(resorption.model, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response") pvals$p.FDRadj <- padj$p.value write.csv(pvals, file="vanoystaeyen.contrasts.resorbed.glmm.csv") #for effect size table (Table S2) summary(contrast(lsmeans(resorption.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") effect.sizes <- confint(contrast(lsmeans(resorption.model, list(~treatment)), method="trt.vs.ctrl", adjust="none")) effect.sizes[,2:6] <- exp(effect.sizes[,2:6]) colnames(effect.sizes) <- c("contrast","odds_ratio","SE","df","asymp.LCL","asymp.UCL") effect.sizes write.csv(effect.sizes, file="vanoystaeyen.effectsizes.resorbed.csv") # note: excluding body size and/or colony size from the model results in qualitatively identical results for treatment # S9b. BINOMIAL MODEL FOR OVARY DEVELOPMENT # we compare the proportion of workers with a ready-to-lay egg (i.e. with a mature, non-resorbed terminal oocyte, which is ovary development scale IV of Duchateau & Velthuis 1989, the same as column "reproductive" in the Dryad file) set_sum_contrasts() developed.model <- glmer(developed.non.resorbed ~ (1|colony_id) + treatment + colony_sizec + bodysizec, data=d, family=binomial, control=glmerControl(optimizer="bobyqa")) Anova(developed.model, type="III") # significant effects of treatment and body size set_treatment_contrasts() developed.model <- glmer(developed.non.resorbed ~ (1|colony_id) + treatment + colony_sizec + bodysizec, data=d, family=binomial, control=glmerControl(optimizer="bobyqa")) AIC(developed.model) summary(developed.model) summary.developed <- data.frame(summary(developed.model)$coefficients) write.csv(summary.developed, file="vanoystaeyen.summary.developed.glmm.csv") plot(effect(developed.model, term="treatment"), type="response") plot(effect(developed.model, term="bodysizec"), type="response") # larger workers were more likely to have developed ovaries developed.df <- data.frame(confint(lsmeans(developed.model, list(~treatment)), type="response")) write.csv(developed.df, file="vanoystaeyen.lsmeans.developed.glmm.csv") padj <- summary(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") pvals <- summary(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response") pvals$p.FDRadj <- padj$p.value write.csv(pvals, file="vanoystaeyen.contrasts.developed.glmm.csv") #for effect size table summary(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") effect.sizes <- confint(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="none")) effect.sizes[,2:6] <- 1/exp(effect.sizes[,2:6]) colnames(effect.sizes) <- c("contrast","odds_ratio","SE","df","asymp.UCL","asymp.LCL") effect.sizes write.csv(effect.sizes, file="vanoystaeyen.effectsizes.developed.csv") # note: excluding body size and/or colony size from the model results in qualitatively identical results for treatment, as did scoring ovary development stages 3 or 4 as developed # S10. RE-ANALYSIS OF B. TERRESTRIS DATA REPORTED BY HOLMAN (PeerJ, 2014) WITH INCLUSION OF BODY SIZE # read in data from PeerJ website as included in original study d<-read.csv("https://dfzljdn9uc3pi.cloudfront.net/2014/604/1/Holman_2014_PeerJ_Bumblebees_-_raw_data.csv",header=T) d<-d[,1:4] d$body.size<-((d$Worker.size=="S")*1+(d$Worker.size=="M")*2+(d$Worker.size=="L")*3) # workers were here scored as small, medium or large - we recode this as 1 to 3 d$bodysizec<-scale(d$body.size, center=TRUE, scale=FALSE) # and we mean center body size colnames(d)<-c("colony_id","treatment","worker.size","oocyte_number","bodysize","bodysizec") d$treatment<-factor(d$treatment,levels=c("Hexane","C25"),ordered=F) d$treatment<-relevel(d$treatment,ref="Hexane") d$developed<-(d$oocyte_number>10)*1 # workers with more than 10 visible oocytes were scored as having developed ovaries head(d) # sample sizes table(d$treatment, d$colony_id) # S10a. BINOMIAL MODEL FOR OVARY DEVELOPMENT set_sum_contrasts() developed.model <- glmer(developed ~ (1|colony_id) + treatment + bodysizec, data=d, family=binomial, control=glmerControl(optimizer="bobyqa")) Anova(developed.model, type="III") # significant effects of treatment and body size set_treatment_contrasts() developed.model <- glmer(developed ~ (1|colony_id) + treatment + bodysizec, data=d, family=binomial, control=glmerControl(optimizer="bobyqa")) AIC(developed.model) summary(developed.model) summary.developed <- data.frame(summary(developed.model)$coefficients) write.csv(summary.developed, file="holman.summary.developed.glmm.csv") plot(effect(developed.model, term="treatment"), type="response") plot(effect(developed.model, term="bodysizec"), type="response") # larger workers were more likely to have developed ovaries developed.df <- data.frame(confint(lsmeans(developed.model, list(~treatment)), type="response")) write.csv(developed.df, file="holman.lsmeans.developed.glmm.csv") padj <- summary(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") pvals <- summary(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="none"), type="response") pvals$p.FDRadj <- padj$p.value write.csv(pvals, file="holman.contrasts.developed.glmm.csv") #for effect size table summary(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") effect.sizes <- confint(contrast(lsmeans(developed.model, list(~treatment)), method="trt.vs.ctrl", adjust="none")) effect.sizes[,2:6] <- 1/exp(effect.sizes[,2:6]) colnames(effect.sizes) <- c("contrast","odds_ratio","SE","df","asymp.UCL","asymp.LCL") effect.sizes write.csv(effect.sizes, file="holman.effectsizes.developed.csv") # note: excluding body size results in qualitatively identical results for treatment #10b. NEGATIVE BINOMIAL MODEL TO COMPARE NUMBER OF VISIBLE OOCYTES IN OVARIES # note: we use negative binomial rather than Poisson to allow large number of zeros set_sum_contrasts() oocytenr.model <- glmer.nb(oocyte_number ~ (1|colony_id) + treatment + bodysizec, data=d, control=glmerControl(optimizer="bobyqa")) Anova(oocytenr.model, type="III") # significant effects of treatment and body size set_treatment_contrasts() oocytenr.model <- glmer.nb(oocyte_number ~ (1|colony_id) + treatment + bodysizec, data=d, family=binomial, control=glmerControl(optimizer="bobyqa")) AIC(oocytenr.model) summary(oocytenr.model) summary.oocytenr <- data.frame(summary(oocytenr.model)$coefficients) write.csv(summary.oocytenr, file="holman.summary.oocytenr.glmm.csv") plot(effect(oocytenr.model, term="bodysizec"), type="response") # larger workers were more likely to have developed ovaries oocytenr.df <- data.frame(confint(lsmeans(oocytenr.model, list(~treatment)), type="response")) write.csv(oocytenr.df, file="holman.lsmeans.oocytenr.glmm.csv") pvals <- summary(contrast(lsmeans(oocytenr.model, list(~treatment)), method="trt.vs.ctrl", adjust="fdr"), type="response") write.csv(pvals, file="holman.contrasts.oocytenr.glmm.csv")