############################## # # Script for preparing Observation data # Need to have duration of behaviour per individual per observation # Including individuals that were nobserved but not recorded working # 12/12/19 # ############################### # Load packages if (!require(data.table)) install.packages("data.table") library(data.table) if (!require(ggplot2)) install.packages("ggplot2") library(ggplot2) if (!require(lme4)) install.packages("lme4") library(lme4) if (!require(gridExtra)) install.packages("gridExtra") library(gridExtra) if (!require(dplyr)) install.packages("dplyr") library(dplyr) if (!require(DHARMa)) install.packages("DHARMa") library(DHARMa) if (!require(LMERConvenienceFunctions)) install.packages("LMERConvenienceFunctions") library(LMERConvenienceFunctions) if (!require(AICcmodavg)) install.packages("AICcmodavg") library(AICcmodavg) if (!require(SummarizedExperiment)) install.packages("SummarizedExperiment") library(SummarizedExperiment) DIR <- # filepath to project directory DATADIR <- # filepath to data directory DurationsObserved <- read.csv(file.path(paste(DATADIR,"BEHAVIOUR_DURATIONS_INPUT.ext", sep="")), sep="\t") Ind <- read.csv(file.path(paste(DATADIR,"INDIVIDUAL_INFO_INPUT.ext", sep="")), sep="\t") # Create dataframe and add the duration of observation per session per individual. Each individual was observed 6 times. # If there are fewer than 6 durations, corresponding to observations when working was not recorded, fill in with 0. Number_Individuals <- (length(unique(Ind$TagNo))) * 6 DurationsAll <- data.frame(matrix("", nrow=Number_Individuals, ncol=1)) # create blank dataframe with one column and 6 rows per individual DurationsAll$Obs_Per_Ind <- c(1:6) # Repeat 1-6, observation per individual DurationsAll$TagNo <- rep(unique(Ind$TagNo), each=6) # Add individual (tag number) to 6 rows DurationsAll <- merge(Ind[,c("TagNo","Colony")], DurationsAll) # Bring Colony from Ind and merge by TagNo DurationsAll$matrix.....nrow...Number_Individuals..ncol...1. <- NULL # remove variable # to number the individual's observations (first time observed, second time observed etc) DurationsObserved$Obs_Per_Ind <- rowid(DurationsObserved$TagNo) # merge the data frames leaving each individual's observations split by observation session, filling in NAs with zero DurationsAll <- (merge(DurationsAll, DurationsObserved, all.x=T)) DurationsAll$BehaviourDuration_seconds <- as.numeric(DurationsAll$BehaviourDuration_seconds) row.index <- (which(is.na(DurationsAll$BehaviourDuration_seconds))) DurationsAll[row.index,"BehaviourDuration_seconds"] <- 0 # merge individual characteristics DurationsAll <- merge(DurationsAll, Ind[, c("TagNo", "AgeMonths","GroupSize","MeanWeight", "Rank", "ScaledRank", "Sex", "Status", "TotalDurationCooperation", "ScaledRandom_Rank")], by = "TagNo") DurationsAll$Sex <- as.factor(DurationsAll$Sex) # remove question mark symbols from input files DurationsAll$Sex <- gsub("?", "", DurationsAll$Sex, fixed=TRUE) DurationsAll$Status <- gsub("B?", "B", DurationsAll$Status, fixed=TRUE) # B? = red vagina. May indicate reproductive activation but not evidence of full reproductive status DurationsAll$Status <- as.factor(DurationsAll$Status) # binary cooperation for models DurationsAll$Coop_Binary <- factor(ifelse(DurationsAll$BehaviourDuration_seconds >0, 1,0)) # create subsets for breeders and non-breeders NonBreeders <- subset(DurationsAll, DurationsAll$Status=="NB" & !is.na(DurationsAll$AgeMonths) & !is.na(DurationsAll$TagNo) & !is.na(DurationsAll$ScaledRank)) Breeders <- subset(DurationsAll, DurationsAll$Status=="B") # Create logged variables NonBreeders$Log_Coop <- log(NonBreeders$BehaviourDuration_seconds) NonBreeders$Log_Age <- log(NonBreeders$AgeMonths) NonBreeders$Log_Weight <- log(NonBreeders$MeanWeight) NonBreeders$Log_Rank <- log(NonBreeders$ScaledRank) # Create subset of observations of non-breeders that had non-zero work durations (working behaviour was observed) NBNZ <- NonBreeders[NonBreeders$BehaviourDuration_seconds != 0, ] # Non-Breeder Non-Zero NBNZ$Sex <- as.factor(NBNZ$Sex) # Create regressions and look at distributions of residuals # Starting with logistic regression: do variables predict whether observation session is non-zero? # Combine functions of the DHARMa package that assess residuals from Logistic regressions LogRegResiduals <- function(model) { par(mfrow=c(2,2)) fittedModel <- model # plot(resid(fittedModel, type="deviance")) simulationOutput <- simulateResiduals(fittedModel = fittedModel, refit=F) plot(simulationOutput, quantreg = T) testDispersion(simulationOutput, alternative="two.sided", plot=T) testZeroInflation(simulationOutput) testOutliers(simulationOutput, alternative = "two.sided", plot = T) } # Define models LogReg_Null <- glmer(Coop_Binary ~ (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Sex <- glmer(Coop_Binary ~ Sex + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Age <- glmer(Coop_Binary ~ AgeMonths + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Weight <- glmer(Coop_Binary ~ MeanWeight + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Rank <- glmer(Coop_Binary ~ ScaledRank + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Random_Rank <- glmer(Coop_Binary ~ ScaledRandom_Rank + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Sq_Weight <- glmer(Coop_Binary ~ poly(MeanWeight,2) + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Sq_Age <- glmer(Coop_Binary ~ poly(AgeMonths,2) + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogReg_Cu_Age <- glmer(Coop_Binary ~ poly(AgeMonths,3) + (1|Colony/TagNo), data = NonBreeders, family = "binomial") LogRegList <- c(LogReg_Null,LogReg_Sex,LogReg_Age,LogReg_Sq_Age,LogReg_Cu_Age, LogReg_Weight,LogReg_Sq_Weight,LogReg_Rank, LogReg_Random_Rank) # run and inspect residual checks on Logistic regressions for (i in LogRegList){ LogRegResiduals(i) } # compare AIC values aictab(LogRegList, sort=T, modnames=c("LogReg_Null","LogReg_Sex","LogReg_Age","LogReg_Sq_Age", "LogReg_Cu_Age","LogReg_Weight","LogReg_Sq_Weight","LogReg_Rank", "LogReg_Random_Rank")) # To print full model summaries or co-efficients for (i in LogRegList){ print(i) print(coef(summary(i))) } ### Second part of the 2-part models for semi-continuous data # running linear mixed-effects models on the non-zero data # define the models glmnull <- lmer(BehaviourDuration_seconds ~ (1|Colony/TagNo), data=NBNZ, REML=F) glmsex <- lmer(BehaviourDuration_seconds ~ Sex + (1|Colony/TagNo), data=NBNZ, REML=F) glmage <- lmer(BehaviourDuration_seconds ~ AgeMonths + (1|Colony/TagNo), data=NBNZ, REML=F) glmage_sq <- lmer(BehaviourDuration_seconds ~ poly(AgeMonths,2) + (1|Colony/TagNo), data=NBNZ, REML=F) glmweight <- lmer(BehaviourDuration_seconds ~ MeanWeight + (1|Colony/TagNo), data=NBNZ, REML=F) glmrank <- lmer(BehaviourDuration_seconds ~ ScaledRank + (1|Colony/TagNo), data=NBNZ, REML=F) GLMlist <- c(glmnull, glmsex, glmage, glmage_sq, glmweight, glmrank) # for each model, display residual information and model summaries for (i in GLMlist) { qqplot(resid(i), fitted(i)) mcp.fnc(i) mean(resid(i)) hist(resid(i)) print(summary(i)) } # compare AIC values aictab(GLMlist, sort=T, c.hat=T, modnames=c("glmnull","glmsex","glmage", "glmage_sq","glmweight","glmrank")) ### Colony-level data # Create new dataframe and add information - ColonyData # Number of observations per colony ObservationsPerColony <- as.data.frame(table(NonBreeders$Colony)) colnames(ObservationsPerColony) <- c("Colony", "NumberOfObservations") # Total durations per colony DurationPerColony <- as.data.frame(tapply(NonBreeders$BehaviourDuration_seconds, NonBreeders$Colony, sum)) setDT(DurationPerColony, keep.rownames = TRUE)[] colnames(DurationPerColony) <- c("Colony", "ColonyDuration_seconds") # Define ColonyData from file with basic info ColonyData <- read.csv(file.path(DATADIR,paste("COLONY_INPUT.EXT")), sep=",") # Merge and add variables for how many sessions recorded or did not record working behaviour ColonyData <- merge(ColonyData, ObservationsPerColony, all.x=T, all.y=T) ColonyData <- merge(ColonyData, DurationPerColony, all.x=T, all.y=T) BehaviourPresentPerColony <- t(table(NonBreeders$Coop_Binary, NonBreeders$Colony)) BehaviourPresentPerColony <- setDT(as.data.table(BehaviourPresentPerColony), keep.rownames = TRUE)[] BehaviourPresentPerColony <- cbind(BehaviourPresentPerColony, unstack(BehaviourPresentPerColony, form= N ~ V2)) BehaviourPresentPerColony <- BehaviourPresentPerColony[(1:8), c(1,3,5)] colnames(BehaviourPresentPerColony) <- c("Colony", "Behaviour_Absent", "Behaviour_Present") ColonyData <- merge(ColonyData, BehaviourPresentPerColony) # Add more data to the dataframe ColonyData$MeanCoopPerObservation <- with(ColonyData, ColonyData$ColonyDuration_seconds/ColonyData$Behaviour_Present) ColonyData$Percent_Present <- 100*ColonyData$Behaviour_Present/(ColonyData$Behaviour_Present+ColonyData$Behaviour_Absent) ColonyData$MeanColonyWeight <- tapply(NonBreeders$MeanWeight, NonBreeders$Colony, mean) ColonyData$MeanAgePerColony <- tapply(NonBreeders$AgeMonths, NonBreeders$Colony, mean) ColonyData$LengthPerIndividual <- with(ColonyData, ColonyData$Length/ColonyData$NumberofIndividualsAtFirstObs) ColonyData$AreaPerIndividual <- with(ColonyData, ColonyData$Area/ColonyData$NumberofIndividualsAtFirstObs) ColonyData$NumberObserved <- table(NonBreeders$Colony)/6 Females <- subset(NonBreeders, NonBreeders$Sex=="F") ColonyData$NumberFemales <- table(Females$Colony)/6 ColonyData$ProportionFemale <- (ColonyData$NumberFemales / ColonyData$NumberObserved) ColonyData$Weight_SD <- tapply(NonBreeders$MeanWeight, NonBreeders$Colony, sd) ColonyData$Age_SD <- tapply(NonBreeders$AgeMonths, NonBreeders$Colony, sd) ColonyData$Age_Range <- tapply(NonBreeders$AgeMonths, NonBreeders$Colony, range) ColonyData$Weight_Range <- tapply(NonBreeders$MeanWeight, NonBreeders$Colony, range) ColonyData$Behaviour_mean <- tapply(NonBreeders$BehaviourDuration_seconds, NonBreeders$Colony, mean) ColonyData$Behaviour_sd <- tapply(NonBreeders$BehaviourDuration_seconds, NonBreeders$Colony, sd) ColonyData <- ColonyData %>% mutate_if(is.numeric, round, digits = 1) ### General summary stats mean(NonBreeders$AgeMonths) range(NonBreeders$AgeMonths) mean(NonBreeders$MeanWeight) range(NonBreeders$MeanWeight) mean(NonBreeders$BehaviourDuration_seconds) sd(NonBreeders$BehaviourDuration_seconds) # average (sd) and percentage time spent working tapply(NonBreeders$BehaviourDuration_seconds, NonBreeders$Sex, mean) tapply(NonBreeders$BehaviourDuration_seconds, NonBreeders$Sex, sd) (100*tapply(NonBreeders$BehaviourDuration_seconds, NonBreeders$Sex, mean))/600 mean(NonBreeders$BehaviourDuration_seconds) sd(NonBreeders$BehaviourDuration_seconds) (100*(mean(NonBreeders$BehaviourDuration_seconds)))/600 ### Creating figures # define theme for graphs of logistic regressions Logistic_Regression_theme <- theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.title = element_text(size=18, hjust=0.5), text = element_text(size=16, family="serif"), axis.line = element_line(colour = "black"), panel.border = element_rect(colour = "black", fill=NA, size=2)) # panel.border=element_blank()) Logistic_Regression_Line <- stat_smooth(method = 'glm', method.args = list(family = "binomial"), formula=y~x, alpha=0.2, size=1, fill="black", colour="black") Points <- geom_point(position=position_jitter(height=0.03, width=0)) Counts <- geom_count(col="black",show.legend = F) # Plot observed working by age LogisticByAge_GlmLine <- ggplot(NonBreeders, aes(AgeMonths, as.numeric(Coop_Binary)-1)) + Logistic_Regression_theme + Logistic_Regression_Line + Counts + scale_y_continuous(breaks=c(0,0.5,1)) + labs(title="A. Age", x="Age (Months)", y="Observed Working (1 = Yes)") LogisticByWeight <- ggplot(NonBreeders, aes(MeanWeight, as.numeric(Coop_Binary)-1)) + Logistic_Regression_theme + Logistic_Regression_Line + Counts + scale_y_continuous(breaks=c(0,0.5,1)) + labs(title="B. Weight", x="Weight (Grams)", y="Observed Working (1 = Yes)") LogisticByRank <- ggplot(NonBreeders, aes(ScaledRank, as.numeric(Coop_Binary)-1)) + Logistic_Regression_theme + Logistic_Regression_Line + Counts + scale_y_continuous(breaks=c(0,0.5,1)) + labs(title="C. Rank", x="Rank (0-1, Scaled)", y="Observed Working (1 = Yes)") # define plots to save and save to PDF file LogPlots <- list(LogisticByAge_GlmLine, LogisticByWeight, LogisticByRank) pdf(paste0(file.path(DIR,"Figures", sep=""), "Figure1_WorkPresent.pdf",sep=""), onefile = TRUE) LogPlots dev.off() # Define theme for graphs of linear regressions LinearModel_theme <- theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.title = element_text(size=18, hjust=0.5), text = element_text(size=16, family="serif"), axis.line = element_line(colour = "black"), panel.border = element_rect(colour = "black", fill=NA, size=2)) LinearModel_Line <- stat_smooth(method = 'lm', formula=y~x, alpha=0.2, size=1, fill="black", colour="black") CoopByAge <- ggplot(NBNZ, aes(x = NBNZ$AgeMonths, y = NBNZ$BehaviourDuration_seconds)) + LinearModel_theme + LinearModel_Line + Points + labs(title="A. Age", x="Age (Months)", y="Duration Working (Seconds)") CoopByWeight <- ggplot(NBNZ, aes(x = NBNZ$MeanWeight, y = NBNZ$BehaviourDuration_seconds)) + LinearModel_theme + LinearModel_Line + Points + labs(title="B. Weight", x="Weight (Grams)", y="Duration Working (Seconds)") CoopByRank <- ggplot(NBNZ, aes(x = NBNZ$ScaledRank, y = NBNZ$BehaviourDuration_seconds)) + LinearModel_theme + LinearModel_Line + Points + labs(title="C. Rank", x="Rank (0-1, Scaled)", y="Duration Working (Seconds)") # define plots to save and save to PDF file GLMPlots <- list(CoopByAge, CoopByWeight, CoopByRank) pdf(paste0(file.path(DIR,"Figures", sep=""), "Figure2_WorkDuration.pdf",sep=""),onefile = TRUE) GLMPlots dev.off() # Colony Plots # Plot of work duration per observation by colony Colony_Coop <- ggplot(NonBreeders, aes(x=Colony, y=BehaviourDuration_seconds)) + geom_boxplot() + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "none", plot.title = element_text(hjust = 0.5), text = element_text(size=16, family="serif"), axis.line = element_line(colour = "black"), panel.border = element_rect(colour = "black", fill=NA, size=2)) + labs(title="\n\n\n\n\n", x="Colony", y="Duration of Working Behaviour (Seconds)") pdf(paste0(file.path(DIR,"Figures", sep=""), "Figure3_MeanWorkByColony.pdf",sep=""), onefile = TRUE) Colony_Coop dev.off() # Plots of observed working (binary) by age, weight and rank for each colony Colony_LogAge <- LogisticByAge_GlmLine + theme(panel.border = element_rect(colour = "black", fill=NA, size=1)) + facet_grid(cols=vars(NonBreeders$Colony)) Colony_LogWeight <- LogisticByWeight + theme(panel.border = element_rect(colour = "black", fill=NA, size=1)) + facet_grid(cols=vars(NonBreeders$Colony)) Colony_LogRank <- LogisticByRank + theme(panel.border = element_rect(colour = "black", fill=NA, size=1)) + scale_x_continuous(breaks = c(0, 0.25, 0.50, 0.75, 1), label = c("", ".25", ".50", ".75", "")) + facet_grid(cols=vars(NonBreeders$Colony)) # Define list and save outputs to PDF LogWorkByColony <- list(Colony_LogAge, Colony_LogWeight, Colony_LogRank) pdf(paste0(file.path(DIR,"Figures", sep=""), "Figure4_WorkPresenceByColony.pdf",sep=""), width = 25, height = 4, paper= "a4r", onefile = TRUE) LogWorkByColony dev.off() # Plots of duration of working by age, weight and rank for each colony Colony_LinAge <- CoopByAge + theme(panel.border = element_rect(colour = "black", fill=NA, size=1)) + facet_grid(cols=vars(NBNZ$Colony)) Colony_LinWeight <- CoopByWeight + theme(panel.border = element_rect(colour = "black", fill=NA, size=1)) + facet_grid(cols=vars(NBNZ$Colony)) Colony_LinRank <- CoopByRank + theme(panel.border = element_rect(colour = "black", fill=NA, size=1)) + scale_x_continuous(breaks = c(0, 0.25, 0.50, 0.75, 1), label = c("", ".25", ".50", ".75", "")) + facet_grid(cols=vars(NBNZ$Colony)) # Define list and save outputs to PDF LinWorkByColony <- list(Colony_LinAge, Colony_LinWeight, Colony_LinRank) pdf(paste0(file.path(DIR,"Figures", sep=""), "Figure5_WorkDurationByColony.pdf",sep=""), width = 25, height = 4, paper= "a4r", onefile = TRUE) LinWorkByColony dev.off()