--- title: | | **No sex difference in preen oil chemical composition during incubation in Kentish Plovers** | | R code author: "*M. Gilles, A. Kosztolányi, A. Rocha, I. C. Cuthill, T. Székely & B. A. Caspers*" output: pdf_document: toc: true # to generate a table of contents toc_depth: 3 # upto three depths of headings (specified by #, ## and ###) number_sections: true ## if you want number sections at each table header latex_engine: xelatex html_document: default header-includes: \usepackage{float} \floatplacement{figure}{H} # for the position of figures/plots --- ```{r global-options, include=FALSE} knitr::opts_chunk$set(warning=FALSE, message=FALSE) # settings for the whole documents: no message and no warning will be displayed ``` > Required packages ```{r, results='hide'} library(qpcR) # to use rbind.na function library(dplyr) # to manipulate data library(GCalignR) # to align and process chemical data library(vegan) # to analyse chemical data with mutlivariate analyses library(lme4) # to analyse alpha diversity with linear mixed models library(betareg) # to run beta regressions library(broom.mixed) # to get confidence intervals from linear mixed models library(partR2) # to get R2 and confidence intervals of fixed effects from mixed models library(performance) # to verify model assumptions library(simr) # to conduct power analyses library(ggplot2) # to plot library(lemon) # for plotting (coord_capped_cart) ``` # Data ## Chemical data Import the chemical data ```{r, results='hide'} prepforGCalignR <- read.csv("chemdata.csv", header = F, check.names = F) ``` Format the data for GCalignR ```{r, results='hide'} prepforGCalignR <- prepforGCalignR[,c(T,T,F,F)] # remove every 3rd and 4th row sampleids <- prepforGCalignR[1,] # create a dataframe with just the first row sampleids <- sampleids[,c(T,F)] # remove empty cell between each sampleID RTarea <- matrix(c("RT","area")) # prepare the row with RT/area RTarea <- t(RTarea) # flip columns and rows RTarea <- data.frame(RTarea) prepforGCalignR <- qpcR:::rbind.na(sampleids, RTarea, prepforGCalignR) # bind sampleIDs, RTarea and the data, fill the end of sampleIDs and RT/area with NAs prepforGCalignR <- prepforGCalignR[-c(3,4),] # remove unnecessary rows # save as a .txt file write.table(prepforGCalignR,"dataforGCalignR.txt", row.names = F, col.names = F, sep="\t", na = "", quote = F) check_input("dataforGCalignR.txt") # check that the data format is good ``` Create a vector with the name of blank samples ```{r} blanknames <- c("blank2707","blank2807","blank2907","blank3007", "blank0308","blank0408","blank0209") ``` Align chromatograms ```{r, cache=TRUE, results='hide'} aligned <- align_chromatograms(data = "dataforGCalignR.txt", rt_col_name = "RT", reference = "D59121_250619", # set the first sample analysed in GC-MS as the reference blanks = blanknames, # remove substances that are detected in blanks delete_single_peak = TRUE, # remove substances that are detected in one sample only remove_empty = TRUE, # remove empty samples max_diff_peak2mean = 0.02, # maximum difference to the mean RT across samples permute = F) # keep the order of samples constant ``` Check the output of the alignment procedure ```{r} print(aligned) ``` Heatmap of the aligned chromatograms ```{r} gc_heatmap(aligned,threshold = 0.02) ``` Normalize to get relative abundances ```{r results='hide'} chemdata <- norm_peaks(aligned, conc_col_name = "area",rt_col_name = "RT", out = "data.frame") ``` Average (and SD) number of substances per sample ```{r} mean <- mean(rowSums(chemdata != 0)) sd <- sd(rowSums(chemdata != 0)) data.frame(mean,sd) ``` Number and proportion of samples in which each substance occurs ```{r} # RT of each substance RT <- round(as.numeric(colnames(chemdata)), digits = 3) # number of samples where each substance occurs n_samples <- colSums(chemdata != 0) # proportion of samples where each substance occurs prop_samples <- colSums(chemdata != 0)/nrow(chemdata) # put it in a data frame subst_details <- data.frame(RT, n_samples, prop_samples) # number of substances detected in all samples nrow(subst_details[subst_details$n_samples == 20,]) ``` Relationship between the proportion of samples in which a substance occurs and the retention time of the substance ```{r, fig.dim = c(3, 3.5), fig.align="center"} # plot plot(subst_details$prop_samples~subst_details$RT, ylab = "Proportion of samples", xlab = "Retention time (min)") ``` Beta regression to test the quadratic relationship between the proportion of samples in which a substance occurs and the retention time of the substance ```{r, fig.dim = c(4, 3), fig.align="center"} # center RT to reduce the correlation between the linear and quadratic terms subst_details$RT_centered <- scale(subst_details$RT) # slightly modify proportion of samples to fit a beta regression (should not include 0 or 1) subst_details_modified <- subst_details subst_details_modified$prop_samples <- ifelse(subst_details_modified$prop_samples == 0, 0.000001, ifelse(subst_details_modified$prop_samples == 1, 0.999999, subst_details_modified$prop_samples)) # orthogonal polynomials using poly(X,2) # complementary log-log link as the proportion of samples is mostly very high or very low ortho <- betareg(prop_samples ~ poly(RT_centered,2), data = subst_details_modified, link = "cloglog") summary(ortho) check_model(ortho) ``` *The substances that are detected in all samples (N = 35 substances) all have an intermediate retention time (between 12 and 24 min), while substances with a lower or higher retention times are detected in relatively less samples. Indeed, we find a negative quadratic effect of the retention time of a substance on the proportion of samples in which it occurs (beta = -5.57, P < 0.001).* Average (and SD) relative proportion of each substance in samples ```{r} options(scipen = 999) # to disable scientific notation # average relative abundance of each substance subst_details$mean_abun <- colMeans(chemdata) subst_details$mean_abun <- round(subst_details$mean_abun, digits = 2) # sd relative abundance of each substance subst_details$sd_abun <- sqrt(diag(cov(chemdata))) subst_details$sd_abun <- round(subst_details$sd_abun, digits = 2) ``` Log-transform relative abundances to give more weight to low-abundance substances ```{r} chemdata <- log(chemdata + 1) ``` ## Metadata Import metadata ```{r} metadata <- read.csv("metadata.csv") ``` Add variable on independent versus paired (in a breeding pair) individuals ```{r} metadata$paired <- ifelse(duplicated(metadata$nest) | duplicated(metadata$nest, fromLast = TRUE), "paired", "independent") metadata$paired <- as.factor(metadata$paired) ``` Transform sex and nest variables as a factor ```{r} metadata$sex <- as.factor(metadata$sex) metadata$nest<- as.factor(metadata$nest) ``` Check that the design is balanced ```{r} table(metadata$sex) ``` Check how many samples are from individuals from the same nest (i.e. pair ID) ```{r} table(metadata$nest) ``` Check the variation in the seasonal variable (i.e. number of days after laying) ```{r} mean <- mean(metadata$daysafterlay) sd <- sd(metadata$daysafterlay) min <- min(metadata$daysafterlay) max <- max(metadata$daysafterlay) data.frame(mean,sd,min,max) ``` ```{r, fig.dim = c(3.5, 3.5), fig.align="center"} hist(metadata$daysafterlay, main = NULL, xlab = "Days after laying") ``` Set sample names as row names ```{r} rownames(metadata) <- metadata[,1] metadata <- metadata[,-1] ``` Make sure that the rows in chemdata and metadata are consistent ```{r} metadata <- metadata[rownames(metadata) %in% rownames(chemdata),] chemdata <- chemdata[rownames(chemdata) %in% rownames(metadata),] chemdata <- chemdata[match(rownames(metadata),rownames(chemdata)),] ``` # Analysis ## PERMANOVA Some samples are from independent individuals (N = 12) and from paired individuals (N = 8, from 4 breeding pairs). Because individuals from the same breeding pair may be more similar, we need to run the PERMANOVA using only independent individuals. Thus, we select the 12 independent samples, as well as 4 samples randomly selected among the paired individuals (always 1 male and 3 females, never from the same breeding pair), and iterate PERMANOVAs 1000 times. Doing so, we use all 20 samples, but run PERMANOVAs with 16 independent samples in a balanced design (8 females, 8 males). Having a balanced design is important, as PERMANOVA can be sensitive to differences in dispersion under unbalanced designs, especially with type I (sequential) sums of square. Here, we use type I (sequential) sums of square in order to test for the interaction between sex and the number of days after laying. ```{r} # number of PERMANOVAs (iterations) nad <- 1000 # number of permutations per PERMANOVA perm <- 9999 # vectors to store the results from the iterated PERMANOVA SSsex <- rep(NA,nad) SSdays <- rep(NA,nad) SSsexdays <- rep(NA,nad) SSres <- rep(NA,nad) R2sex <- rep(NA,nad) R2days <- rep(NA,nad) R2sexdays <- rep(NA,nad) R2res <- rep(NA,nad) Fsex <- rep(NA,nad) Fdays <- rep(NA,nad) Fsexdays <- rep(NA,nad) nFsex <- rep(NA,nad) nFdays <- rep(NA,nad) nFsexdays <- rep(NA,nad) # iterated PERMANOVA for(r in 1:nad) { # select the data # 12 independent birds independent_birds <- metadata %>% filter(paired == "independent")%>% select(nest) # 8 paired birds paired <- metadata %>% filter(paired == "paired") # select randomly 2 males from the paired birds paired_males <- paired %>% filter(sex == "M") %>% distinct(nest) %>% sample_n(2) # select randomly 2 females from the paired birds # (but not the partners of the selected males) paired_females <- paired %>% filter(sex == "F" & !nest %in% paired_males$nest) %>% distinct(nest) %>% sample_n(2) # 16 independent birds (7 females and 9 males) selection <- bind_rows(independent_birds, paired_males, paired_females) # selection of chemical data and metadata chemdata1 <- chemdata[rownames(selection),] metadata1 <- metadata[rownames(selection),] # PERMANOVA aout <- adonis2(chemdata1~sex*daysafterlay,data=metadata1,permutations=perm,by="terms") # store the results SSsex[r] <- aout$SumOfSqs[1] SSdays[r] <- aout$SumOfSqs[2] SSsexdays[r] <- aout$SumOfSqs[3] SSres[r] <- aout$SumOfSqs[4] R2sex[r] <- aout$R2[1] R2days[r] <- aout$R2[2] R2sexdays[r] <- aout$R2[3] R2res[r] <- aout$R2[4] Fsex[r] <- aout$F[1] Fdays[r] <- aout$F[2] Fsexdays[r] <- aout$F[3] nFsex[r] <- sum(attr(aout,"F.perm")[,1]>=aout$F[1]) nFdays[r] <- sum(attr(aout,"F.perm")[,2]>=aout$F[2]) nFsexdays[r] <- sum(attr(aout,"F.perm")[,3]>=aout$F[3]) } ``` Results from iterated PERMANOVA: Sums of squares ```{r} data.frame(sex=unclass(summary(SSsex)),days=unclass(summary(SSdays)), "sex:days"=unclass(summary(SSsexdays)),residuals=unclass(summary(SSres))) ``` Results from iterated PERMANOVA: *R2* ```{r} data.frame(sex=unclass(summary(R2sex)),days=unclass(summary(R2days)), "sex:days"=unclass(summary(R2sexdays)),residuals=unclass(summary(R2res))) ``` Results from iterated PERMANOVA: *F*-statistics ```{r} data.frame(sex=unclass(summary(Fsex)),days=unclass(summary(Fdays)), "sex:days"=unclass(summary(Fsexdays))) ``` Results from iterated PERMANOVA: *P*-values ```{r} data.frame(sex=sum(nFsex+1)/(nad*(perm+1)),days=sum(nFdays+1)/(nad*(perm+1)), "sex:days"=sum(nFsexdays+1)/(nad*(perm+1))) ``` *No effect of sex, the number of days after laying,the interaction between sex and the number of days after laying on preen oil composition (Bray-Curtis dissimilarities). No difference in location (centroids) between females and males.* Test whether dispersion differs between the sexes ```{r} chemdata_bc <- vegdist(chemdata) # Bray-Curtis dissimilarity matrix disp <- betadisper(chemdata_bc, metadata$sex, type = "centroid") disp$group.distances # distance to median for each group anova(disp) # test of difference in dispersion ``` *No difference in dispersion between females and males.* ## Non-metric multidimensional scaling (NMDS) Calculate Bray-Curtis dissimlarities ```{r, results='hide'} bc <- metaMDS(chemdata, distance = "bray") ``` Check the stress (how good the distance between samples in actual multivariate distance is represented in two dimensions) ```{r} bc$stress ``` *A stress value inferior to 0.1 indicates a good fit: the distance between samples in two dimensions represents well the actual multivariate distance between samples.* Plot ```{r, fig.dim = c(4, 4), fig.align="center"} # add NMDS to the metadata bc <- as.data.frame(bc[["points"]]) metadata$MDS1 <- bc$MDS1 metadata$MDS2 <- bc$MDS2 # plot ggplot(data = metadata) + geom_point(aes(MDS1, MDS2, color = sex, shape = sex, size = sex)) + # 95% confidence level for a multivariate t-distribution stat_ellipse(aes(MDS1, MDS2, color = sex), type = "t", level = 0.95) + theme_void() + scale_size_manual(values=c(5,6)) + scale_shape_manual(values=c(16,18)) + scale_color_manual(values = c("grey64","tan2")) + annotate("text", x = 0.305, y = -.195, label = "2D Stress: 0.06", size = 3.5) + theme(panel.background = element_rect(colour = "black", size = 1, fill = NA), aspect.ratio = 1, legend.title = element_blank(), legend.position = c(1.885, .88), legend.text = element_text(size=14), legend.background = element_rect(size = 0.4, linetype = "solid", color = "black"), legend.margin = margin(4, 8, 7, 5), plot.margin=unit(c(1,1,1,1),"cm")) ``` ## Shannon diversity (H) Calculate and add Shannon diversity (H) ```{r} metadata$H <- diversity(chemdata, "shannon") ``` Distribution of diversity ```{r, fig.dim = c(3.5, 3.5), fig.align="center"} hist(metadata$H, main = NULL, xlab = "Shannon diversity") ``` *The variable is positive, continuous, normal. We can therefore fit the linear regression with a Gaussian distribution.* Linear mixed model testing the effects of sex and season (days after laying) while controlling for pair ID ```{r} lmm.H <- lmer(H ~ sex * daysafterlay + (1|nest), data = metadata) summary(lmm.H) ``` Beta estimates (with confidence intervals) ```{r} tidy(lmm.H, conf.int = TRUE, conf.method = 'boot') ``` *Chemical diversity is not affected by sex or season.* Check model assumptions ```{r, fig.dim = c(8, 9), fig.align="center"} check_model(lmm.H) ``` Boxplot ```{r, fig.dim = c(2,4), fig.align="center"} ggplot(metadata, aes(x=sex, y=H, fill = sex))+ geom_boxplot()+ geom_jitter(position=position_jitter(0.1))+ scale_fill_manual(values=c("grey64","tan2"))+ labs(x = element_blank(), y = "Shannon diversity")+ theme_classic()+ theme(legend.position = "none", axis.title = element_text(size = 12))+ annotate("text", x = 1.5, y=4.025, label = "ns") + geom_segment(aes(x = 1, y = 4.008, xend = 2, yend = 4.008)) ``` ## Chemical richness (S) Calculate and add richness (number of substances; S) ```{r} metadata$S <- specnumber(chemdata) ``` Mean and SD number of substances in females and males ```{r} aggregate(metadata$S, list(metadata$sex), FUN=mean) aggregate(metadata$S, list(metadata$sex), FUN=sd) ``` Distribution of richness ```{r, fig.dim = c(3.5, 3.5), fig.align="center"} hist(metadata$S, main = NULL, xlab = "Chemical richness") ``` *The variable is positive continuous and normal. We can therefore fit the linear regression with a Gaussian distribution.* Linear mixed model testing the effects of sex and season (days after laying) while controlling for pair ID ```{r} lmm.S <- lmer(S ~ sex * daysafterlay + (1|nest), data = metadata) summary(lmm.S) ``` Beta estimates (with confidence intervals) ```{r} tidy(lmm.S, conf.int = TRUE, conf.method = 'boot') ``` *Chemical diversity is not affected by sex or season.* Check model assumptions ```{r, fig.dim = c(8, 9), fig.align="center"} check_model(lmm.S) ``` Boxplot ```{r, fig.dim = c(2, 4), fig.align="center"} ggplot(metadata, aes(x=sex, y=S, fill = sex))+ geom_boxplot()+ geom_jitter(position=position_jitter(0.1))+ scale_fill_manual(values=c("grey64","tan2"))+ scale_x_discrete(labels= c("Female","Male"))+ labs(x = element_blank(), y = "Number of substances")+ theme_classic()+ theme(legend.position = "none", axis.ticks.x =element_blank(), axis.text.x = element_text(size = 13), axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 14))+ annotate("text", x = 1.5, y=86.25, label = "ns") + geom_segment(aes(x = 1, y = 84.9, xend = 2, yend = 84.9)) ```