# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Boisville & al # # # # # # # # Sexual dimorphism in the walrus mandible, # # # # # # # # comparative description and geomorphometry # # # # # # # # Written by N. chatar 2022 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # library(geomorph) library(Morpho) setwd("C:/Users/Nari/Desktop/Script Mathieu") # Importation of the .tps files actuel<-readland.tps("Raw coordinates 2.tps", specID=c("imageID"),negNA=FALSE,readcurves = TRUE, warnmsg=TRUE) # Definition of the semilandmarks sliders = rbind(define.sliders(c(1,9:13,2)), define.sliders(c(2,14:23,3)), define.sliders(c(3,24:39,4)), define.sliders(c(4,40:49,5)), define.sliders(c(5,50:54,6)), define.sliders(c(6,55:59,7)), define.sliders(c(7,60:69,8)), define.sliders(c(8,70:83,1))) # Perform Generalised Procrustes analyse y <- gpagen(actuel, curves=sliders) classifier <- read.csv2("SpecimensWalrus Ontogenic Stage.csv", header=T, row.names=1) plot(y) # # # PCA # # # PCA <- gm.prcomp(y$coords) eigenvalues <- PCA$d scores <- PCA$x # # # Labels # # # sex <- classifier$sex age <- factor(classifier$Age, levels = c("juvenile", "adult")) specimen_number <- row.names(classifier) number <- classifier$number # Define particular color and shape for different Sex colors_to_plot <- c("na" = "#5BDDDE", "f" = "#AA4A44", "m" = "#0A9F00") shape_to_plot <- c("na" = 17, "f" = 25, "m" = 22) # # # Plot PCA # # # # Creation of the df for the ggplot df_pca <-cbind(as.data.frame(scores),specimen_number, sex) install.packages("ggplot2") install.packages('ggfortify') install.packages("ggthemes") install.packages("ggrepel") library(ggplot2) library(ggfortify) library(ggthemes) library(ggrepel) # Plot the % of variance explained by each axis df_eigenvalues <- as.data.frame(cbind(dimension = 1:length(eigenvalues), Percentage = round(((eigenvalues/sum(eigenvalues))*100), digits = 2))) barplot_percentage <- ggplot(df_eigenvalues, aes(x=dimension, y=Percentage)) + theme_minimal() + theme(axis.line = element_line(color = "darkgrey", size = 0.5)) + labs(x = paste0('PC'), y = paste0('% of variance explained')) + scale_x_continuous(breaks = round(seq(min(df_eigenvalues$dimension), max(df_eigenvalues$dimension), by = 1))) + geom_bar(stat = "identity", fill = "#ADD8E6", alpha = 0.8) barplot_percentage # # # PC1 vs PC2, variance explained = 51,46% # # # # Calculate the hulls for each group split(df_pca[,1:2], df_pca$sex) chull_PCA12 <- lapply(split(df_pca, df_pca$sex), function(df){ df[chull(df),] }) chull_PCA12 <- do.call(rbind, chull_PCA12) # PCA with ggplot PC1vsPC2 <- ggplot(data=df_pca[,1:2],aes(x=df_pca[,1],y=df_pca[,2], color = sex, shape = sex)) + theme_minimal() + theme(axis.line = element_line(color = "darkgrey", size = 0.5)) + geom_point(aes(size = age,alpha=0.8)) + scale_color_manual(values = colors_to_plot) + scale_fill_manual(values = colors_to_plot) + scale_shape_manual(values = shape_to_plot) + geom_text_repel(aes(label=number), size=3) + labs(x = paste0('PC1 = ', round(((eigenvalues[1]/sum(eigenvalues))*100), digits = 2), '%'), y = paste0('PC2 = ', round(((eigenvalues[2]/sum(eigenvalues))*100), digits = 2), '%')) + geom_polygon(data = chull_PCA12, aes(x= Comp1, y = Comp2, fill = sex, colour = sex), alpha = 0.2) PC1vsPC2 # # # PC1 vs PC3, variance explained = 43.6 % # # # # Calculate the hulls for each group split(df_pca[,c(1,3,41)], df_pca$sex) chull_PCA13 <- lapply(split(df_pca[,c(1,3,41)], df_pca$sex), function(df){ df[chull(df),] }) chull_PCA13 <- do.call(rbind, chull_PCA13) # PCA with ggplot PC1vsPC3 <- ggplot(data=df_pca,aes(x=df_pca[,1],y=df_pca[,3],color = sex, shape = sex)) + theme_minimal() + theme(axis.line = element_line(color = "darkgrey", size = 0.5)) + geom_point(aes(size = age,alpha=0.8)) + scale_color_manual(values = colors_to_plot) + scale_fill_manual(values = colors_to_plot) + scale_shape_manual(values = shape_to_plot) + geom_text_repel(aes(label=number), size=3) + labs(x = paste0('PC1 = ', round(((eigenvalues[1]/sum(eigenvalues))*100), digits = 2), '%'), y = paste0('PC3 = ', round(((eigenvalues[3]/sum(eigenvalues))*100), digits = 2), '%')) + geom_polygon(data=chull_PCA13, aes(x= Comp1, y = Comp3, fill = sex, colour = sex), alpha = 0.2) PC1vsPC3 # # # PC2 vs PC3, variance explained = 29.22 % # # # # Calculate the hulls for each group split(df_pca[,c(2,3,41)], df_pca$sex) chull_PCA23 <- lapply(split(df_pca[,c(2,3,41)], df_pca$sex), function(df){ df[chull(df),] }) chull_PCA23 <- do.call(rbind, chull_PCA23) # PCA with ggplot PC2vsPC3 <- ggplot(data=df_pca,aes(x=df_pca[,2],y=df_pca[,3],color = sex, shape = sex)) + theme_minimal() + theme(axis.line = element_line(color = "darkgrey", size = 0.5)) + geom_point(aes(size = age,alpha=0.8)) + scale_color_manual(values = colors_to_plot) + scale_fill_manual(values = colors_to_plot) + scale_shape_manual(values = shape_to_plot) + geom_text_repel(aes(label=number), size=3) + labs(x = paste0('PC2 = ', round(((eigenvalues[2]/sum(eigenvalues))*100), digits = 2), '%'), y = paste0('PC3 = ', round(((eigenvalues[3]/sum(eigenvalues))*100), digits = 2), '%')) + geom_polygon(data=chull_PCA23, aes(x= Comp2, y = Comp3, fill = sex, colour = sex), alpha = 0.2) PC2vsPC3 # Extreme shapes on PC axes PC1 <- PCA$x[,1] PC2 <- PCA$x[,2] PC3 <- PCA$x[,3] M <- mshape(y$coords) # EXTREME PC1 preds <- shape.predictor(A = y$coords, x= PC1, pred1 = max(PC1), pred2 = min(PC1)) plotRefToTarget(M, preds$pred1) # MAX PC1 plotRefToTarget(M, preds$pred2) # MIN PC1 # EXTREME PC2 preds <- shape.predictor(A = y$coords, x= PC2, pred1 = max(PC2), pred2 = min(PC2)) plotRefToTarget(M, preds$pred1) # MAX PC2 plotRefToTarget(M, preds$pred2) # MIN PC2 # EXTREME PC3 preds <- shape.predictor(A = y$coords, x= PC3, pred1 = max(PC3), pred2 = min(PC3)) plotRefToTarget(M, preds$pred1) # MAX PC3 plotRefToTarget(M, preds$pred2) # MIN PC3 # # # (1) Procrustes ANOVA # # # gdf <- geomorph.data.frame(y, Sexe = as.factor(Sex)) # # Sex # # #On the identified males and females #Extract males and females from the column 'Sex' MalesFemales <-classifier$sex %in% c("f", "m") fit1 <- procD.lm(coords[,,MalesFemales]~Sex[MalesFemales],data=gdf,iter=999) summary(fit1) # # AGE # # # On the whole dataset gdf2 <- geomorph.data.frame(y, age = as.factor(classifier$Age)) fit <- procD.lm(coords~age,data=gdf2,iter=999) summary(fit) # # Log Centroid Size # # gdf3 <- geomorph.data.frame(y, size = log(y$Csize)) fit <- procD.lm(coords~size,data=gdf3,iter=999) summary(fit) # # # (2) Linear Discriminant analysis # # # library(MASS) library(tidyverse) library(caret) coords2d <- as.data.frame(two.d.array(y$coords)) coords2d <- cbind(coords2d, sex) # M-test for homogeneity of covariance matrices install.packages("heplots") library(heplots) res <- boxM(coords2d, group = sex) res summary(res) # On the identified males and females # Split the data into training (80%) and test set (20%) coords2d_identified <- coords2d[coords2d$sex %in% c("f", "m"), ] set.seed() training.individuals <- sex[MalesFemales] %>% createDataPartition(p = 0.7, list = FALSE) train.data_identified <- coords2d_identified[training.individuals, ] test.data_identified <- coords2d_identified[-training.individuals, ] # Estimate preprocessing parameters preproc.parameter_identified <- train.data_identified %>% preProcess(method = c("center", "scale")) # Transform the data using the estimated parameters train.transform_identified <- preproc.parameter_identified %>% predict(train.data_identified) test.transform_identified <- preproc.parameter_identified %>% predict(test.data_identified) # Fit the model model_identified <- lda(sex ~., data = train.transform_identified, CV = TRUE) # Get the cross validated error rate table(train.transform_identified$sex, model_identified$class, dnn = c('Actual Group','Predicted Group')) # Make predictions predictions_identified <- model_identified %>% predict(test.transform_identified) # Model accuracy mean(predictions_identified$class==test.transform_identified$Sex) # Plot plot(model_identified) # On the identified males and females ADULT ONLY # Split the data into training (80%) and test set (20%) coords2d_identified_a <- filter(coords2d, sex %in% c("f", "m") & age %in% c("adult")) Sex_adults <- coords2d_identified_a$sex set.seed() training.individuals <- Sex_adults %>% createDataPartition(p = 0.8, list = FALSE) train.data_identified_a <- coords2d_identified_a[training.individuals, ] test.data_identified_a <- coords2d_identified_a[-training.individuals, ] # Estimate preprocessing parameters preproc.parameter_identified_a <- train.data_identified_a %>% preProcess(method = c("center", "scale")) # Transform the data using the estimated parameters train.transform_identified_a <- preproc.parameter_identified_a %>% predict(train.data_identified_a) test.transform_identified_a <- preproc.parameter_identified_a %>% predict(test.data_identified_a) # Fit the model model_identified_a <- lda(sex ~., data = test.transform_identified_a, CV = TRUE) # Get the cross validated error rate table(train.transform_identified_a$sex, model_identified_a$class, dnn = c('Actual Group','Predicted Group')) # Make predictions predictions_identified_a <- model_identified_a %>% predict(test.transform_identified_a) # Model accuracy mean(predictions_identified_a$class==test.transform_identified_a$Sex) # Plot plot(model_identified_a) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # (3) ALLOMETRY # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # install.packages("ggpubr") library(ggpubr) df_allometry <- cbind(df_pca, size = y$Csize) #Extract identified males and females df_allometry <- df_allometry[sex %in% c("f", "m"),] names(df_allometry)[names(df_allometry) == 'sex'] <- 'sex_mf' # First 4 axes plot_list = list() for (i in 1:4) { plot_list[[i]] <- local ({ i<- i p <- ggplot(df_allometry, aes(x = log(size), y = df_allometry[,i], group = sex_mf)) + theme_minimal() + theme(axis.line = element_line(color = "darkgrey", size = 0.5)) + geom_point(aes(color = sex_mf, shape = sex_mf, size = size)) + scale_color_manual(values = colors_to_plot) + scale_shape_manual(values=shape_to_plot) + geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE) + labs(x = paste0('Log Centroid size'), y = paste0('PC ', paste(i))) + stat_regline_equation(aes(label = paste(..eq.label.., ..adj.rr.label.., sep = "~~~~"))) }) } ggarrange(plotlist = plot_list, ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom") # PC1 vs PC2 size PC1vsPC2 <- ggplot(data=df_pca[,1:2],aes(x=df_pca[,1],y=df_pca[,2], color = sex, shape = sex)) + theme_minimal() + theme(axis.line = element_line(color = "darkgrey", size = 0.5)) + geom_point(aes(size = log(y$Csize),alpha=0.8)) + scale_color_manual(values = colors_to_plot) + scale_fill_manual(values = colors_to_plot) + scale_shape_manual(values = shape_to_plot) + geom_text_repel(aes(label=number), size=3) + labs(x = paste0('PC1 = ', round(((eigenvalues[1]/sum(eigenvalues))*100), digits = 2), '%'), y = paste0('PC2 = ', round(((eigenvalues[2]/sum(eigenvalues))*100), digits = 2), '%')) + geom_polygon(data = chull_PCA12, aes(x= Comp1, y = Comp2, fill = sex, colour = sex), alpha = 0.2) PC1vsPC2