# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
              # # # #                                                                 # # # #
              # # # #                        Boisville & al                           # # # #
              # # # #           Sexual dimorphism in the walrus mandible,             # # # #
              # # # #          comparative description and geomorphometry             # # # #
              # # # #                 Written by N. chatar 2022                       # # # #
              # # # #                                                                 # # # # 
              # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 


library(geomorph)
library(Morpho)

setwd("C:/Users/Nari/Desktop/Script Mathieu")

# Importation of the .tps files
actuel<-readland.tps("Raw coordinates 2.tps", specID=c("imageID"),negNA=FALSE,readcurves = TRUE, warnmsg=TRUE)

# Definition of the semilandmarks
sliders = rbind(define.sliders(c(1,9:13,2)), 
                define.sliders(c(2,14:23,3)), 
                define.sliders(c(3,24:39,4)), 
                define.sliders(c(4,40:49,5)), 
                define.sliders(c(5,50:54,6)), 
                define.sliders(c(6,55:59,7)), 
                define.sliders(c(7,60:69,8)), 
                define.sliders(c(8,70:83,1)))


# Perform Generalised Procrustes analyse 
y <- gpagen(actuel, curves=sliders)
classifier <- read.csv2("SpecimensWalrus Ontogenic Stage.csv", header=T, row.names=1)

plot(y)

# # # PCA # # # 

PCA <- gm.prcomp(y$coords)
eigenvalues <- PCA$d
scores <- PCA$x

# # # Labels # # #  

sex               <- classifier$sex
age               <- factor(classifier$Age, levels = c("juvenile", "adult"))
specimen_number   <- row.names(classifier)
number            <- classifier$number


# Define particular color and shape for different Sex

colors_to_plot <- c("na" = "#5BDDDE", 
                    "f" = "#AA4A44",
                    "m" = "#0A9F00")

shape_to_plot <-  c("na" = 17, 
                    "f" = 25,
                    "m" = 22)


# # # Plot PCA # # #

# Creation of the df for the ggplot
df_pca <-cbind(as.data.frame(scores),specimen_number, sex)

install.packages("ggplot2")
install.packages('ggfortify')
install.packages("ggthemes")
install.packages("ggrepel")

library(ggplot2)
library(ggfortify)
library(ggthemes) 
library(ggrepel)

# Plot the % of variance explained by each axis
df_eigenvalues <- as.data.frame(cbind(dimension = 1:length(eigenvalues), Percentage = round(((eigenvalues/sum(eigenvalues))*100), digits = 2)))

barplot_percentage <- ggplot(df_eigenvalues, aes(x=dimension, y=Percentage)) + 
                            theme_minimal() +
                            theme(axis.line = element_line(color = "darkgrey", size = 0.5)) +
                            labs(x = paste0('PC'), 
                                 y = paste0('% of variance explained')) +
                            scale_x_continuous(breaks = round(seq(min(df_eigenvalues$dimension), 
                                                                  max(df_eigenvalues$dimension), 
                                                                  by = 1))) +
                            geom_bar(stat = "identity", fill = "#ADD8E6", alpha = 0.8)
barplot_percentage 

# # # PC1 vs PC2, variance explained = 51,46% # # # 

# Calculate the hulls for each group
split(df_pca[,1:2], df_pca$sex)
chull_PCA12 <- lapply(split(df_pca, df_pca$sex), function(df){
  df[chull(df),]
})

chull_PCA12 <- do.call(rbind, chull_PCA12)

# PCA with ggplot 

PC1vsPC2 <- ggplot(data=df_pca[,1:2],aes(x=df_pca[,1],y=df_pca[,2], color = sex, shape = sex)) +
            theme_minimal() +
            theme(axis.line = element_line(color = "darkgrey", size = 0.5)) +
            geom_point(aes(size = age,alpha=0.8)) +
            scale_color_manual(values = colors_to_plot) +
            scale_fill_manual(values = colors_to_plot) +
            scale_shape_manual(values = shape_to_plot) +
            geom_text_repel(aes(label=number), size=3) +
            labs(x = paste0('PC1 = ', round(((eigenvalues[1]/sum(eigenvalues))*100), digits = 2), '%'), 
                 y = paste0('PC2 = ', round(((eigenvalues[2]/sum(eigenvalues))*100), digits = 2), '%')) +              
            geom_polygon(data = chull_PCA12, aes(x= Comp1, y = Comp2, fill = sex, colour = sex), alpha = 0.2)  
PC1vsPC2 

# # # PC1 vs PC3, variance explained = 43.6 % # # # 

# Calculate the hulls for each group
split(df_pca[,c(1,3,41)], df_pca$sex)
chull_PCA13 <- lapply(split(df_pca[,c(1,3,41)], df_pca$sex), function(df){
  df[chull(df),]
})

chull_PCA13 <- do.call(rbind, chull_PCA13)

# PCA with ggplot 

PC1vsPC3 <- ggplot(data=df_pca,aes(x=df_pca[,1],y=df_pca[,3],color = sex, shape = sex)) +
            theme_minimal() +
            theme(axis.line = element_line(color = "darkgrey", size = 0.5)) +
            geom_point(aes(size = age,alpha=0.8)) +
            scale_color_manual(values = colors_to_plot) +
            scale_fill_manual(values = colors_to_plot) +
            scale_shape_manual(values = shape_to_plot) +
            geom_text_repel(aes(label=number), size=3) +
            labs(x = paste0('PC1 = ', round(((eigenvalues[1]/sum(eigenvalues))*100), digits = 2), '%'), 
                 y = paste0('PC3 = ', round(((eigenvalues[3]/sum(eigenvalues))*100), digits = 2), '%')) +              
            geom_polygon(data=chull_PCA13, aes(x= Comp1, y = Comp3, fill = sex, colour = sex), alpha = 0.2)  
PC1vsPC3 

# # # PC2 vs PC3, variance explained = 29.22 % # # # 

# Calculate the hulls for each group
split(df_pca[,c(2,3,41)], df_pca$sex)
chull_PCA23 <- lapply(split(df_pca[,c(2,3,41)], df_pca$sex), function(df){
  df[chull(df),]
})

chull_PCA23 <- do.call(rbind, chull_PCA23)

# PCA with ggplot 
PC2vsPC3 <- ggplot(data=df_pca,aes(x=df_pca[,2],y=df_pca[,3],color = sex, shape = sex)) +
            theme_minimal() +
            theme(axis.line = element_line(color = "darkgrey", size = 0.5)) +
            geom_point(aes(size = age,alpha=0.8)) +
            scale_color_manual(values = colors_to_plot) +
            scale_fill_manual(values = colors_to_plot) +
            scale_shape_manual(values = shape_to_plot) +
            geom_text_repel(aes(label=number), size=3) +
            labs(x = paste0('PC2 = ', round(((eigenvalues[2]/sum(eigenvalues))*100), digits = 2), '%'), 
                 y = paste0('PC3 = ', round(((eigenvalues[3]/sum(eigenvalues))*100), digits = 2), '%')) +              
            geom_polygon(data=chull_PCA23, aes(x= Comp2, y = Comp3, fill = sex, colour = sex), alpha = 0.2)  
PC2vsPC3

# Extreme shapes on PC axes

PC1 <- PCA$x[,1]
PC2 <- PCA$x[,2]
PC3 <- PCA$x[,3]
M <- mshape(y$coords)

# EXTREME PC1

preds <- shape.predictor(A = y$coords, x= PC1, 
                         pred1 = max(PC1), 
                         pred2 = min(PC1))

plotRefToTarget(M, preds$pred1) # MAX PC1
plotRefToTarget(M, preds$pred2) # MIN PC1

# EXTREME PC2

preds <- shape.predictor(A = y$coords, x= PC2, 
                         pred1 = max(PC2), 
                         pred2 = min(PC2))

plotRefToTarget(M, preds$pred1) # MAX PC2
plotRefToTarget(M, preds$pred2) # MIN PC2

# EXTREME PC3

preds <- shape.predictor(A = y$coords, x= PC3, 
                         pred1 = max(PC3), 
                         pred2 = min(PC3))

plotRefToTarget(M, preds$pred1) # MAX PC3
plotRefToTarget(M, preds$pred2) # MIN PC3

# # # (1) Procrustes ANOVA # # #

gdf <- geomorph.data.frame(y, Sexe = as.factor(Sex))

# # Sex # # 
#On the identified males and females

#Extract males and females from the column 'Sex'
MalesFemales <-classifier$sex %in% c("f", "m")

fit1 <- procD.lm(coords[,,MalesFemales]~Sex[MalesFemales],data=gdf,iter=999)
summary(fit1) 

# # AGE # # 
# On the whole dataset 
gdf2 <- geomorph.data.frame(y, age = as.factor(classifier$Age))

fit <- procD.lm(coords~age,data=gdf2,iter=999)
summary(fit) 

# # Log Centroid Size # # 
gdf3 <- geomorph.data.frame(y, size = log(y$Csize))
fit <- procD.lm(coords~size,data=gdf3,iter=999)
summary(fit) 

# # # (2) Linear Discriminant analysis # # #

library(MASS)
library(tidyverse)
library(caret)

coords2d <- as.data.frame(two.d.array(y$coords))
coords2d <- cbind(coords2d, sex)

# M-test for homogeneity of covariance matrices 
install.packages("heplots")
library(heplots)

res <- boxM(coords2d, group = sex)
res

summary(res)

# On the identified males and females
# Split the data into training (80%) and test set (20%)
coords2d_identified <- coords2d[coords2d$sex %in% c("f", "m"), ]

set.seed()
training.individuals <- sex[MalesFemales] %>% 
  createDataPartition(p = 0.7, list = FALSE)
train.data_identified <- coords2d_identified[training.individuals, ]
test.data_identified <- coords2d_identified[-training.individuals, ]

# Estimate preprocessing parameters
preproc.parameter_identified <- train.data_identified %>% 
  preProcess(method = c("center", "scale"))

# Transform the data using the estimated parameters
train.transform_identified <- preproc.parameter_identified %>% predict(train.data_identified)
test.transform_identified <- preproc.parameter_identified %>% predict(test.data_identified)

# Fit the model
model_identified <- lda(sex ~., data = train.transform_identified, CV = TRUE)

# Get the cross validated error rate
table(train.transform_identified$sex, model_identified$class, dnn = c('Actual Group','Predicted Group'))

# Make predictions
predictions_identified <- model_identified %>% predict(test.transform_identified)

# Model accuracy
mean(predictions_identified$class==test.transform_identified$Sex)

# Plot 

plot(model_identified)

# On the identified males and females ADULT ONLY

# Split the data into training (80%) and test set (20%)
coords2d_identified_a <- filter(coords2d, sex %in% c("f", "m") & age %in% c("adult"))
Sex_adults <- coords2d_identified_a$sex

set.seed()
training.individuals <- Sex_adults %>% 
  createDataPartition(p = 0.8, list = FALSE)
train.data_identified_a <- coords2d_identified_a[training.individuals, ]
test.data_identified_a <- coords2d_identified_a[-training.individuals, ]

# Estimate preprocessing parameters
preproc.parameter_identified_a <- train.data_identified_a %>% 
  preProcess(method = c("center", "scale"))

# Transform the data using the estimated parameters
train.transform_identified_a <- preproc.parameter_identified_a %>% predict(train.data_identified_a)
test.transform_identified_a <- preproc.parameter_identified_a %>% predict(test.data_identified_a)

# Fit the model
model_identified_a <- lda(sex ~., data = test.transform_identified_a, CV = TRUE)

# Get the cross validated error rate
table(train.transform_identified_a$sex, model_identified_a$class, dnn = c('Actual Group','Predicted Group'))

# Make predictions
predictions_identified_a <- model_identified_a %>% predict(test.transform_identified_a)

# Model accuracy
mean(predictions_identified_a$class==test.transform_identified_a$Sex)

# Plot 

plot(model_identified_a)


# # # # # # # # # # # # # # # # # # # # # # # #
# # # #                                 # # # #
# # # #       (3)  ALLOMETRY            # # # #
# # # #                                 # # # #
# # # # # # # # # # # # # # # # # # # # # # # #

install.packages("ggpubr")

library(ggpubr)

df_allometry <- cbind(df_pca, size = y$Csize)

#Extract identified males and females
df_allometry <- df_allometry[sex %in% c("f", "m"),]

names(df_allometry)[names(df_allometry) == 'sex'] <- 'sex_mf'

# First 4 axes

plot_list = list()

for (i in 1:4) 
{ 
  plot_list[[i]] <-  local ({
    i<- i 
    p <- ggplot(df_allometry, aes(x = log(size), y = df_allometry[,i], group = sex_mf)) +
                theme_minimal() +
                theme(axis.line = element_line(color = "darkgrey", size = 0.5)) +
                geom_point(aes(color = sex_mf, shape = sex_mf, size = size)) +
                scale_color_manual(values = colors_to_plot) +
                scale_shape_manual(values=shape_to_plot) +
                geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE) + 
                labs(x = paste0('Log Centroid size'),
                     y = paste0('PC ', paste(i))) +
                stat_regline_equation(aes(label =  paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")))
  })
} 

ggarrange(plotlist = plot_list, ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")

# PC1 vs PC2 size


PC1vsPC2 <- ggplot(data=df_pca[,1:2],aes(x=df_pca[,1],y=df_pca[,2], color = sex, shape = sex)) +
                theme_minimal() +
                theme(axis.line = element_line(color = "darkgrey", size = 0.5)) +
                geom_point(aes(size =  log(y$Csize),alpha=0.8)) +
                scale_color_manual(values = colors_to_plot) +
                scale_fill_manual(values = colors_to_plot) +
                scale_shape_manual(values = shape_to_plot) +
                geom_text_repel(aes(label=number), size=3) +
                labs(x = paste0('PC1 = ', round(((eigenvalues[1]/sum(eigenvalues))*100), digits = 2), '%'), 
                     y = paste0('PC2 = ', round(((eigenvalues[2]/sum(eigenvalues))*100), digits = 2), '%')) +              
                geom_polygon(data = chull_PCA12, aes(x= Comp1, y = Comp2, fill = sex, colour = sex), alpha = 0.2)  
PC1vsPC2