#######################################################################################
##################################RESAMPLING PROCEDURE#################################
####################SOURCE CODE FOR RESAMPLING SEMILANDMARKS IN ONE SPECIMEN###########
#######################################################################################

setwd("/Users/philosauria/Desktop/rev_turtles/CSV_files_landmarks") #set directory
library(rgl)
library(geomorph) 
library(Morpho)

#plot it in 3D to control the specimen
plot3d(turt1[,6], turt1[,7], turt1[,8], aspect="iso")

#isolate landmarks first
turtland <- turt1[1:10,] #first ten lines are landmarks

#isolate semilandmarks
turtsemiland <- turt1[11:nrow(turt1),] #semilandmarks for resampling

#write new documents with landmarks and semilandmarks
write.csv(turtsemiland, file = "turtsemiland.csv")

#open semilandmarks file
turt3 <- read.table(file = "turtsemiland.csv", header = F, sep=",", skip = 1)[1:nrow(turtsemiland),]

#split the different curves by names
splitsemi <- split(turt3, factor(turt3$V3, levels = unique(turt3$V3))) #delete duplicates
length(splitsemi) #control nb of curves

#give a list of the semilandmarks curves. In this dataset, we expect 12 of them
ls(splitsemi)

#REARRANGE SEMILANDMARKS#

#we erase the names of the colonnes
(unname(turtland, force = FALSE))

#creation of a matrix
turtland<- turtland[,6:8]
turtland <- as.matrix(turtland)

######RESAMPLING 12 CURVES########

#Curve1_ Left marginals
landm1<-data.frame(splitsemi$L_marginals[7:9]) #we isolate this curve
land1<-matrix(unlist(landm1), nrow(splitsemi$L_marginals), 3) 
semiland1<-digit.curves(land1[1,], land1, nPoints=38, closed = FALSE) #resampling of the curve
semiland1 #to control the number of semilandmarks

#Curve2_ Right marginals
landm2<-data.frame(splitsemi$R_marginals[7:9])
land2<-matrix(unlist(landm2), nrow(splitsemi$R_marginals), 3)
semiland2<-digit.curves(land2[1,], land2, nPoints=38, closed = FALSE)
semiland2

#Curve3_ Vertebrals
landm3<-data.frame(splitsemi$Vertebrals[7:9])
land3<-matrix(unlist(landm3), nrow(splitsemi$Vertebrals), 3)
semiland3<-digit.curves(land3[1,], land3, nPoints=28, closed = FALSE)
semiland3

#Curve4_ Left epi-hyoplastron
landm4<-data.frame(splitsemi$L_epi_hyo[7:9])
land4<-matrix(unlist(landm4), nrow(splitsemi$L_epi_hyo), 3)
semiland4<-digit.curves(land4[1,], land4, nPoints=13, closed = FALSE)
semiland4

#Curve5_ Left hyo-hypoplastron
landm5<-data.frame(splitsemi$L_hyo_hypo[7:9])
land5<-matrix(unlist(landm5), nrow(splitsemi$L_hyo_hypo), 3)
semiland5<-digit.curves(land5[1,], land5, nPoints=8, closed = FALSE)
semiland5

#Curve6_ Left hypo-xiphiplastron
landm6<-data.frame(splitsemi$L_hypo_xiphi[7:9])
land6<-matrix(unlist(landm6), nrow(splitsemi$L_hypo_xiphi), 3)
semiland6<-digit.curves(land6[1,], land6, nPoints=8, closed = FALSE)
semiland6

#Curve7_ Right epi-hyoplastron
landm7<-data.frame(splitsemi$R_hypo_xiphi[7:9])
land7<-matrix(unlist(landm7), nrow(splitsemi$R_hypo_xiphi), 3)
semiland7<-digit.curves(land7[1,], land7, nPoints=8, closed = FALSE)
semiland7

#Curve8_ Right hyo-hypoplastron
landm8<-data.frame(splitsemi$R_hyo_hypo[7:9])
land8<-matrix(unlist(landm8), nrow(splitsemi$R_hyo_hypo), 3)
semiland8<-digit.curves(land8[1,], land8, nPoints=8, closed = FALSE)
semiland8

#Curve9_ Right hypo-xiphiplastron
landm9<-data.frame(splitsemi$R_epi_hyo[7:9])
land9<-matrix(unlist(landm9), nrow(splitsemi$R_epi_hyo), 3)
semiland9<-digit.curves(land9[1,], land9, nPoints=13, closed = FALSE)
semiland9

#Curve10_ Length plastron
landm10<-data.frame(splitsemi$longueur_plastron[7:9])
land10<-matrix(unlist(landm10), nrow(splitsemi$longueur_plastron), 3)
semiland10<-digit.curves(land10[1,], land10, nPoints=28, closed = FALSE)
semiland10

#Curve11_ Width plastron
landm11<-data.frame(splitsemi$largeur_plastron[7:9])
land11<-matrix(unlist(landm11), nrow(splitsemi$largeur_plastron), 3)
semiland11<-digit.curves(land11[1,], land11, nPoints=13, closed = FALSE)
semiland11

#Curve12_ Width shell
landm12<-data.frame(splitsemi$largeur_shell[7:9])
land12<-matrix(unlist(landm12), nrow(splitsemi$largeur_shell), 3)
semiland12<-digit.curves(land12[1,], land12, nPoints=28, closed = FALSE)
semiland12

#plotting to control the curves
plot3d(semiland1[,1], semiland1[,2], semiland1[,3], aspect="iso")
plot3d(semiland2[,1], semiland2[,2], semiland2[,3], aspect="iso", add=TRUE)
plot3d(semiland3[,1], semiland3[,2], semiland3[,3], aspect="iso", add=TRUE)
plot3d(semiland4[,1], semiland4[,2], semiland4[,3], aspect="iso", add=TRUE)
plot3d(semiland5[,1], semiland5[,2], semiland5[,3], aspect="iso", add=TRUE)
plot3d(semiland6[,1], semiland6[,2], semiland6[,3], aspect="iso", add=TRUE)
plot3d(semiland7[,1], semiland7[,2], semiland7[,3], aspect="iso", add=TRUE)
plot3d(semiland8[,1], semiland8[,2], semiland8[,3], aspect="iso", add=TRUE)
plot3d(semiland9[,1], semiland9[,2], semiland9[,3], aspect="iso", add=TRUE)
plot3d(semiland10[,1], semiland10[,2], semiland10[,3], aspect="iso", add=TRUE)
plot3d(semiland11[,1], semiland11[,2], semiland11[,3], aspect="iso", add=TRUE)
plot3d(semiland12[,1], semiland12[,2], semiland12[,3], aspect="iso", add=TRUE)

#create a list with all the curves
finish <- bindArr(semiland1, semiland2,semiland3, semiland4,semiland5,semiland6,semiland7,semiland8,semiland9,semiland10,semiland11,semiland12, along=1)


######################################################################################
##################################GPA AND PCA ANALYSIS################################
######################################################################################
#call packages
library(rgl)
library(geomorph)
library(RRPP)
library(stats)
library(RColorBrewer)

#set directory
setwd("/Users/philosauria/Desktop/rev_turtles/PCA")

#call resampled landmarks
tt2d <-read.table(file = "turtles_SET1_resampled.csv", header = T, sep=",", skip = 0, fill = TRUE, row.names=1)#open the document with the 3D array composed of all the landmarks and semilandmarks data
tt3d <- arrayspecs(tt2d, 265, 3)


#open sliders file
cm<-as.matrix(read.table(file = "curves_slider_matrix.csv", header = F, sep=";", skip = 0, fill = TRUE))
	cm<- cm[-256:-340,-4:-8]

#open species list (in the same order as the dataset)
speclist<-c("Dermatemys_mawi", "Staurotypus_triporcatus", "Claudius_angustatus", "Sacalia_quadriocellata", "Mauremys_reevesii", "Apalone_mutica", "Pelusios_situatus", "Elseya_novaeguineae", "Chelydra_serpentina", "Carettochelys_insculpta", "Emys_orbicularis", "Pelomedusa_subrufa", "Psammobates_tentorius", "Homopus_femoralis", "Kinixys_belliana", "Rafetus_euphraticus", "Platysternon_megacephalum", "Pelodiscus_sinensis", "Stigmochelys_pardalis", "Dermochelys_coriacea", "Chelonoidis_carbonaria", "Rhinoclemmys_annulata", "Emydura_macquarii", "Astrochelys_radiata", "Pyxis_arachnoides", "Phrynops_tuberosus", "Podocnemis_vogli", "Lissemys_punctata", "Chelodina_oblonga", "Mesoclemmys_dahli", "Chersina_angulata", "Gopherus_polyphemus", "Sternotherus_odoratus", "Graptemys_geographica", "Clemmys_guttata", "Deirochelys_reticularia", "Malaclemys_terrapin", "Kinosternon_baurii", "Emydoidea_blandingii", "Eretmochelys_imbricata", "Geochelone_elegans", "Chelonia_mydas", "Actinemys_marmorata", "Terrapene_carolina", "Testudo_graeca", "Hydromedusa_tectifera", "Orlitia_borneensis", "Cuora_amboinensis", "Notochelys_platynota", "Cyclemys_dentata", "Batagur_dhongoka", "Vijayachelys_silvatica", "Melanochelys_trijuga", "Caretta_caretta", "Dogania_subplana", "Heosemys_spinosa", "Chelus_fimbriata", "Macrochelys_temminckii", "Malacochersus_tornieri", "Malayemys_subtrijuga", "Indotestudo_elongata", "Pangshura_tentoria", "Morenia_petersi", "Geoemyda_spengleri", "Manouria_impressa", "Platemys_platycephala", "Trachemys_scripta", "Glyptemys_insculpta", "Chrysemys_picta", "Proganochelys_quenstedti", "Proterochersis_robusta", "Plesiochelys_bigleri")

#open ecology list (in the same order as the dataset)
eco <- read.table(file = "ecology_web.csv", header = F, sep=";", skip = 0, fill = TRUE)
	eco <- unlist(eco, use.names=FALSE) #this step transform the dataset into "factors"


#creation of the dataset, composed of the landmarks files, curves sliders, and ecology categories
data.tt <- list("land" = tt3d, "curves" = cm, "ecology" = eco)
	str(data.tt) #check that everything is in order


		#if any, in which specimen data
		apply(is.na(data.tt$land), 3, which)

		#if any missing data: estimation of missing data using TPS or Reg methods
		data.tt$land <-estimate.missing(data.tt$land, method = c("TPS","Reg"))

######################################################################################
############################GENERALIZED PROCRUSTES ANALYSIS###########################
######################################################################################

#Generalized Procrustes WITH CURVES#

# Using bending energy for sliding
gpa.Y <-gpagen(data.tt$land, curves=data.tt$curves, Proj = TRUE, ProcD = FALSE)

pdf('outliers_SET1.pdf')
	plotOutliers(gpa.Y$coords) #check for extreme specimen and errors of data acquisition
dev.off()

summary(gpa.Y) #consensus shape


########Check specimens with problems######
#Plot meanshape (reference)
#ref<-mshape(gpa.Y$coords)
#plot reference to target and curves to compare
#plotRefToTarget(ref,gpa.Y$coords[,,2],method="points", links=cm) #compare one specimen to the mean shape


###################################################################################
###############################PRINCIPAL COMPONENT ANALYSIS########################
###################################################################################

#plot PCA results

		gp <- data.tt$ecology #grouping of the turtles by ecology categories
		col.gp <- brewer.pal(length(levels(gp)), "Set1")
		col.gp[6] <- "black"
   		names(col.gp) <- levels(gp)
	col.gp <- col.gp[match(gp, names(col.gp))] # col.gp must NOT be a factor

pca.lands <- gm.prcomp(gpa.Y$coords) #plot PCA and warpgrids

pdf('pca_SET1.pdf')
plot(pca.lands, pch = c(19, 17, 23, 15, 25, 20)[as.numeric(eco)], cex = 1.1, col = col.gp, bg=col.gp)

dev.off()	

pdf('pca_SET1_numbers.pdf')
		gp <- data.tt$ecology #grouping of the turtles by ecology categories
		col.gp <- brewer.pal(length(levels(gp)), "Set1")
		col.gp[6] <- "black"
   		names(col.gp) <- levels(gp)
	col.gp <- col.gp[match(gp, names(col.gp))] # col.gp must NOT be a factor

pca.lands <- gm.prcomp(gpa.Y$coords) #plot PCA and warpgrids


plot(pca.lands, pch = c(19, 17, 23, 15, 25, 22)[as.numeric(eco)], cex = 1.1, col = col.gp, bg=col.gp)
text(pca.lands$x[,1], pca.lands$x[,2],cex = 0.7, pos = 4, col = col.gp)

dev.off()	

#PC1min
plot3d(pca.lands$shapes$shapes.comp1$min, aspect="iso")
#PC1max
plot3d(pca.lands$shapes$shapes.comp1$max, aspect="iso")
#PC2min
plot3d(pca.lands$shapes$shapes.comp2$min, aspect="iso")
#PC2max
plot3d(pca.lands$shapes$shapes.comp2$max, aspect="iso")

######################################################################################
##################################HOW MUCH PCA DO WE KEEP ?###########################
######################################################################################

#http://rfunctions.blogspot.com/2015/01/pca-principal-component-analysis.html

pca.lands
summary(pca.lands)
loadings <-pca.lands$rotation	# The loadings (or eigenvectors) are simply our rotation values.

ev <- pca.lands$d #eigenvalues

####SOURCE :
# Plot eigenvalues and percentages of variation of an ordination object
# Kaiser rule and broken stick model
# Usage:
# evplot(ev)
# where ev is a vector of eigenvalues
 
# License: GPL-2 
# Author: Francois Gillet, 25 August 2012
 
evplot <- function(ev)
{
	# Broken stick model (MacArthur 1957)
	n <- length(ev)
	bsm <- data.frame(j=seq(1:n), p=0)
	bsm$p[1] <- 1/n
	for (i in 2:n) bsm$p[i] <- bsm$p[i-1] + (1/(n + 1 - i))
	bsm$p <- 100*bsm$p/n
	# Plot eigenvalues and % of variation for each axis
	op <- par(mfrow=c(2,1))
	barplot(ev, main="Eigenvalues", col="bisque", las=2)
	abline(h=mean(ev), col="red")
	legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")
	barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE, 
		main="% variation", col=c("bisque",2), las=2)
	legend("topright", c("% eigenvalue", "Broken stick model"), 
		pch=15, col=c("bisque",2), bty="n")
	par(op)
}

#export brokenstick plot
pdf('evplot_SET1.pdf')
evplot(ev)
dev.off()

#save the PC Data
write.csv(pca.lands$x, file = "pca_SET1.csv")
write.csv(pca.lands$d, file = "standard_dev_SET1.csv")

######################################################################################
############################LINEAR DISCRIMINANT ANALYSIS##############################
######################################################################################


setwd("/Users/philosauria/Desktop/rev_turtles/LDA")

#call packages
library(ape)
library(class)
library(geiger)
library(lattice)
library(mda)
library(nnet)
library(MASS)
library(RColorBrewer)

#open species list (in the same order as the dataset)
speclist<-c("Dermatemys_mawi", "Staurotypus_triporcatus", "Claudius_angustatus", "Sacalia_quadriocellata", "Mauremys_reevesii", "Apalone_mutica", "Pelusios_situatus", "Elseya_novaeguineae", "Chelydra_serpentina", "Carettochelys_insculpta", "Emys_orbicularis", "Pelomedusa_subrufa", "Psammobates_tentorius", "Homopus_femoralis", "Kinixys_belliana", "Rafetus_euphraticus", "Platysternon_megacephalum", "Pelodiscus_sinensis", "Stigmochelys_pardalis", "Dermochelys_coriacea", "Chelonoidis_carbonaria", "Rhinoclemmys_annulata", "Emydura_macquarii", "Astrochelys_radiata", "Pyxis_arachnoides", "Phrynops_tuberosus", "Podocnemis_vogli", "Lissemys_punctata", "Chelodina_oblonga", "Mesoclemmys_dahli", "Chersina_angulata", "Gopherus_polyphemus", "Sternotherus_odoratus", "Graptemys_geographica", "Clemmys_guttata", "Deirochelys_reticularia", "Malaclemys_terrapin", "Kinosternon_baurii", "Emydoidea_blandingii", "Eretmochelys_imbricata", "Geochelone_elegans", "Chelonia_mydas", "Actinemys_marmorata", "Terrapene_carolina", "Testudo_graeca", "Hydromedusa_tectifera", "Orlitia_borneensis", "Cuora_amboinensis", "Notochelys_platynota", "Cyclemys_dentata", "Batagur_dhongoka", "Vijayachelys_silvatica", "Melanochelys_trijuga", "Caretta_caretta", "Dogania_subplana", "Heosemys_spinosa", "Chelus_fimbriata", "Macrochelys_temminckii", "Malacochersus_tornieri", "Malayemys_subtrijuga", "Indotestudo_elongata", "Pangshura_tentoria", "Morenia_petersi", "Geoemyda_spengleri", "Manouria_impressa", "Platemys_platycephala", "Trachemys_scripta", "Glyptemys_insculpta", "Chrysemys_picta", "Proganochelys_quenstedti", "Proterochersis_robusta", "Plesiochelys_bigleri")


#read PC Data and ecology categories
#COR<-read.csv("pca_allturtles_alldata_MODIFIED.csv", header=TRUE, sep=",", row.names=1) #3CAT
COR<-read.csv("pca_SET1.csv", header=TRUE, sep=";",row.names=1)#5cat
rownames(COR) <- speclist

eco <- read.table(file = "ecology_web.csv", header = F, sep=";", skip = 0, fill = TRUE)

CORN <- row.names(COR)
names (CORN) <- row.names(COR)

gA <- eco 	#Grouping

############################
XA <- COR[,1:9] #PCA Data

############################
############################
#for FOSSIL data

testtaxa <- rownames(COR[gA=="UKN",]) # UNKNOWN = FOSSIL DATA
testtaxan <- row(COR)[gA=="UKN",1]
trainingtaxa <- rownames(COR[-testtaxan,])
X <- XA[-testtaxan,]
sf <- COR[-testtaxan,]
g <- gA[-testtaxan,]
g <- droplevels(g)
X2 <- XA[testtaxa,]


#TRAINING DATA

gp <- g #grouping of the turtles by ecology categories
		col.gp <- brewer.pal(length(levels(gp)), "Set1")
   		names(col.gp) <- levels(gp)
	col.gp <- col.gp[match(gp, names(col.gp))] # col.gp must NOT be a factor


t.lda <- lda(g ~ ., X)
t.lda.values  <- predict(t.lda)
ldahist(t.lda.values$x[,1], g = g)
confusion <- table(g, t.lda.values$class, dnn = c('Actual Group','Predicted Group'))


t.lda_CV <- lda(g ~ .,CV=TRUE, X) #using cross validation model to compare
confusion_CV <- table(g, t.lda_CV$class, dnn = c('Actual Group','Predicted Group'))


diag(prop.table(confusion_CV, 1))
diag(prop.table(confusion, 1)) #Rates should be the same to use the first model. 

#plot extant turtles

plot(t.lda.values$x[,1], t.lda.values$x[,2], pch = c(19, 17, 23, 15, 25)[as.numeric(g)], cex=1, bg=col.gp, col=col.gp)


#predictions on fossils

plda <- predict(object = t.lda, newdata = X2)

plda$class

lda_data <- rbind(t.lda.values$x, plda$x)

x<-as.data.frame(t.lda.values$class)
x[,1] <- trainingtaxa
x[,2] <- g
x[,3] <- as.data.frame(t.lda.values$class)
mat <- cbind(x, t.lda.values$x)
colnames(mat) <- c("species", "true class", "predicted class", "LD1", "LD2", "LD3", "LD4")
row.names(mat) <- NULL


# write tables
write.csv(lda_data, "SET1_LDA_LDA_VALUES_EXT_FOSS.csv")
write.csv(confusion, "SET1_LDA_Cross-classification-confusion matrix.csv")
write.csv(confusion_CV, "SET1_CV_LDA_Cross-classification-confusion_matrix.csv")
write.csv(plda$class, "SET1_LDA_prediction_matrix.csv")
write.csv(mat, "SET1_LDA_VALUES_LD_MATRIX.csv")

aa <- cbind(plda$class, plda$posterior, plda$x)
write.csv(aa, "SET1_PREDICTIONS_FOSSILS.csv")

pdf('LDA_SET1_EXTANT.pdf')
plot(t.lda.values$x[,1], t.lda.values$x[,2], pch = c(19, 17, 23, 15, 25)[as.numeric(g)], cex=1, bg=col.gp, col=col.gp)
dev.off()


g2 <- as.factor(c(g, "6", "6", "6"))
lda_data2 <- cbind(lda_data, g2)

col.gp2 <- brewer.pal(length(levels(g2)), "Set1")
col.gp2[6] <- "black"
   names(col.gp2) <- levels(g2)
col.gp2 <- col.gp2[match(g2, names(col.gp2))] # col.gp must NOT be a factor


pdf('LDA_SET1_fossils_numbers.pdf')
	plot(lda_data2[,1], lda_data2[,2], pch = c(21, 24, 23, 22, 25, 20)[as.numeric(g2)], cex= 1.5, bg=col.gp2, col="black")
	text(lda_data2[,1], lda_data2[,2], cex = 0.7, pos = 4, col = col.gp2)
dev.off()


pdf('LDA_SET1_fossils.pdf')
	plot(lda_data2[,1], lda_data2[,2], pch = c(21, 24, 23, 22, 25, 20)[as.numeric(g2)], cex= 1.5, bg=col.gp2, col="black")
	
dev.off()


#############################################
##########MEAN SHAPES#########################
#############################################
#############################################

eco <- read.table(file = "ecology_web.csv", header = F, sep=";", skip = 0, fill = TRUE)
d2_tt <- two.d.array(gpa.Y$coords)
dd_tt <- cbind (eco, d2_tt)
split_tt <- split(dd_tt, dd_tt[,1]) #split categories
length(split_tt) #control nb of curves


TER <- split_tt$TER
SET <- split_tt$SET
SEA <- split_tt$SEA
FUA <- split_tt$FUA
FME <- split_tt$FME
UKN <- split_tt$UKN

dTER <- arrayspecs(split_tt$TER[,2:796], 265, 3)
dSET <- arrayspecs(split_tt$SET[,2:796], 265, 3)
dSEA <- arrayspecs(split_tt$SEA[,2:796], 265, 3)
dFUA <- arrayspecs(split_tt$FUA[,2:796], 265, 3)
dFME <- arrayspecs(split_tt$FME[,2:796], 265, 3)
dUKN <- arrayspecs(split_tt$UKN[,2:796], 265, 3)


mTER <- mshape(dTER, na.action=1)
mSET <- mshape(dSET, na.action=3)
mSEA <- mshape(dSEA, na.action=3)
mFUA <- mshape(dFUA, na.action=3)
mFME <- mshape(dFME, na.action=3)

mfrow3d(3, 2)
plot3d(mTER[,1],mTER[,2],mTER[,3], asp=FALSE)
plot3d(mSET[,1],mSET[,2],mSET[,3], asp=FALSE)
plot3d(mSEA[,1],mSEA[,2],mSEA[,3], asp=FALSE)
plot3d(mFUA[,1],mFUA[,2],mFUA[,3], asp=FALSE)
plot3d(mFME[,1],mFME[,2],mFME[,3], asp=FALSE)


######################################################################################
##################pFDA  - Code adapted from MONTANI and SCHMITZ 2011##################
########################doi:10.1111/j.1558-5646.2011.01271.x##########################
######################################################################################

#call packages
library(ape)
library(class)
library(geiger)
library(lattice)
library(mda)
library(nnet)
library(RColorBrewer)

#set directory

setwd("/Users/philosauria/Desktop/rev_turtles/pFDA")

source("phylo.fda.v0.2.R")

speclist<-c("Dermatemys_mawi","Staurotypus_triporcatus","Claudius_angustatus","Sacalia_quadriocellata","Mauremys_reevesii","Apalone_mutica","Pelusios_sinuatus","Elseya_novaeguineae","Chelydra_serpentina","Carettochelys_insculpta","Emys_orbicularis","Pelomedusa_subrufa","Psammobates_tentorius","Homopus_femoralis","Kinixys_belliana","Rafetus_euphraticus","Platysternon_megacephalum","Pelodiscus_sinensis","Stigmochelys_pardalis","Dermochelys_coriacea","Chelonoidis_carbonaria","Rhinoclemmys_annulata","Emydura_macquarii","Astrochelys_radiata","Pyxis_arachnoides","Phrynops_tuberosus","Podocnemis_vogli","Lissemys_punctata","Chelodina_oblonga","Mesoclemmys_dahli","Chersina_angulata","Gopherus_polyphemus","Sternotherus_odoratus","Graptemys_geographica","Clemmys_guttata","Deirochelys_reticularia","Malaclemys_terrapin","Kinosternon_baurii","Emydoidea_blandingii","Eretmochelys_imbricata","Geochelone_elegans","Chelonia_mydas","Actinemys_marmorata","Terrapene_carolina","Testudo_graeca","Hydromedusa_tectifera","Orlitia_borneensis","Cuora_amboinensis","Notochelys_platynota","Cyclemys_dentata","Batagur_dhongoka","Vijayachelys_silvatica","Melanochelys_trijuga","Caretta_caretta","Dogania_subplana","Heosemys_spinosa","Chelus_fimbriata","Macrochelys_temminckii","Malacochersus_tornieri","Malayemys_subtrijuga","Indotestudo_elongata","Pangshura_tentoria","Morenia_petersi","Geoemyda_spengleri","Manouria_impressa","Platemys_platycephala","Trachemys_scripta","Glyptemys_insculpta","Chrysemys_picta","Proganochelys_quenstedti","Proterochersis_robusta","Plesiochelys_bigleri")

#read consensus tree
treA <- read.tree("trees_age_topo_turtles5_fossils.tre")

treA$tip.label
if(!is.binary.tree(treA)) treA <- multi2di(treA, random = TRUE) 
is.ultrametric(treA) #should be FALSE

##########################
#read PC Data and ecology categories
COR<-read.csv("pca_SET1.csv", header=T, sep=";")
CORN <- COR$species
names (CORN) <- COR$species


CORorder<-COR[match(treA$tip.label, names(CORN)),]

ddA <- CORorder
taxaA <- ddA$species
rownames(ddA) <- taxaA

############################
gA <- ddA$ecology 	#Grouping


############################
XA <- ddA[,3:11] #PCA Data

############################
############################
#for FOSSIL data

testtaxa <- rownames(ddA[gA=="UKN",]) # UNKNOWN = FOSSIL DATA
testtaxan <- row(ddA)[gA=="UKN",1]
trainingtaxa <- rownames(ddA[-testtaxan,])
X <- XA[-testtaxan,]
dd <- ddA[-testtaxan,]
g <- gA[-testtaxan]
tre <- drop.tip(treA, testtaxa) 
is.ultrametric(tre)


############IF NOT BUT ALMOST###################
force.ultrametric<-function(tree,method=c("nnls","extend")){
    method<-method[1]
    if(method=="nnls") tree<-nnls.tree(cophenetic(tree),tree,
        rooted=TRUE,trace=0)
    else if(method=="extend"){
        h<-diag(vcv(tree))
        d<-max(h)-h
        ii<-sapply(1:Ntip(tree),function(x,y) which(y==x),
            y=tree$edge[,2])
        tree$edge.length[ii]<-tree$edge.length[ii]+d
    } else 
        cat("method not recognized: returning input tree\n\n")
    tree
}

library(phangorn)
tre<-force.ultrametric(tre) ## default method
is.ultrametric(tre) #should be TRUE

##########################
########LAMBDA############
##########################
filename_stem <- "HABITAT.optLambda"
ol1 <- optLambda(X,g,tre,idc=filename_stem) #UNKNOWN TAXA

ol1$optlambda	#Lambda value

######################################################################################
###############################PHYLOGENETIC DISCRIMINANT ANALYSIS#########################
######################################################################################

#SIMPLE
optl <- 0	#!!!!!!!!Replace with the optimal lambda value from above!!!!!!
pfda <- phylo.fda.pred(XA,gA,taxaA,treA,testtaxan,val=optl)

pfda$confusion

pfda$testprediction

#Save table
write.csv(pfda$confusion, "SET1_Cross-classification-confusion matrix.csv")
write.csv(pfda$testprediction, "SET1_prediction_matrix.csv")

################################
test.class <- as.character(predict(pfda, pfda$DATAtest, type="class"))
test.variates <- predict(pfda, pfda$DATAtest, type="variates")
test.prob <- predict(pfda, pfda$DATAtest, type="posterior")
test.results <- cbind(test.class, test.prob, test.variates)

#colnames(test.results)<- c("predicted class", "P(fins)", "P(notwebbed)", "P(redwebbed)", "P(paddle)","P(webbed)","DA1","DA2","DA3","DA4")

colnames(test.results)<- c("predicted class","FME", "FUA", "SEA", "SET", "TER","DA1","DA2","DA3","DA4")

rownames(test.results) <- testtaxa
head(test.results)

write.csv(test.results, "SET1_PREDICTIONS_VALUES.csv")

##################################
training.class <- as.character(predict(pfda, pfda$DATA, type="class"))
training.variates <- predict(pfda, pfda$DATA, type="variates")
training.prob <- predict(pfda, pfda$DATA, type="posterior")
training.results <- cbind(as.character(g), training.class, training.prob, training.variates)
colnames(training.results) <- c("true class", "predicted class", "P(fins)", "P(webbed)", "P(semiwebbed)","P(semiterrestrial)","P(terrestrial)","DA1","DA2","DA3","DA4")
rownames(training.results) <- trainingtaxa

#Save table
write.csv(training.results, "SET1_TURTLE_PROBABILITY_HABITAT.csv")

####################################
###PLOT
########
training <- as.data.frame(cbind(training.variates, as.character(g)))
colnames(training) <- c("DA1","DA2","DA3","DA4","groups")
rownames(training) <- rownames(dd)

####################################
unknown <- as.character(rep("UKN", times=length(testtaxan)))
test <- as.data.frame(cbind(test.variates, unknown))
colnames(test) <- c("DA1","DA2","DA3","DA4","groups")
rownames(test) <- testtaxa

##################################
#############################
scatter <- rbind(training, test) #REAL unknown taxa
write.csv(scatter,"scatter_SET1.csv")

scatter[, 1:2] <- lapply(scatter[,1:2], as.character)
scatter[, 1:2] <- lapply(scatter[,1:2], as.numeric)
######################

######################
GR1 <- scatter[scatter$groups=="TER",]
GR2 <- scatter[scatter$groups=="SET",]
GR3 <- scatter[scatter$groups=="SEA",]
GR4 <- scatter[scatter$groups=="FUA",]
GR5 <- scatter[scatter$groups=="FME",]
GR6 <- scatter[scatter$groups=="UKN",]

############################
pdf('SET1_pDFA_text.pdf')
plot(GR1$DA1, GR1$DA2,							#terrestrial
      asp=1,
      xlim=range(scatter[,1]),
      ylim=range(scatter[,2]),
      pch=25, col="black", bg="#ff7f00",
      cex=1.5, cex.lab=1.5,
      xlab="DA 1", ylab="DA 2"); 					

points(GR2$DA1, GR2$DA2, pch=22, cex=1.5, col="black", bg="#984ea3"); 	#semi-terrestrial
points(GR3$DA1, GR3$DA2, pch=23, cex=1.5, col="black", bg="#4daf4a");	#semi-aquatic
points(GR4$DA1, GR4$DA2, pch=24, cex=1.5, col="black", bg="#377eb8");	#fully aquatic
points(GR5$DA1, GR5$DA2, pch=21, cex=1.5, col="black", bg="#e41a1c");	#full marine environment
points(GR6$DA1, GR6$DA2, pch=20, cex=1.5, col="black", bg="black"); 	#fossils
box(lwd=2); axis(1, lwd=2, lwd.ticks=2); axis(2, lwd=2, lwd.ticks=2)


#Text
#text(GR1$DA1, GR1$DA2, labels=rownames(GR1), cex=0.9, font=2, pos=4)
#text(GR2$DA1, GR2$DA2, labels=rownames(GR2), cex=0.9, font=2, pos=4)
#text(GR3$DA1, GR3$DA2, labels=rownames(GR3), cex=0.9, font=2, pos=4)
#text(GR4$DA1, GR4$DA2, labels=rownames(GR4), cex=0.9, font=2, pos=4)
#text(GR5$DA1, GR5$DA2, labels=rownames(GR5), cex=0.9, font=2, pos=4)
text(GR6$DA1, GR6$DA2, labels=rownames(GR6), cex=0.9, font=2, pos=4)
dev.off()

pdf('SET1_pDFA.pdf')
plot(GR1$DA1, GR1$DA2,							#terrestrial
      asp=1,
      xlim=range(scatter[,1]),
      ylim=range(scatter[,2]),
      pch=25, col="black", bg="#ff7f00",
      cex=1.5, cex.lab=1.5,
      xlab="DA 1", ylab="DA 2"); 					

points(GR2$DA1, GR2$DA2, pch=22, cex=1.5, col="black", bg="#984ea3"); 	#semi-terrestrial
points(GR3$DA1, GR3$DA2, pch=23, cex=1.5, col="black", bg="#4daf4a");	#semi-aquatic
points(GR4$DA1, GR4$DA2, pch=24, cex=1.5, col="black", bg="#377eb8");	#fully aquatic
points(GR5$DA1, GR5$DA2, pch=21, cex=1.5, col="black", bg="#e41a1c");	#full marine environment
points(GR6$DA1, GR6$DA2, pch=20, cex=1.5, col="black", bg="black"); 	#fossils
box(lwd=2); axis(1, lwd=2, lwd.ticks=2); axis(2, lwd=2, lwd.ticks=2)


#Text
#text(GR1$DA1, GR1$DA2, labels=rownames(GR1), cex=0.9, font=2, pos=4)
#text(GR2$DA1, GR2$DA2, labels=rownames(GR2), cex=0.9, font=2, pos=4)
#text(GR3$DA1, GR3$DA2, labels=rownames(GR3), cex=0.9, font=2, pos=4)
#text(GR4$DA1, GR4$DA2, labels=rownames(GR4), cex=0.9, font=2, pos=4)
#text(GR5$DA1, GR5$DA2, labels=rownames(GR5), cex=0.9, font=2, pos=4)
#text(GR6$DA1, GR6$DA2, labels=rownames(GR6), cex=0.9, font=2, pos=4)

dev.off()