library(pander)
library(magrittr)
library(dplyr)
library(plyr)
library(MASS)
library(ggplot2)
library(ggrepel)

#Toad morphology Analyses

toad_morph=read.csv(file.choose())
#COLLECT RESIDUALS FROM Snout-to-Urostyle OLS REGRESSIONS
r.HW=lm(HW~SUL,toad_morph)$resid
r.DBIC=lm(DBIC~SUL,toad_morph)$resid
r.MPL=lm(MPL~SUL,toad_morph)$resid
r.MPW=lm(MPW~SUL,toad_morph)$resid
r.MTFL=lm(MTFL~SUL,toad_morph)$resid
r.MPPG=lm(MPPG~SUL,toad_morph)$resid
r.MPT=lm(MPT~SUL,toad_morph)$resid
toad_morph_corrected=data.frame(SP=toad_morph$SP,HW=r.HW,DBIC=r.DBIC,MPL=r.MPL,MPW=r.MPW,MTFL=r.MTFL,MPPG=r.MPPG,MPT=r.MPT)

#perform linear discriminant function analysis
toad_LDA=lda(SP~HW+DBIC+MPL++MPW+MTFL+MPPG+MPT,toad_morph_corrected)
toad_LDA

prop = toad_LDA$svd^2/sum(toad_LDA$svd^2)
prop #proportion of variation accounted for by each LDA

#predict categorical RV for continuous variables
toad_predict=predict(toad_LDA)
freqtable=table(toad_predict$class, toad_morph_corrected$SP) 
rownames(freqtable)=paste0("Predicted ", toad_morph_corrected$SP %>% levels)
freqtable %>% addmargins %>% pander("Observed vs. Predicted Frequencies")
prop.table(freqtable) %>% addmargins %>% pander("Proportions")


#show first twenty predicted species designations
data.frame(toad_predict$posterior, ObservedGroup = toad_morph_corrected$SP, 
  PredictedGroup = toad_predict$class, toad_predict$x) %>% head(20) %>% pander

#put values from LDA1 into original dataframe and plot density
toad_morph_corrected$LDA1 <- toad_predict$x[,1]
toad_morph_corrected$LDA2 <- toad_predict$x[,2] #with two species (or categorical RV) only one LDA is estimated
ggplot(toad_morph_corrected, aes(LDA1, fill = SP)) + geom_density(alpha = 0.5, color = NA) + theme(legend.position = "top")

#calculate mean LDA for each group
gMeans=toad_morph_corrected%>%group_by(SP)%>%dplyr::select(LDA1,LDA2)%>%summarise_each(funs(mean)) 
gMeans %>% pander

#wrap polygon around scatter
find_hull <- function(toad_morph_corrected) toad_morph_corrected[chull(toad_morph_corrected$LDA1, toad_morph_corrected$LDA2), ]
hulls <- ddply(toad_morph_corrected, "SP", find_hull)

#plot DF
LDA_plot=ggplot(toad_morph_corrected, aes(LDA1,LDA2,color = SP))
LDA_plot=LDA_plot+geom_point(alpha = 0.75)
LDA_plot=LDA_plot+geom_point(data = gMeans,aes(fill = SP),size = 4,color = "black",pch = 21) 
LDA_plot=LDA_plot+theme(legend.position = "none")
LDA_plot=LDA_plot+coord_equal()
LDA_plot=LDA_plot+geom_polygon(data = hulls, alpha = 0.5)
LDA_plot=LDA_plot+geom_text_repel(data = gMeans,aes(label = SP),color = "black",vjust = 1.75)
LDA_plot

####################################################################################################
#Advertisement call analyses
calls=read.csv(file.choose())
calls$Frange=c(calls$High.Freq..Hz.-calls$Low.Freq..Hz.)
summary(calls$Frange)
calls=data.frame(CL=calls$Delta.Time..s.,FR=calls$Frange,PN=calls$pulses.per.sec,DF=calls$Peak.Freq..Hz.,LF=calls$Low.Freq..Hz.,HF=calls$High.Freq..Hz.,AP=calls$Avg.Power..dB.,PP=calls$Peak.Power..dB.,file=calls$File,SP=calls$species)

min(calls[which(calls$SP=="DAT"),]$CL);min(calls[which(calls$SP=="DAT"),]$FR);min(calls[which(calls$SP=="DAT"),]$PN);min(calls[which(calls$SP=="DAT"),]$DF)
mean(calls[which(calls$SP=="DAT"),]$CL);mean(calls[which(calls$SP=="DAT"),]$FR);mean(calls[which(calls$SP=="DAT"),]$PN);mean(calls[which(calls$SP=="DAT"),]$DF)
median(calls[which(calls$SP=="DAT"),]$CL);median(calls[which(calls$SP=="DAT"),]$FR);median(calls[which(calls$SP=="DAT"),]$PN);median(calls[which(calls$SP=="DAT"),]$DF)
max(calls[which(calls$SP=="DAT"),]$CL);max(calls[which(calls$SP=="DAT"),]$FR);max(calls[which(calls$SP=="DAT"),]$PN);max(calls[which(calls$SP=="DAT"),]$DF)
sd(calls[which(calls$SP=="DAT"),]$CL);sd(calls[which(calls$SP=="DAT"),]$FR);sd(calls[which(calls$SP=="DAT"),]$PN);sd(calls[which(calls$SP=="DAT"),]$DF)

min(calls[which(calls$SP=="HT"),]$CL);min(calls[which(calls$SP=="HT"),]$FR);min(calls[which(calls$SP=="HT"),]$PN);min(calls[which(calls$SP=="HT"),]$DF)
mean(calls[which(calls$SP=="HT"),]$CL);mean(calls[which(calls$SP=="HT"),]$FR);mean(calls[which(calls$SP=="HT"),]$PN);mean(calls[which(calls$SP=="HT"),]$DF)
median(calls[which(calls$SP=="HT"),]$CL);median(calls[which(calls$SP=="HT"),]$FR);median(calls[which(calls$SP=="HT"),]$PN);median(calls[which(calls$SP=="HT"),]$DF)
max(calls[which(calls$SP=="HT"),]$CL);max(calls[which(calls$SP=="HT"),]$FR);max(calls[which(calls$SP=="HT"),]$PN);max(calls[which(calls$SP=="HT"),]$DF)
sd(calls[which(calls$SP=="HT"),]$CL);sd(calls[which(calls$SP=="HT"),]$FR);sd(calls[which(calls$SP=="HT"),]$PN);sd(calls[which(calls$SP=="HT"),]$DF)

# grouped boxplot
g1=ggplot(calls, aes(SP,DF)) +geom_boxplot()+scale_y_continuous(limits = c(1600, 2250))+theme(axis.title.x=element_text(size=rel(0)))
g2=ggplot(calls, aes(SP,CL)) +geom_boxplot()+theme(axis.title.x=element_text(size=rel(0))) 
g3=ggplot(calls, aes(SP,FR)) +geom_boxplot()+theme(axis.title.x=element_text(size=rel(0)))
g4=ggplot(calls, aes(SP,PN)) +geom_boxplot()+theme(axis.title.x=element_text(size=rel(0)))
grid.arrange(g1,g2,g3,g4,nrow=1,ncol=4)

t.test(calls[c(which(calls$SP=="HT")),"DF"],calls[c(which(calls$SP=="DAT")),"DF"]) #significant
t.test(calls[c(which(calls$SP=="HT")),"CL"],calls[c(which(calls$SP=="DAT")),"CL"]) #null
t.test(calls[c(which(calls$SP=="HT")),"FR"],calls[c(which(calls$SP=="DAT")),"FR"]) #significant
t.test(calls[c(which(calls$SP=="HT")),"PN"],calls[c(which(calls$SP=="DAT")),"PN"]) #null