#############CODE FOR CALCULATING PRIOR AGE FOR A CALIBRATED NODE##################### ### Special thanks to David Cannatella, The University of Texas at Austin for helping with this code### library(MCMCpack) library(ggplot2) ################################################ # Exponential Distribution for Calibration # The single parameter of the exponential distribution is the scale (= mean) parameter. # However, the R functions accept the rate, which is 1/mean # Rate = 1/Mean # Mean = 1/Rate = Beta = standard deviation # Median = log(2)/Rate = log(2) * Mean # log(2) = 0.6931472 # You may change the next two values as needed. But change nothing else! myOffset <- 8.8 # e.g. Minimum age of node myMedian <- 10.2 # This value includes the offset (myMean <- (myMedian - myOffset)/log(2)) myRate <- 1/myMean # Calculate percentiles and plot them (pct95 <- qexp(0.95, rate = myRate) + myOffset) (pct975 <- qexp(0.975, rate = myRate) + myOffset) maxX <- pct975 + 20 df <- data.frame(x = c(0 + myOffset, maxX)) ggplot(df, aes(x)) + xlim(0, 100) + stat_function(fun = dexp, args = list(rate = 1/myMean), color = "black", n = 100, xlim = (c(myOffset, maxX))) + xlab("millions of years") + scale_x_continuous(breaks = seq(-10, maxX, by = 10)) + geom_vline(aes(xintercept = pct975), col = "red", lty = "dashed") + geom_vline(aes(xintercept = pct95), col = "green", lty = "dashed") + geom_vline(aes(xintercept = myMean + myOffset), col = "blue", lty = "dashed") + geom_vline(aes(xintercept = myMedian), col = "orange", lty = "dashed") + geom_vline(aes(xintercept = myOffset), col = "darkgray", lty = "dashed") + geom_label(x = pct975, y = .015, label = "97.5%", size = 3) + geom_label(x = pct95, y = .015, label = "95%", size = 3) + geom_label(x = myMean + myOffset, y = .015, label = "mean", size = 3) + geom_label(x = myMedian, y = .01, label = "median (50%)", size = 3) + geom_label(x = myOffset, y = .005, label = "Offset", size = 3) + # geom_label(x = maxX/2, y = .01, label = paste("offsetexp(", myOffset, ",", signif(myMean + myOffset, digits = 3), ")"), size = 4) ggtitle(label = paste("offsetexp(", myOffset, ",", signif(myMean + myOffset, digits = 3), ")")) ################################################################################################# ###############CODE FOR CALCULATING EVOLUTIONARY RATE OF A TREE FOR CALIBRATION################## ## If you use this code for another project, please cite the Gunnell et al. 2018 paper ## ##Gunnell, G.F., Boyer, D.M., Friscia, A.R., Heritage, S., Manthi, F.K., Miller, E.R., Sallam, H.M., Simmons, N.B., Stevens, N.J., Seiffert, E.R., 2018. Fossil lemurs from Egypt and Kenya suggest an African origin for Madagascar’s aye-aye. Nature communications, 9(1), p.3193. #### # Load the package (after installation). library(optimx) # optimx seems better than R's default optim() library(GenSA) # GenSA seems better than optimx (but slower) on 5+ parameters, # seems to sometimes fail on simple problems (2-3 parameters) library(FD) # for FD::maxent() (make sure this is up-to-date) library(snow) # (if you want to use multicore functionality; some systems/R versions prefer library(parallel), try either) library(parallel) library(rexpokit) library(cladoRcpp) ############################################################# wd = np("~/Desktop/clockratepr") setwd(wd) getwd() treefilename="mito_newick.nwk" datesfilename="mito_dates.txt" rootage=8.8 ############################################################# library(ape) library(fitdistrplus) ############################################################# # to extract path lengths, scale them, and write them to a csv file # mytree=read.tree(treefilename) mytips=as.data.frame(mytree$tip) tipnumbers=as.numeric(rownames(mytips)) tipnames=as.matrix(mytips) maxtipnumber=max(tipnumbers) rootnumber=maxtipnumber+1 mymatrix=matrix(nrow=maxtipnumber,ncol=5) colnames(mymatrix)=c("node #","taxon","path-length","tip-date","scaled-path") mymatrix[,1]=tipnumbers mymatrix[,2]=tipnames[,1] mytable=dist.nodes(mytree) torootdist_A=as.matrix(mytable[,rootnumber]) torootdist_B=as.matrix(torootdist_A[1:maxtipnumber,1]) mymatrix[,3]=torootdist_B[,1] ## mydates=read.table(datesfilename) mydates=as.matrix(mydates) mydates=mydates[order(mydates[, "V1"]),] mymatrix=mymatrix[order(mymatrix[, "taxon"]),] mymatrix[,4]=mydates[,2] pathlength=as.numeric(mymatrix[,3]) tipdate=as.numeric(mymatrix[,4]) count=1 repeat { mymatrix[count,5]=(pathlength[count])/((rootage)-(tipdate[count])) count=count+1 if (count==maxtipnumber+1) break } filenameout_a="path.lengths." filenameout_b=rootage filenameout_c=".csv" filenameout_d=paste(filenameout_a,filenameout_b,filenameout_c,sep="") write.table(mymatrix,filenameout_d,row.names=F,na="",sep=",") ############################################################# # to fit distributions and write-out a table with parameter values, likelihoods and BICs # x=as.numeric(mymatrix[,5]) xfornormal=x*1000 fitted_normal=fitdist(xfornormal,"norm",method="mle") a=as.matrix(unlist(summary(fitted_normal))) names=rownames(a) names=names[1:20] a1="" a2=a["estimate.mean",] a3=a["estimate.sd",] a4=a["method",] a5=a["n",] a6=a["loglik",] a7=a["bic",] names=c("NORMAL","estimate.mean","estimate.sd","method","n","loglik","bic") normal_summary=matrix(,nrow=7,ncol=2) normal_summary[1:7,1]=names normal_summary[1,2]=a1 normal_summary[2,2]=a2 normal_summary[3,2]=a3 normal_summary[4,2]=a4 normal_summary[5,2]=a5 normal_summary[6,2]=a6 normal_summary[7,2]=a7 a22=as.numeric(a2) a33=as.numeric(a3) a2=a22/1000 a3=a33/1000 normal_summary[2,2]=a2 normal_summary[3,2]=a3 xforlognormal=x fitted_lognormal=fitdist(xforlognormal,"lnorm",method=c("mle")) b=as.matrix(unlist(summary(fitted_lognormal))) names=rownames(b) names=names[1:20] b1="" b2=b["estimate.meanlog",] b3=b["estimate.sdlog",] b4=b["method",] b5=b["n",] b6=b["loglik",] b7=b["bic",] names=c("LOGNORMAL","estimate.meanlog","estimate.sdlog","method","n","loglik","bic") lognormal_summary=matrix(,nrow=7,ncol=2) lognormal_summary[1:7,1]=names lognormal_summary[1,2]=b1 lognormal_summary[2,2]=b2 lognormal_summary[3,2]=b3 lognormal_summary[4,2]=b4 lognormal_summary[5,2]=b5 lognormal_summary[6,2]=b6 lognormal_summary[7,2]=b7 xforgamma=x # fitted_gamma=fitdist(xforgamma,"gamma",method="mle",lower=c(0,0)) # fitted_gamma=fitdist(xforgamma,"gamma",method="mle") c=as.matrix(unlist(summary(fitted_gamma))) names=rownames(c) names=names[1:20] c1="" c2=c["estimate.shape",] c3=c["estimate.rate",] c4=c["method",] c5=c["n",] c6=c["loglik",] c7=c["bic",] names=c("GAMMA","estimate.shape","estimate.rate","method","n","loglik","bic") gamma_summary=matrix(,nrow=7,ncol=2) gamma_summary[1:7,1]=names gamma_summary[1,2]=c1 gamma_summary[2,2]=c2 gamma_summary[3,2]=c3 gamma_summary[4,2]=c4 gamma_summary[5,2]=c5 gamma_summary[6,2]=c6 gamma_summary[7,2]=c7 normal_mean=as.numeric(normal_summary[2,2]) normal_sd=as.numeric(normal_summary[3,2]) lognormal_mean=as.numeric(lognormal_summary[2,2]) lognormal_sd=as.numeric(lognormal_summary[3,2]) gamma_shape=as.numeric(gamma_summary[2,2]) gamma_rate=as.numeric(gamma_summary[3,2]) ##### normal_summary lognormal_summary gamma_summary ##### outputmatrix=matrix(,nrow=25,ncol=2) blankcell="" outputmatrix[8,1]=blankcell outputmatrix[8,2]=blankcell outputmatrix[9,1]=blankcell outputmatrix[9,2]=blankcell outputmatrix[17,1]=blankcell outputmatrix[17,2]=blankcell outputmatrix[18,1]=blankcell outputmatrix[18,2]=blankcell outputmatrix[1:7,1:2]=normal_summary[1:7,1:2] outputmatrix[10:16,1:2]=lognormal_summary[1:7,1:2] outputmatrix[19:25,1:2]=gamma_summary[1:7,1:2] filenameout_e="fit.distributions." filenameout_f=rootage filenameout_g=".csv" filenameout_h=paste(filenameout_e,filenameout_f,filenameout_g,sep="") write.table(outputmatrix,filenameout_h,row.names=F,col.names=F,na="",sep=",") labelx="red=normal, blue=lognormal, green=gamma" labely="" titledensity="empirical density & fit distributions" titlehistogram="empirical data & fit distributions" densityofdata=density(x) pdf("density.pdf",width=5,height=5) plot(densityofdata,main=titledensity,xlab=labelx,ylab=labely) curve(dnorm(x,normal_mean,normal_sd),col="red",lwd=1,add=T) curve(dlnorm(x,lognormal_mean,lognormal_sd),col="blue",lwd=1,add=T) curve(dgamma(x,gamma_shape,gamma_rate),col="green",lwd=1,add=T) dev.off() pdf("histogram.pdf",width=5,height=5) hist(x,main=titlehistogram,xlab=labelx,ylab=labely,breaks=25) curve(dnorm(x,normal_mean,normal_sd),col="red",lwd=1,add=T) curve(dlnorm(x,lognormal_mean,lognormal_sd),col="blue",lwd=1,add=T) curve(dgamma(x,gamma_shape,gamma_rate),col="green",lwd=1,add=T) dev.off() ### quit("no")