##################################################################################################### ### Evolutionary patterns of range size, abundance and species richness in Amazonian angiosperm trees ### Kyle Dexter ### University of Edinburgh ### kgdexter@gmail.com ### July 7, 2016 ##################################################################################################### ###Required packages library(ape) library(geiger) library(phytools) ###Datasets to import ##Data set on range size of Amazonian plants from Feeley and Silman. 2009. PNAS #download from http://www.pnas.org/content/106/30/12382.abstract?tab=ds FS_data_raw <- FS_data <- read.table("ST2.txt",header=T,stringsAsFactors=F) #clean up a few names that had special characters FS_data[FS_data$SPECIES.NAME=="Mimosa_chaco+\xbdnsis",1] <- "Mimosa_chacoensis" FS_data[FS_data$SPECIES.NAME=="Mimosa_morro+\xbdnsis",1] <- "Mimosa_morroensis" FS_data[FS_data$SPECIES.NAME=="Raveniopsis_araca+\xbdnsis",1] <- "Raveniopsis_aracaensis" #add a genus column and log-transform range size FS_data$Genus <- vector("character",nrow(FS_data)) for (i in 1:nrow(FS_data)){ FS_data$Genus[i] <- unlist(strsplit(FS_data$SPECIES.NAME[i],split="_"))[1] print(i) } FS_data$Genus <- as.factor(FS_data$Genus) FS_data$log10rangesize <- log10(FS_data$RANGE.AREA.KM2) #Calculate genus means for range size RangeSize <- tapply(FS_data$log10rangesize,list(Genus=FS_data$Genus),FUN=mean) #Calculate species richness for each genus and log-transform SppRichness <- log10(summary(FS_data$Genus,maxsum=50000)) ##Data set on abundance of Amazonian trees from ter Steege et al. 2013. Science #download from http://science.sciencemag.org/content/suppl/2013/10/16/342.6156.1243092.DC1 #save worksheet TS_data_raw <- TS_data <- read.csv("terSteege_Appendix.csv",stringsAsFactors=F) TS_data$Genus <- vector("character",nrow(TS_data)) for (i in 1:nrow(TS_data)){ TS_data$Genus[i] <- unlist(strsplit(TS_data$Accepted_species[i],split="_"))[1] print(i) } TS_data$Genus <- as.factor(TS_data$Genus) TS_data$log10abundance <- log10(TS_data$est.ind) #Calculate genus means for abundance Abundance <- tapply(TS_data$log10abundance,list(Genus=TS_data$Genus),FUN=mean) ##List of Neotropical tree genera produced by Rick Condit and colleagues at STRI based on plots throughout Neotropics #download from http://ctfs.si.edu/webatlas/TreeDatabaseUtilities/GenusList.php and save as tab-delimited text file #first delete one line of data that has question marks (an extra species of Eschweilera?) STRI_data_raw <- STRI_data <- read.table("STRI_genera.txt",header=T,stringsAsFactors=F) #fix a few name mismatches in this list STRI_data$Genus[which(STRI_data$Genus=="Diploön")] <- "Diploon" STRI_data$Genus[which(STRI_data$Genus=="Monnieria")] <- "Monniera" STRI_data$Genus[which(STRI_data$Genus=="Chondodendron")] <- "Chondrodendron" ##In order to extract a list of Amazonian tree genera, I intersected the list of genera from the STRI website (for all Neotropical trees) with that from Feeley and Silman (for all Amazonian plants) Amazon_tree_genera <- rownames(Genus_data)[which(rownames(Genus_data)%in%STRI_data$Genus)] #I then united this with the list from ter Steege et al. which has better coverage for trees in the Amazon than the STRI dataset and may have additional genera Amazon_tree_genera <- unique(c(Amazon_tree_genera,names(Abundance))) #This is the list we used to search Genbank to try and include as many genera as possible in the phylogeny write.table(Amazon_tree_genera,"Amazon_tree_genera.txt",row.names=F,quote=F) ##The phylogeny was generated entirely outside of the R Statistical software following methods detailed in the main text of the paper #Import the phylogeny phylog <- read.tree("Amazon_genus_phylog.tre") #drop outgroups phylog <- drop.tip(phylog,c("Amborella","Nymphaea")) ###Fitting different evolutionary models for each of the genus-level attributes compiled above #start with species richness, make data match up first phylog_FS <- drop.tip(phylog,which(!phylog$tip.label%in%names(SppRichness))) SppRichness_sub <- SppRichness[match(phylog_FS$tip.label,names(SppRichness))] (SppRichness_BM <- fitContinuous(phylog_FS,SppRichness_sub,model="BM")) # delta AIC, -287.807 (SppRichness_lambda <- fitContinuous(phylog_FS,SppRichness_sub,model="lambda")) # lambda, 0.261226 (SppRichness_white <- fitContinuous(phylog_FS,SppRichness_sub,model="white")) # delta AIC, -17.09426 #then range size RangeSize_sub <- RangeSize[match(phylog_FS$tip.label,names(RangeSize))] (RangeSize_BM <- fitContinuous(phylog_FS,RangeSize_sub,model="BM")) # delta AIC, -257.9293 (RangeSize_lambda <- fitContinuous(phylog_FS,RangeSize_sub,model="lambda")) # lambda, 0.369979 (RangeSize_white <- fitContinuous(phylog_FS,RangeSize_sub,model="white")) # delta AIC, -52.04149 #then abundance phylog_TS <- drop.tip(phylog,which(!phylog$tip.label%in%names(Abundance))) Abundance_sub <- Abundance[match(phylog_TS$tip.label,names(Abundance))] (Abundance_BM <- fitContinuous(phylog_TS,Abundance_sub,model="BM")) # delta AIC, -287.5399 (Abundance_lambda <- fitContinuous(phylog_TS,Abundance_sub,model="lambda")) # lambda, 0.322649 (Abundance_white <- fitContinuous(phylog_TS,Abundance_sub,model="white")) # delta AIC, -34.24044 ###Assessing relationships among genus-level attributes ##First, assemble dataset with data for all three genus-level attributes Genus_data_tmp <- as.data.frame(cbind(SppRichness,RangeSize)) Genus_data_tmp$Genus <- rownames(Genus_data_tmp) Abundance_df <- as.data.frame(Abundance) Abundance_df$Genus <- names(Abundance) Genus_data_all <- merge(Genus_data_tmp,Abundance_df,by="Genus",all=T) Genus_data_all <- Genus_data_all[which(Genus_data_all$Genus%in%Amazon_tree_genera),] Genus_data_complete <- merge(Genus_data_tmp,Abundance_df,by="Genus") #then cut data down to that which we have for both phylog_sub <- drop.tip(phylog,which(!phylog$tip.label%in%Genus_data_complete$Genus)) Genus_data_sub <- Genus_data_complete[match(phylog_sub$tip.label,Genus_data_complete$Genus),] #calculate the phylogenetically independent contrasts for each genus-level characteristic pic_richness <- pic(Genus_data_sub[,2],phylog_sub) pic_range <- pic(Genus_data_sub[,3],phylog_sub) pic_abundance <- pic(Genus_data_sub[,4],phylog_sub) #assess relationships between phylogenetically independent contrasts, forcing the relationship through the origin pic_richnessXrange <- lm(pic_richness~pic_range-1) pic_richnessXabundance <- lm(pic_richness~pic_abundance-1) pic_abundanceXrange <- lm(pic_abundance~pic_range-1) summary(pic_richnessXrange) # r = 0.2840775 summary(pic_richnessXabundance) # r = 0.2377814 summary(pic_abundanceXrange) # r = 0.4327817 #compare this to standard Pearson correlation coefficient cor(Genus_data_sub$SppRichness,Genus_data_sub$RangeSize) # r = -0.396348 cor(Genus_data_sub$SppRichness,Genus_data_sub$Abundance) # r = -0.3725245 cor(Genus_data_sub$Abundance,Genus_data_sub$RangeSize) # r = 0.4429052 ###Performing test of whether individual nodes show greater or smaller values than expected by chance for reconstructed values of genus-level attributes #first write function to perform the statistical test test_nodes <- function(tree,trait,nsims){ tree2use <- drop.tip(tree,which(!tree$tip.label%in%names(trait))) trait2use <- trait[match(tree2use$tip.label,names(trait))] true_ace <- fastAnc(tree2use,trait2use) sim_results <- matrix(NA,length(true_ace),nsims) for (i in 1:nsims){ randomtrait <- sample(trait2use,length(trait2use),replace=F) names(randomtrait) <- names(trait2use) sim_results[,i] <- fastAnc(tree2use,randomtrait) print(paste("Randomisation",i)) } p_value <- vector("numeric",length(true_ace)) for (j in 1:length(true_ace)){ p_value[j] <- length(which(true_ace[j] > sim_results[j,]))/(nsims+1) } out <- cbind(as.numeric(names(true_ace)),true_ace,p_value) colnames(out)[1] <- "Node" out <- as.data.frame(out) return(out) } #running function for each of the genus-level attributes SppRichness_testresults <- test_nodes(phylog_FS,SppRichness_sub,999) RangeSize_testresults <- test_nodes(phylog_FS,RangeSize_sub,999) Abundance_testresults <- test_nodes(phylog,Abundance_sub,999) ##now match these results up with clade names #first import taxonomy table taxonomy <- read.csv("taxonomy_table.csv") #reorganise this information taxonomy2 <- matrix(NA,1,3) colnames(taxonomy2) <- c("Genus","Clade","Category") for (i in 2:ncol(taxonomy)){ tmp <- na.omit(taxonomy[,c(1,i)]) colnames(tmp) <- c("Genus","Clade") tmp_clades <- summary(as.factor(tmp$Clade),maxsum=1000) tmp_clades <- tmp_clades[which(tmp_clades>1)] tmp <- tmp[which(tmp$Clade%in%names(tmp_clades)),] tmp$Category <- colnames(taxonomy)[i] taxonomy2 <- rbind(taxonomy2,tmp) } taxonomy2 <- taxonomy2[-1,] ##match up with phylogeny reduced to genera in Feeley and Silman dataset first #first get reduced taxonomy table for genera in Feeley and Silman dataset taxonomy2_FS <- taxonomy2[which(taxonomy2$Genus%in%phylog_FS$tip.label),] tmp <- summary(as.factor(taxonomy2_FS$Clade),maxsum=1000) tmp1 <- names(tmp)[which(tmp==1)] taxonomy2_FS <- taxonomy2_FS[which(!taxonomy2_FS$Clade%in%tmp1),] #obtain the clades in that dataset clades_FS <- unique(taxonomy2_FS$Clade) #create a matrix for storing node numbers to match with output from test_nodes phylog_FS_clades <- matrix(NA,length(clades_FS),3) phylog_FS_clades[,1] <- clades_FS colnames(phylog_FS_clades) <- c("Clade","Category","Node") for (i in 1:length(clades_FS)){ phylog_FS_clades[i,2] <- unique(taxonomy2_FS$Category[which(taxonomy2_FS$Clade==clades_FS[i])]) tmp <- taxonomy2_FS$Genus[which(taxonomy2_FS$Clade==clades_FS[i])] phylog_FS_clades[i,3] <- getMRCA(phylog_FS,tmp) print(i) } phylog_FS_clades <- as.data.frame(phylog_FS_clades) #deal with the fact that some clades in the phylogeny have two names (e.g. Arecaceae and Arecales are the same here), choose lower-level name tmp <- summary(phylog_FS_clades$Node,maxsum=1000) tmp1 <- names(tmp)[which(tmp>1)] phylog_FS_clades <- phylog_FS_clades[-match(tmp1,phylog_FS_clades$Node),] phylog_FS_clades$Node <- as.numeric(as.character(phylog_FS_clades$Node)) #now actually matching these up colnames(SppRichness_testresults)[2:3] <- paste(colnames(SppRichness_testresults)[2:3],"SppRichness",sep="_") colnames(RangeSize_testresults)[2:3] <- paste(colnames(RangeSize_testresults)[2:3],"RangeSize",sep="_") FS_results <- merge(SppRichness_testresults,RangeSize_testresults,by="Node") FS_results <- merge(phylog_FS_clades,FS_results,by="Node",all=T) #do the same for ter Steege dataset taxonomy2_TS <- taxonomy2[which(taxonomy2$Genus%in%phylog_TS$tip.label),] tmp <- summary(as.factor(taxonomy2_TS$Clade),maxsum=1000) tmp1 <- names(tmp)[which(tmp==1)] taxonomy2_TS <- taxonomy2_TS[which(!taxonomy2_TS$Clade%in%tmp1),] #obtain the clades in that dataset clades_TS <- unique(taxonomy2_TS$Clade) #create a matrix for storing node numbers to match with output from test_nodes phylog_TS_clades <- matrix(NA,length(clades_TS),3) phylog_TS_clades[,1] <- clades_TS colnames(phylog_TS_clades) <- c("Clade","Category","Node") for (i in 1:length(clades_TS)){ phylog_TS_clades[i,2] <- unique(taxonomy2_TS$Category[which(taxonomy2_TS$Clade==clades_TS[i])]) tmp <- taxonomy2_TS$Genus[which(taxonomy2_TS$Clade==clades_TS[i])] phylog_TS_clades[i,3] <- getMRCA(phylog_TS,tmp) print(i) } phylog_TS_clades <- as.data.frame(phylog_TS_clades) #deal with the fact that some clades in the phylogeny have two names (e.g. Arecaceae and Arecales are the same here), choose lower-level name tmp <- summary(phylog_TS_clades$Node,maxsum=1000) tmp1 <- names(tmp)[which(tmp>1)] phylog_TS_clades <- phylog_TS_clades[-match(tmp1,phylog_TS_clades$Node),] phylog_TS_clades$Node <- as.numeric(as.character(phylog_TS_clades$Node)) #now actually matching these up colnames(Abundance_testresults)[2:3] <- paste(colnames(Abundance_testresults)[2:3],"Abundance",sep="_") TS_results <- merge(phylog_TS_clades,Abundance_testresults,by="Node",all=T) #merge all results together and export FS_results_sub <- FS_results[which(!is.na(FS_results$Clade)),-1] TS_results_sub <- TS_results[which(!is.na(TS_results$Clade)),-1] clade_results <- merge(FS_results_sub,TS_results_sub,by=c("Clade","Category"),all=T) write.csv(clade_results,"Table_S3.csv",row.names=FALSE,quote=F) ###Assessing whether major groups show significant differences for genus-level attributes combined_data <- merge(taxonomy,Genus_data_all,by="Genus") plot(SppRichness~major_group,data=combined_data) summary(lm(SppRichness~major_group,data=combined_data)) TukeyHSD(aov(SppRichness~major_group,data=combined_data)) plot(RangeSize~major_group,data=combined_data) summary(lm(RangeSize~major_group,data=combined_data)) TukeyHSD(aov(RangeSize~major_group,data=combined_data)) plot(Abundance~major_group,data=combined_data) summary(lm(Abundance~major_group,data=combined_data)) TukeyHSD(aov(Abundance~major_group,data=combined_data))