setwd("/Users/genetics/Desktop/Clermontia/R/") library("biomformat") library("Matrix") library("tidyr") library("dplyr") library("vegan") library("geosphere") library("ggplot2") library("RColorBrewer") clermontia_info <- read_biom("otu_table_mc2_w_tax.biom") taxonomy <- observation_metadata(clermontia_info) taxonomy %>% mutate(OTUs = row.names(taxonomy)) -> taxonomy #Rename headers rename(taxonomy, Kingdom = taxonomy1,Phylum=taxonomy2,Class=taxonomy3, Order=taxonomy4,Family=taxonomy5,Genus=taxonomy6,species=taxonomy7) -> taxonomy #Load mapping file mapping <- as.data.frame((read.csv("Mapping_file.csv"))) #make OTU matrix otus = as.data.frame(as.matrix(biom_data(clermontia_info))) #reorder columns by island otus <- otus[,c(9,8,2,7,1,4,5,10,3,6)] #mutate rownames to OTU names to join with taxonomy otus %>% mutate(OTUs = row.names(otus)) -> otus #Join OTUs and Taxonomy full_join(otus, taxonomy) -> everything #Remove Plantae, ie subset Fungi fungi_everything = subset(everything, Kingdom=="k__Fungi") #Fungal OTUs fungi_otus <- fungi_everything[,c(1:10)] #Subset all OTUs row sums >10 reads good_otus_everything <- fungi_everything[apply(fungi_everything[,1:10],1,sum) >10,] #Rarefy good_otus_everything[,1:10] -> good_otus min(colSums(good_otus)) -> rare t(good_otus) -> t_good_otus rrarefy(t_good_otus, rare) -> rare_good rowSums(rare_good) #Remove any OTUs with 0 reads rare_good_otus <- rare_good[,apply(rare_good[,1:2037],2,sum) >0] which(colSums(rare_good) == 0) #Transpose t(rare_good_otus) -> t_rare_good_otus #make community dissimilarity matrix of rarefied otus ComDists <- as.matrix(vegdist(rare_good_otus)) is.na(diag(ComDists)) <- TRUE #make distance matrix for lat/long Dists <- cbind(mapping$Longitude, mapping$Latitude) PhysDists <- as.matrix(distm(Dists)) is.na(diag(PhysDists)) <-TRUE library(udunits2) ud.convert(PhysDists, "m", "km") -> PhysDists as.Date(mapping[,7], "%m/%d/%y") -> date_up as.matrix(dist(date_up)) -> dist_date is.na(diag(dist_date)) <- TRUE #Mantel for days and physical distance mantel(dist_date, PhysDists, permutations = 10000) plot(PhysDists~dist_date, xlab="Pairwise time (days)", ylab="Distance (km)") #Mantel for days and community dissimilarity mantel(dist_date, ComDists, permutations = 10000) plot(ComDists~dist_date, xlab="Pairwise time (days)", ylab="Community dissimilarity (Bray-Curtis)") #Partial mantel for comm dist and phys dist, with time mantel.partial(ComDists, PhysDists, dist_date, permutations = 10000) #Plot physical distance over dissimilarity in ggplot #Unstack matrix dm1 = c(ComDists) dm2 = c(PhysDists) #Generate a new data frame (column names are dm1 and dm2) plot_dm = as.data.frame(cbind(dm2,dm1)) #plot with smoother (default method for N<1000 is loess) #Change from scientific to continuous require(scales) ggplot(plot_dm, aes(plot_dm[,1], plot_dm[,2])) + geom_point() + stat_smooth(method= lm) + theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour="black")) + scale_y_continuous(limits = c(0.7,1)) + labs(title = "") + xlab("Distance (km)") + ylab("Community dissimilarity (Bray-Curtis)") + scale_x_continuous(labels = comma) ##iNEXT rarefaction curves library(iNEXT) #To make pres/abs matrix decostand(rare_good_otus, "pa") -> pa_rare_otus #iNEXT for plant samples (n=10) t(pa_rare_otus) -> t_pa #Set number of intervals m <- c(0,1,2,3,4, 5,6,7,8,9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100) #iNEXT for samples out1 <- iNEXT(t_pa, q = c(0,1,2), datatype = "incidence_raw", size = m) ## iNEXT interp/extrap data out1$AsyEst #Plot ggiNEXT(out1, color.var = "order") ->p p + xlab("Clermontia samples") + ylab("Diversity") + scale_fill_discrete(labels = c("Chao1", "Shannon", "Simpson")) + theme_classic() #For species - (n=8 Clermontia spp.) #Combine kakeana into one column t_pa[,6]+t_pa[,7]+t_pa[,8] -> t_pa_kakeana cbind(t_pa_kakeana,t_pa[,1:5],t_pa[,9:10]) -> t_pa_species #Pres/abs for each species decostand(t_pa_species, "pa") -> pa_species #iNEXT extrapolation/interpolation for raw p/a data, q = species richness, #Chao, Shannon diversity, Simpson diversity out2 <- iNEXT(pa_species, q = c(0,1,2), datatype = "incidence_raw", size = m) ## iNEXT interp/extrap out2$AsyEst #Plot ggiNEXT(out2, color.var = "order") ->p p + xlab("Clermontia species") + ylab("Diversity") + scale_fill_discrete(labels = c("Chao1", "Shannon", "Simpson")) + theme_classic() #iNEXT by abundance out3 <- iNEXT(otus, q = 0, datatype = "abundance") ggiNEXT(out3) ->p p + xlab("Reads") + ylab("Richness") + theme_classic() ##Diversity #Richness per sample range(specnumber(rare_good_otus)) #Average richness/sample mean(specnumber(rare_good_otus)) std.error(specnumber(rare_good_otus)) #Average richness/island #abundance grouped by island Hawaii <- t_rare_good_otus[,1] + t_rare_good_otus[,2] + t_rare_good_otus[,3] + t_rare_good_otus[,4] Maui <- t_rare_good_otus[,5] + t_rare_good_otus[,6] Molokai <- t_rare_good_otus[,7] Oahu <- t_rare_good_otus[,8] + t_rare_good_otus[,9] Kauai <- t_rare_good_otus[,10] islands <- cbind(Hawaii,Maui,Molokai,Oahu,Kauai) specnumber(t(islands)) mean(specnumber(t(islands))) std.error(specnumber(t(islands))) #Subset rarefied otus for rel abun figures and diversity taxonomy_new <- cbind(Row.Names = rownames(taxonomy), taxonomy) new_rare_good_otus <- cbind(Row.Names = rownames(t_rare_good_otus), t_rare_good_otus) inner_join(data.frame(new_rare_good_otus), taxonomy_new) -> rare_tax cbind(rare_tax[,2:19]) -> rare_tax apply(rare_tax[,1:10], 2, as.numeric) -> rare_tax[,1:10] #Unknown taxa which(rare_tax$Family == "f__unidentified") which(rare_tax$Order == "o__unidentified") which(rare_tax$Phylum == "p__unidentified") ##Make relabundance plots - phyla then order #Sum by phyla as.matrix(aggregate(rare_tax[,1:10], by = list(rare_tax$Phylum), sum)) -> phylum_everything relabundance_phyla <- matrix(0, nrow=5, ncol = 10) for(i in 1:10){ s = sum(as.numeric(phylum_everything[,i+1])) relabundance_phyla[,i] = as.numeric(phylum_everything[,i+1])/s } barplot(relabundance_phyla, col=c("cadetblue3","lightsalmon2","green","darkmagenta","red3"), names.arg = colnames(phylum_everything[,2:11]), xlab = "Sample", ylab = "Relative abundance",border=NA) #For legend plot(0, type="n") legend("center", fill = c("cadetblue3","lightsalmon2","green","darkmagenta","red3"), title = "Phylum",legend = substr(phylum_everything[,1],4, 50), ncol=1, bty="n") #Sum by order as.matrix(aggregate(rare_tax[,1:10], by = list(rare_tax$Order), sum)) -> order_everything #Create empty matrix for relative abundances of orders add in with for loop (i+1 because of OTU row) relabundance <- matrix(0,nrow=65, ncol=10) for(i in 1:10){ s = sum(as.numeric(order_everything[,i+1])) relabundance[,i] = as.numeric(order_everything[,i+1])/s } row.names(relabundance) = order_everything[,1] which(rowSums(relabundance) %in% tail(sort(rowSums(relabundance)),10)) #top 10 orders rownames(relabundance)[which(rowSums(relabundance) %in% tail(sort(rowSums(relabundance)),10))] #Plot 10 most abundant orders colors <- grey.colors(65) colors[which(rowSums(relabundance) %in% tail((sort(rowSums(relabundance))),10))] <- c("aquamarine3","lightpink4","salmon1","thistle3","navy","seagreen", "darkseagreen","mediumpurple1","turquoise4","darkviolet") barplot(relabundance, col=colors, xlab = "Sample", ylab = "Relative abundance", names.arg = colnames(order_everything[,2:11])) #Legend for 10 most abundant orders plot(0, type="n") legend("center", fill = c("aquamarine3","lightpink4","salmon1","thistle3","navy","seagreen", "darkseagreen","mediumpurple1","turquoise4","darkviolet"), title = "Order", legend = c("Capnodiales","Chaetothyriales","Exobasidiales", "Incertae sedis","Peltigerales","Pertusariales","Pleosporales","Tremellales", "Unidentified", "Ustilaginales"), ncol=2, bty="n") ## Presence absence freq <- t_pa #presence grouped by island freq_Hawaii <- freq[,1] + freq[,2] + freq[,3] + freq[,4] freq_Maui <- freq[,5] + freq[,6] freq_Molokai <- freq[,7] freq_Oahu <- freq[,8] + freq[,9] freq_Kauai <- freq[,10] #All in one df, then p/a OTUs freq_island <- cbind(freq_Hawaii,freq_Maui,freq_Molokai,freq_Oahu,freq_Kauai) decostand(freq_island, "pa") -> pa_freq_islands #overlap all islands rowSums(pa_freq_islands) -> island_sums freq_islands_row <- cbind(pa_freq_islands,island_sums) which(freq_islands_row[,6] == 5) #Make Venn diagram for OTU overlap library(VennDiagram) venn.diagram(list("Hawai‘i"=which(pa_freq_islands[,1]==1), "Maui"=which(pa_freq_islands[,2]==1), "Moloka‘i"=which(pa_freq_islands[,3]==1), "O‘ahu"=which(pa_freq_islands[,4]==1), "Kaua‘i"=which(pa_freq_islands[,5]==1)), "/Users/erindatlof/Desktop/venn1.pdf", col = "black", fill = c("cornflowerblue", "seagreen2", "yellow", "darkorchid1", "darkorange1"), alpha = 0.50, cex = c(1.5, 1.5, 1.5, 1.5, 1.5, 1, 0.8, 1, 0.8, 1, 0.8, 1, 0.8, 1, 0.8, 1, 0.55, 1, 0.55, 1, 0.55, 1, 0.55, 1, 0.55, 1, 1, 1, 1, 1, 1.5), cat.col = "black", cat.cex = 1.5, cat.dist = 0.24, cat.fontface = "bold", cat.fontfamily = "calibri", margin = 0.1) #Rarefaction curve for each sample fungi_everything[,1:10] -> fungi_otus t(fungi_otus) -> t_fungi_otus rarecurve(t_fungi_otus, step = 10, col = colVec, xlab = "Hundred Thousand Reads", ylab = "OTUs") colVec <- c("firebrick", "sienna1", "maroon", "dimgray", "turquoise4", "springgreen4", "springgreen4", "springgreen4", "royalblue3", "purple") colVec_sub <- c("firebrick", "sienna1", "maroon", "dimgray", "turquoise4", "springgreen4", "royalblue3", "purple") #Island legend plot(0, type="n") legend("center", fill = colVec_sub, title = "Clermontia", legend = c("calophylla", "kohalae", "clermoniotides", "peleana", "arborescens", "kakeana", "oblongifolia", "fauriei"), ncol=2) #abundance grouped by island non-rare Hawaii <- good_otus[,1] + good_otus[,2] + good_otus[,3] + good_otus[,4] Maui <- good_otus[,5] + good_otus[,6] Molokai <- good_otus[,7] Oahu <- good_otus[,8] + good_otus[,9] Kauai <- good_otus[,10] #richness for each island islands_all <- cbind(Hawaii,Maui,Molokai,Oahu,Kauai) specnumber(t(islands_all)) #Rarefaction curve for each island island_groups_is <- cbind(Hawaii,Maui,Molokai,Oahu,Kauai) t(island_groups_is) -> t_island_groups_is rarecurve(t_island_groups_is, step = 5, col=c("cadetblue3","lightsalmon2","green","darkmagenta","salmon"), xlab = "Hundred Thousand Reads", ylab = "OTUs", xaxt="n") axis(1, at=c(0, 200000, 400000, 600000, 800000), labels=c("0", "2", "4", "6", "8"), xlab="Hundred Thousand Reads") #Species Accumulation curve plot(specaccum(rare_good_otus), xlab="Clermontia samples",ylab="OTUs", ci.type="poly", col="turquoise4", lwd=2, ci.lty=0, ci.col="snow3", ylim=c(0,2000))