library("devtools") library("dada2") library(phyloseq) library(ggplot2) library(plyr) library(vegan) library(picante) library(TSA) library(nortest) library(multcomp) library(car) library(mctoolsr) library(microbiome) library(mvabund) library(MASS) library(geepack) library(doBy) library(lattice) library(MuMIn) #---------------DADA2------------------- fnFs <- sort(list.files(path, pattern="_R1_001.fastq", full.names = TRUE)) fnRs <- sort(list.files(path, pattern="_R2_001.fastq", full.names = TRUE)) length(fnFs) length(fnRs) sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1) plotQualityProfile(fnFs[1:20]) plotQualityProfile(fnRs[1:20]) filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz")) filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz")) out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, trimLeft=20, truncLen=c(220,200), # adjsut as needed maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE, compress=TRUE, multithread=TRUE) # On Windows set multithread=FALSE head(out, n=40) errF <- learnErrors(filtFs, multithread=TRUE) errR <- learnErrors(filtRs, multithread=TRUE) plotErrors(errF, nominalQ=TRUE) derepFs <- derepFastq(filtFs, verbose=TRUE) derepRs <- derepFastq(filtRs, verbose=TRUE) names(derepRs) <- sample.names dadaFs <- dada(derepFs, err=errF, multithread=TRUE) dadaRs <- dada(derepRs, err=errR, multithread=TRUE) dadaFs[[1]] dadaRs[[1]] mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE) head(mergers[[1]]) seqtab <- makeSequenceTable(mergers) dim(seqtab) table(nchar(getSequences(seqtab))) seqtab2 <- seqtab[,nchar(colnames(seqtab)) %in% seq(250,256)] seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE) dim(seqtab.nochim) sum(seqtab.nochim)/sum(seqtab) getN <- function(x) sum(getUniques(x)) track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim)) colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") rownames(track) <- sample.names head(track) taxa <- assignTaxonomy(seqtab.nochim, "unzip/silva_nr_v138_train_set.fa.gz", multithread=TRUE) colnames(taxa) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus") taxa[is.na(taxa)] <- "unclassified" # change NA to unclasssified sort(unique(taxa[,1])) # include taxonomic levels of kingdom,[,2], phylum, [,3] and class to find chloroplasts and mitochondria taxa2<-data.frame(taxa) ## convert to data.frame taxa2<-subset(taxa2,Kingdom=="Bacteria" & Order!="Chloroplast" & Family!="Mitochondria") # I keep bacteria and eliminate chloroplasts and mitochondria taxa<-as.matrix(taxa2);rm(taxa2) b<-t(seqtab.nochim) ASVsfiltered <- b[rownames(taxa),] # I keeo the ASVs in table with filtered taxonomy seqtab.nochim<-t(ASVsfiltered);rm(b);rm(ASVsfiltered) seqtab.nochim1 <- seqtab.nochim colnames(seqtab.nochim1) <- paste0("ASV", 1:ncol(seqtab.nochim1)) taxa1 <- taxa rownames(taxa1) <- paste0("ASV", 1:nrow(taxa1)) export_taxa_table_and_seqs = function(seqtab.nochim, file_seqtab, file_seqs) { seqtab.t = as.data.frame(t(seqtab.nochim)) seqs = row.names(seqtab.t) row.names(seqtab.t) = paste0("ASV", 1:nrow(seqtab.t)) outlist = list(data_loaded = seqtab.t) mctoolsr::export_taxa_table(outlist, file_seqtab) seqs = as.list(seqs) seqinr::write.fasta(seqs, row.names(seqtab.t), file_seqs) } library(mctoolsr) export_taxa_table_and_seqs(seqtab.nochim,"ASV_spec.txt","ASV_spec.fa") #QIIME2 qiime tools import --input-path /ASV_spec.fa --output-path /ASV_seqs.qza --type 'FeatureData[Sequence]' qiime phylogeny align-to-tree-mafft-fasttree --i-sequences /ASV_seqs.qza --o-alignment /ASV_seqs_aligned.qza --o-masked-alignment /ASV_seqs_aligned_masked.qza --o-tree /unrooted_tree.qza --o-rooted-tree /rooted_tree.qza qiime tools export /rooted_tree.qza --output-dir /tree_for r #----------Load BIOM file, tree and metadata and analyze microbiomes using phyloseq and other packages theme_set(theme_bw()) tree<-read.tree(file.choose()) data<-import_biom(file.choose(),tree);data metadata<-read.table(file.choose(),h=T,row.names = 1,sep='\t') ls.str(metadata) sample_data(data)<-metadata min(colSums(otu_table(data))) data ##### Negative Binomial distribution diagdds = phyloseq_to_deseq2(data, ~local) # Calculate geometric means gm_mean = function(x, na.rm=TRUE){ exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) } geoMeans = apply(counts(diagdds), 1, gm_mean) diagdds = estimateSizeFactors(diagdds, geoMeans = geoMeans) normcounts <- counts(diagdds, normalized = TRUE) round(normcounts, digits = 0) -> normcountsrd otu_table(normcountsrd, taxa_are_rows = TRUE) -> ncr otu_table(data) <- ncr otuD<-as.data.frame(t(otu_table(data))) phylodiversityRAREF_Q<-pd(otuD, phy_tree(data), include.root=TRUE) ### filogenetic diversity. Include root =True tree rooted via midpoint diversityRAREF_Q<-estimate_richness(data) diversityRAREF_Q1<-cbind(sample_data(data),diversityRAREF_Q,phylodiversityRAREF_Q) #------------------------------------------alpha-div ~ locality + (1|species)-------------------------------------- anova(lmer(Observed ~ locality + (1|species), metadata)) anova(lmer(Shannon ~ locality + (1|species), metadata)) anova(lmer(PD ~ locality + (1|species), metadata)) #---------------------------------------alpha-div ~ species + species/sex + (1|locality)----------------------------------------- anova(lmer(Observed ~ species + species/sex + (1|locality), data= metadata)) anova(lmer(Shannon ~ species + species/sex + (1|locality), data= metadata)) anova(lmer(PD ~ species + species/sex + (1|locality), data= metadata)) #---------------------------------------beta-div ~ locality + species----------------------------------------- uniun1<-phyloseq::distance(Q_raref, method="unifrac") uniun1d<-as.matrix(uniun1) uniweigh1<-phyloseq::distance(Q_raref, method="wunifrac") uniweigh1d<-as.matrix(uniweigh1) brayd1<-phyloseq::distance(Q_raref, method="bray") brayd1d<-as.matrix(brayd1) adonis2(uniun1d ~ locality + species, data = diversity_table, perm = 9999) adonis2(uniweigh1d ~ locality + species, data = diversity_table, perm = 9999) adonis2(brayd1d ~ locality + species, data = diversity_table, perm = 9999) # Phyla Q_RAREF 3% rank_names(datos, errorIfNULL=TRUE) # view taxonomic ranks taxon <- tax_glom(Q_raref, taxrank = "Phyla") taxon.tr <- transform_sample_counts(taxon, function (x) x/sum(x)) taxon.tr taxon.tr.f <- filter_taxa(taxon.tr, function (x) mean(x) > 3e-2, TRUE) # filter taxa below 3% taxon.tr.f sample_variables(taxon.tr.f) otu_table(taxon.tr.f) #Export table of phyla with taxonomy table_all<-cbind(tax_table(taxon.tr.f),otu_table(taxon.tr.f)) write.csv(table_all,file="phyla_abund_tax3_localities.csv") # Genera Q_RAREF 3% taxon <- tax_glom(Q_raref, taxrank = "Genera") taxon.tr <- transform_sample_counts(taxon, function (x) x/sum(x)) taxon.tr taxon.tr.f <- filter_taxa(taxon.tr, function (x) mean(x) > 3e-2, TRUE) # filter taxa below 3% taxon.tr.f sample_variables(taxon.tr.f) otu_table(taxon.tr.f) #Export table of genera with taxonomy table_all<-cbind(tax_table(taxon.tr.f),otu_table(taxon.tr.f)) write.csv(table_all,file="genera_abund_tax3_localities.csv") #PCOA p2 = phyloseq::plot_ordination(Q_raref, ordinate(Q_raref, method="PCoA", dist="bray"), type = "samples", color = "species", shape="locality") p2 + geom_point(size = 4.5) + ggtitle("PCoA Unweigthed UNIFRAC") + stat_ellipse(aes(fill = locality)) + scale_shape_manual(values=c(16,17), labels=c("Lisbon","Moledo"), name="Localities") p2 = phyloseq::plot_ordination(Q_raref, ordinate(Q_raref, method="PCoA", dist="unifrac"), type = "samples", color = "species", shape="locality") p2 + geom_point(size = 4.5) + ggtitle("PCoA Bray-Curtis") + stat_ellipse(aes(fill = locality)) + scale_shape_manual(values=c(16,17), labels=c("Lisbon","Moledo"), name="Localities") #ALPHA-DIVERSITY PLOTS shan <- ggplot(diversityRAREF_Q1, aes(species, Shannon)) shan2<-shan + geom_boxplot(aes(fill = species),outlier.colour = "black", outlier.size = 1)+ geom_jitter(size=1,shape=1)+ ggtitle("Shannon diversity")+labs(y = "Shannon diversity") + scale_fill_brewer(palette="Set1") plot(shan2) observed <- ggplot(diversityRAREF_Q1, aes(species, Observed)) observed2<-observed + geom_boxplot(aes(fill = species),outlier.colour = "black", outlier.size = 1)+ geom_jitter(size=1,shape=1) +labs(y = "Observed ASVs")+ scale_fill_brewer(palette="Set1") plot(observed2) phyl <- ggplot(diversityRAREF_Q1, aes(species, PD)) phyl2 <-phyl + geom_boxplot(aes(fill = species),outlier.colour = "black", outlier.size = 1)+ geom_jitter(size=1,shape=1) +labs(y = "Faith's diversity")+ scale_fill_brewer(palette="Set1") plot(phyl2) #----------------------------LISBON samples--------------------- Lisbon <- subset_samples(Q_raref, locality=="Lisbon"); Lisbon min(colSums(otu_table(Lisbon))) otuD<-as.data.frame(t(otu_table(Lisbon))) phylodiversityRAREF_lx<-pd(otuD, tree, include.root=TRUE) ### filogenetic diversity. Include root =True tree rooted via midpoint diversityRAREF_LX<-estimate_richness(Lisbon) diversityRAREF_Lisbon<-cbind(sample_data(Lisbon),diversityRAREF_LX,phylodiversityRAREF_lx) uniunLisbon<-phyloseq::distance(Lisbon, method="unifrac") uniunLisbond<-as.matrix(uniunLisbon) uniweighLisbon<-phyloseq::distance(Lisbon, method="wunifrac") uniweighLisbond<-as.matrix(uniweighLisbon) braydLisbon<-phyloseq::distance(Lisbon, method="bray") braydLisbond<-as.matrix(braydLisbon) > t1<-pairwise.adonis2(uniweighLisbond~species + (species/sex),perm=9999, data=diversityRAREF_Lisbon); t1 > t1<-pairwise.adonis2(uniunLisbond~species + (species/sex),perm=9999, data=diversityRAREF_Lisbon); t1 > t1<-pairwise.adonis2(braydLisbond~species + (species/sex),perm=9999, data=diversityRAREF_Lisbon); t1 ------------------LISBON <3% MOST ABUNDANT GENERA--------------- t1<-lm(Helicobacter~species+ (species/sex), GenusLisbon3) summary(t1) anova(t1) t1<-lm(Corynebacterium~species+ (species/sex), GenusLisbon3) summary(t1) anova(t1) t1<-lm(Parabacteroides~species+ (species/sex), GenusLisbon3) summary(t1) anova(t1) t1<-lm(Bacteroides~species+ (species/sex), GenusLisbon3) summary(t1) anova(t1) t1<-lm(Odoribacter~species+ (species/sex), GenusLisbon3) summary(t1) anova(t1) t1<-lm(Lachnospiraceae_unclassified~species+ (species/sex), GenusLisbon3) summary(t1) anova(t1) t1<-lm(Enterobacteriaceae_unclassified~species+ (species/sex), GenusLisbon3) summary(t1) anova(t1) ##----PLOT_ABUNDANTGENUS_LISBOA asv_table_3perct_lisbon <- read.table(file.choose(),h=T,row.names = 1,sep='\t') taxonomy_3perct_lisbon <- read.table(file.choose(),h=T,row.names = 1,sep='\t') table_genus_3perct_lisbon <- phyloseq(otu_table(as.matrix(asv_table_3perct_lisbon), taxa_are_rows=TRUE), sample_data(as.data.frame(metadata)), tax_table(as.matrix(taxonomy_3perct_lisbon)), phy_tree(tree)) my_plot_bar(table_genus_3perct_lisbon, fill="Genus") +facet_grid(~species, scales = "free_x", space = "free_x") ------------------------------MOLEDO samples--------------------------------- Moledo <- subset_samples(Q_raref, locality=="Moledo"); Moledo > otuD<-as.data.frame(t(otu_table(Moledo))) > phylodiversityRAREF_Mol<-pd(otuD, tree, include.root=TRUE) ### filogenetic diversity. Include root =True tree rooted via midpoint > diversityRAREF_Mol<-estimate_richness(Moledo) > diversityRAREF_Moledo<-cbind(sample_data(Moledo),diversityRAREF_Mol,phylodiversityRAREF_Mol) > uniunMoledo<-phyloseq::distance(Moledo, method="unifrac") > uniunMoledod<-as.matrix(uniunMoledo) > uniweighMoledo<-phyloseq::distance(Moledo, method="wunifrac") > uniweighMoledod<-as.matrix(uniweighMoledo) > braydMoledo<-phyloseq::distance(Moledo, method="bray") > braydMoledod<-as.matrix(braydMoledo) > t1<-pairwise.adonis2(braydMoledod~species + (species/sex),perm=9999, data=diversityRAREF_Moledo); t1 > t1<-pairwise.adonis2(uniunMoledod~species + (species/sex),perm=9999, data=diversityRAREF_Moledo); t1 > t1<-pairwise.adonis2(uniweighMoledod~species + (species/sex),perm=9999, data=diversityRAREF_Moledo); t1 #####MOLEDO <3% MOST ABUNDANT GENERA t1<-lm(Helicobacter~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Corynebacterium~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Selenomonadaceae_unclassified~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Parabacteroides~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Bacteroides~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Odoribacter~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Pseudomonas~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Lachnospiraceae_unclassified~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) t1<-lm(Enterobacteriaceae_unclassified~species+ (species/sex), GenusMoledo3) summary(t1) anova(t1) PLOT_ABUNDANTGENUS_MOLEDO asv_table_3perct_moledo <- read.table(file.choose(),h=T,row.names = 1,sep='\t') taxonomy_3perct_moledo <- read.table(file.choose(),h=T,row.names = 1,sep='\t') table_genus_3perct_moledo <- phyloseq(otu_table(as.matrix(asv_table_3perct_moledo), taxa_are_rows=TRUE), sample_data(as.data.frame(metadata)), tax_table(as.matrix(taxonomy_3perct_moledo)), phy_tree(tree)) my_plot_bar(table_genus_3perct_moledo, fill="Genus") +facet_grid(~species, scales = "free_x", space = "free_x") #BOXPLOTS MOLEDO Corynebacterium Corynebacterium <- ggplot(GenusMoledoNObact, aes(species_sex, Corynebacterium)) Corynebacterium2<-Corynebacterium + geom_boxplot(aes(fill = species_sex),outlier.colour = "black", outlier.size = 1)+ geom_jitter(size=1,shape=1)+ ggtitle("Corynebacterium")+labs(y = "Corynebacterium") plot(Corynebacterium2) #----------------------Pearson Correlation---------------------------- install.packages("ggpubr") library("ggpubr") #Run the Pearson correlation test (REPEAT THE SAME ANALYSIS FOR EACH SPECIES) cor.test(PodarcisSiculaData$size, PodarcisSiculaData$Observed, method = "pearson") cor.test(PodarcisSiculaData$size, PodarcisSiculaData$Shannon, method = "pearson") cor.test(PodarcisSiculaData$size, PodarcisSiculaData$PD, method = "pearson") ggscatter(PodarcisSiculaData, x = "size", y = "Observed.ASVs", add = "reg.line", conf.int = TRUE, cor.coef = TRUE, cor.method = "pearson", xlab = "Size", ylab = "Observed") ggscatter(PodarcisSiculaData, x = "size", y = "Shannon", add = "reg.line", conf.int = TRUE, cor.coef = TRUE, cor.method = "pearson", xlab = "Size", ylab = "Shannon") ggscatter(PodarcisSiculaData, x = "size", y = "PD", add = "reg.line", conf.int = TRUE, cor.coef = TRUE, cor.method = "pearson", xlab = "Size", ylab = "PD") #-------------------------------FEAST---------------------------------- devtools::install_github("cozygene/FEAST") library("FEAST") __________________________________ #SICULUS metadataSICULUS <- Load_metadata(metadata_path = "/metadata_feast_sourcesicula.txt") otusLX <- Load_CountMatrix(CountMatrix_path = "/samplesLX.txt") FEAST_output <- FEAST(C = otusLX, metadata = metadataSICULUS, EM_iterations = 1000, COVERAGE = NULL, different_sources_flag = 0, dir_path = "C:/R/PhyloSeqMicrobiome/newfeast/FEAST/Data_files", outfile="podarcis") _________________ #VIRESCENS metadataVIRESCENS <- Load_metadata(metadata_path = "/metadata_feast_sourcevirescens.txt") otusLX <- Load_CountMatrix(CountMatrix_path = "/samplesLX.txt") FEAST_output <- FEAST(C = otusLX, metadata = metadataVIRESCENS, EM_iterations = 1000, COVERAGE = NULL, different_sources_flag = 0, dir_path = "C:/R/PhyloSeqMicrobiome/newfeast/FEAST/Data_files", outfile="podarcis") __________ #BOCAGEI metadataBOCAGEI <- Load_metadata(metadata_path = "/metadata_feast_sourcebocagei.txt") otusMOLEDO <- Load_CountMatrix(CountMatrix_path = "/samplesMOLEDO.txt") FEAST_output <- FEAST(C = otusMOLEDO, metadata = metadataBOCAGEI, EM_iterations = 1000, COVERAGE = NULL, different_sources_flag = 0, dir_path = "C:/R/PhyloSeqMicrobiome/newfeast/FEAST/Data_files", outfile="podarcis") __________ #LUSITANICUS metadataLUSITANICUS <- Load_metadata(metadata_path = "/metadata_feast_sourcelusitanicus.txt") otusMOLEDO <- Load_CountMatrix(CountMatrix_path = "/samplesMOLEDO.txt") FEAST_output <- FEAST(C = otusMOLEDO, metadata = metadataLUSITANICUS, EM_iterations = 1000, COVERAGE = NULL, different_sources_flag = 0, dir_path = "C:/R/PhyloSeqMicrobiome/newfeast/FEAST/Data_files", outfile="podarcis")