--- title: "Marsden Cove ddPCR data processing" author: "Ulla & Oli" date: "November 2023" output: distill::distill_article: toc: true theme: night highlight: pygments center: true code_folding: true --- ### Loading libraries ```{r Loading libraries, message=FALSE, warning=FALSE, code_folding=TRUE} library(biohelper) library(ggplot2) library(dplyr) library(data.table) library(Biostrings) library(janitor) library(ggthemes) library(DT) library(tibble) library(tidyverse) library(UpSetR) library(vegan) library(plotly) library(crosstalk) library(ranacapa) library(plyr) library(glue) library(RColorBrewer) library(DECIPHER) library(UpSetR) library(theseus) library(microViz) library(grid) library(ggpubr) library(pracma) library(ggbreak) library(patchwork) library(twoddpcr) # options(rgl.useNULL=TRUE) # library(dpcR) dir.create("../Results_data_analysis") time_func = function(df){ df$time2 = 0 for (i in 1:nrow(df)) { if(df$time[i] == 0.1){ df$time2[i] = 10 }else if(df$time[i] == 0.3){ df$time2[i] = 30 }else if(df$time[i] == 1.0){ df$time2[i] = 60 }else if(df$time[i] == 3.0){ df$time2[i] = 180 }else if(df$time[i] == 6.0){ df$time2[i] = 360 }else if(df$time[i] == 12.0){ df$time2[i] = 720 } } return(df) } ``` ##### Bugula ```{r} ########### RUN 2 plate <- ddpcrPlate(wells = "../ddPCR_data/Bugula/Run 2/") meta = readxl::read_xlsx("../ddPCR_data/Bugula/Run 2/ddPCR Run 2 Begula Marsden cove 22-10-31.xlsx",sheet = 2, col_names = F) %>% magrittr::set_colnames(c("well","sample_name")) facetPlot(plate) dropletPlot(plate[["G01"]], cMethod="Cluster") ch1_lim = 18500 ch2_lim = 7500 plate <- thresholdClassify(plate, ch1Threshold=ch1_lim, ch2Threshold=ch2_lim) facetPlot(plate[c("G01","H01")], cMethod="thresholds", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") plate <- mahalanobisRain(plate, cMethod="thresholds", maxDistances=list(NN=15, NP=15, PN=15, PP=15)) facetPlot(plate[c("G01","H01")], cMethod="thresholdsMahRain", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") commonClassificationMethod(plate) facetPlot(plate, cMethod="thresholds", pointSize = 0.5) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") df_plate = plateSummary(plate, cMethod="thresholds") df_plate_r = plateSummary(plate, cMethod="thresholdsMahRain") %>% rownames_to_column("well") df_Bugudula_r2 = left_join(df_plate_r %>% dplyr::select(c(well, TotalCopiesPer20uLWell)), meta) ########### RUN 3 plate <- ddpcrPlate(wells= "../ddPCR_data/Bugula/Run 3/") meta = readxl::read_xlsx("../ddPCR_data/Bugula/Run 3/ddPCR Run 3 Begula Marsden cove 22-11-02.xlsx",sheet = 2, col_names = F) %>% magrittr::set_colnames(c("well","sample_name")) #facetPlot(plate) dropletPlot(plate[["G01"]], cMethod="Cluster") ch1_lim = 18500 ch2_lim = 7500 plate <- thresholdClassify(plate, ch1Threshold=ch1_lim, ch2Threshold=ch2_lim) facetPlot(plate[c("G01","H01")], cMethod="thresholds", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") plate <- mahalanobisRain(plate, cMethod="thresholds", maxDistances=list(NN=15, NP=15, PN=15, PP=15)) facetPlot(plate[c("G01","H01")], cMethod="thresholdsMahRain", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") facetPlot(plate, cMethod="thresholds", pointSize = 0.5) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") df_plate = plateSummary(plate, cMethod="thresholds") df_plate_r = plateSummary(plate, cMethod="thresholdsMahRain") %>% rownames_to_column("well") df_Bugudula_r3 = left_join(df_plate_r %>% dplyr::select(c(well, TotalCopiesPer20uLWell)), meta) ########### RUN 5 plate <- ddpcrPlate(wells= "../ddPCR_data/Bugula/Run 5/") meta = readxl::read_xlsx("../ddPCR_data/Bugula/Run 5/ddPCR Run 5 Begula Marsden cove 22-11-03.xlsx",sheet = 2, col_names = F) %>% magrittr::set_colnames(c("well","sample_name")) #facetPlot(plate) dropletPlot(plate[["H11"]], cMethod="Cluster") ch1_lim = 18500 ch2_lim = 7500 plate <- thresholdClassify(plate, ch1Threshold=ch1_lim, ch2Threshold=ch2_lim) dropletPlot(plate[["H11"]], cMethod="thresholds")+ geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") + theme_classic() facetPlot(plate[c("H11","C11","D11","E11","F11","G11")], cMethod="thresholds", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") + theme_classic() plate <- mahalanobisRain(plate, cMethod="thresholds", maxDistances=list(NN=15, NP=15, PN=15, PP=15)) # facetPlot(plate[c("G01","H01")], # cMethod="thresholdsMahRain", # pointSize = 1) + # geom_hline(yintercept = ch1_lim, linetype="dotted")+ # geom_vline(xintercept = ch2_lim, linetype="dotted") df_plate = plateSummary(plate, cMethod="thresholds") df_plate_r = plateSummary(plate, cMethod="thresholdsMahRain") %>% rownames_to_column("well") df_Bugudula_r5 = left_join(df_plate_r %>% dplyr::select(c(well, TotalCopiesPer20uLWell)), meta) ``` ##### Styela ```{r} ########### RUN 1 plate <- ddpcrPlate(wells= "../ddPCR_data/Styela/Run 1/") meta = readxl::read_xlsx("../ddPCR_data/Styela/Run 1/ddPCR Run 1 Styela 22-10-18.xlsx",sheet = 2, col_names = F) %>% magrittr::set_colnames(c("well","sample_name")) #facetPlot(plate) dropletPlot(plate[["H01"]], cMethod="Cluster") ch1_lim = 2500 ch2_lim = 6000 plate <- thresholdClassify(plate, ch1Threshold=ch1_lim, ch2Threshold=ch2_lim) facetPlot(plate[c("G01","H01")], cMethod="thresholds", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") plate <- mahalanobisRain(plate, cMethod="thresholds", maxDistances=list(NN=15, NP=15, PN=15, PP=15)) facetPlot(plate[c("G01","H01")], cMethod="thresholdsMahRain", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") commonClassificationMethod(plate) facetPlot(plate, cMethod="thresholds", pointSize = 0.5) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") df_plate = plateSummary(plate, cMethod="thresholds") df_plate_r = plateSummary(plate, cMethod="thresholdsMahRain") %>% rownames_to_column("well") df_Styela_r1 = left_join(df_plate_r %>% dplyr::select(c(well, TotalCopiesPer20uLWell)), meta) ########### RUN 2 plate <- ddpcrPlate(wells= "../ddPCR_data/Styela/Run 2/") meta = readxl::read_xlsx("../ddPCR_data/Styela/Run 2/ddPCR Run 2 Styela Marsden cove 22-10-19.xlsx",sheet = 2, col_names = F) %>% magrittr::set_colnames(c("well","sample_name")) #facetPlot(plate) facetPlot(plate[c("G12","H12")], cMethod="Cluster", pointSize = 1) ch1_lim = 2500 ch2_lim = 2500 plate <- thresholdClassify(plate, ch1Threshold=ch1_lim, ch2Threshold=ch2_lim) facetPlot(plate[c("G12","H12")], cMethod="thresholds", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") plate <- mahalanobisRain(plate, cMethod="thresholds", maxDistances=list(NN=15, NP=15, PN=15, PP=15)) facetPlot(plate[c("G12","H12")], cMethod="thresholdsMahRain", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") # facetPlot(plate, # cMethod="thresholds", # pointSize = 0.5) + # geom_hline(yintercept = ch1_lim, linetype="dotted")+ # geom_vline(xintercept = ch2_lim, linetype="dotted") df_plate = plateSummary(plate, cMethod="thresholds") df_plate_r = plateSummary(plate, cMethod="thresholdsMahRain") %>% rownames_to_column("well") df_Styela_r2 = left_join(df_plate_r %>% dplyr::select(c(well, TotalCopiesPer20uLWell)), meta) ########### RUN 3 plate <- ddpcrPlate(wells= "../ddPCR_data/Styela/Run 3/") meta = readxl::read_xlsx("../ddPCR_data/Styela/Run 3/ddPCR Run 3 Styela Marsden cove 22-10-19.xlsx",sheet = 2, col_names = F) %>% magrittr::set_colnames(c("well","sample_name")) facetPlot(plate[c("G11","H11")]) facetPlot(plate[c("G11","H11")], cMethod="Cluster", pointSize = 1) ch1_lim = 2500 ch2_lim = 2500 plate <- thresholdClassify(plate, ch1Threshold=ch1_lim, ch2Threshold=ch2_lim) facetPlot(plate[c("G11","H11")], cMethod="thresholds", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") plate <- mahalanobisRain(plate, cMethod="thresholds", maxDistances=list(NN=15, NP=15, PN=15, PP=15)) facetPlot(plate[c("G11","H11")], cMethod="thresholdsMahRain", pointSize = 1) + geom_hline(yintercept = ch1_lim, linetype="dotted")+ geom_vline(xintercept = ch2_lim, linetype="dotted") # facetPlot(plate, # cMethod="thresholds", # pointSize = 0.5) + # geom_hline(yintercept = ch1_lim, linetype="dotted")+ # geom_vline(xintercept = ch2_lim, linetype="dotted") df_plate = plateSummary(plate, cMethod="thresholds") df_plate_r = plateSummary(plate, cMethod="thresholdsMahRain") %>% rownames_to_column("well") df_Styela_r3 = left_join(df_plate_r %>% dplyr::select(c(well, TotalCopiesPer20uLWell)), meta) ########################################### ########################################### # Merging and saving results ########################################### df_Styela = bind_rows(df_Styela_r2 %>% dplyr::select(sample_name, TotalCopiesPer20uLWell), df_Styela_r3 %>% dplyr::select(sample_name, TotalCopiesPer20uLWell)) %>% janitor::clean_names() %>% dplyr::mutate(copies_u_l = total_copies_per20u_l_well / 20) which(df_Styela$sample_name %ni% metadata$id) df_Styela_test = df_Styela %>% dplyr::filter(sample_name %ni% metadata$id) ``` --- title: "Data clean-up | 18S" --- ### Loading libraries ```{r Loading libraries, message=FALSE, warning=FALSE, code_folding=TRUE} library(biohelper) library(ggplot2) library(dplyr) library(data.table) library(Biostrings) library(janitor) library(ggthemes) library(DT) library(tibble) library(tidyverse) library(UpSetR) library(vegan) library(plotly) library(crosstalk) library(ranacapa) library(plyr) library(glue) library(theseus) library(microViz) ``` ### Loading the ps object and adding metadata ```{r Loading files, message=FALSE, warning=FALSE, code_folding=TRUE} ps = readRDS("../Metabarcoding_data/ps_run.rds") metadata = read_csv("../Metadata/metadataMarsden.csv")%>% clean_names() #%>% as.matrix() %>% as.data.frame() if(all(metadata$sample_id %in% sample_names(ps))){ #cat("\nAll metadata samples are in the ASV table\n") }else{ cat("\nMissing samples in the ASV table?\n") cat(metadata[which(metadata$sample_id %ni% sample_names(ps)),]$sample_id) } if(all(sample_names(ps) %in% metadata$sample_id)){ #cat("\nAll samples in the ASV table are in the metadata\n") metadata = metadata[match(sample_names(ps),metadata$sample_id),] } else{ cat("\nMissing samples in the metadata table?\n") cat(sample_names(ps)[which(sample_names(ps) %ni% metadata$sample_id)]) } #cat("\nAdding metadata to the ps object\n") #all(sample_names(ps) == metadata$sample_id) ps = merge_phyloseq(ps,sample_data(metadata) %>% magrittr::set_rownames(metadata$sample_id)) ps@tax_table@.Data = ps@tax_table@.Data %>% as.data.frame() %>% janitor::clean_names() %>% dplyr::select(-"na") %>% as.matrix() # Normalising 18S taxonomy ps = biohelper::taxo_normalisation(obj = ps, sqlFile = "~/Dropbox/R_scripts_N_bioinfo/Building_packages/biohelper/accessionTaxa.sql", ranks = c("superkingdom","kingdom", "phylum","class","order","family","genus","species"), keepSAR = T) # Adding ASV names in taxa table ps@tax_table@.Data = ps@tax_table@.Data %>% as.data.frame() %>% dplyr::mutate("asv" = taxa_names(ps)) %>% as.matrix() ``` ### Inspecting data and library size ```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE} rd = fread("../Metabarcoding_data/Cleaned_data/Read_counts.csv") rd$percent_retained_reads = round(rd$nonchim / rd$input * 100,2) DT::datatable(rd, options = list(pageLength = 10)) ``` ```{r echo=FALSE, message=FALSE, warning=FALSE, code_folding=TRUE} cat("\n\nData info\n") ps cat("\n\nMetadata info\n") metadata %>% skimr::skim() df <- ps %>% pstoveg_sample() if(is.null(df$sample_id)){ df = df %>% rownames_to_column("sample_id") } df$LibrarySize <- sample_sums(ps) df <- df[order(df$LibrarySize),] df$Index <- seq(nrow(df)) p1=ggplot(data=df[df$amplicon_type == "sample",], aes(x=Index, y=LibrarySize, colour = amplicon_type, label = sample_id)) + geom_point() + theme_stata() + ggtitle("Sequencing depth per sample") +xlab("Sample index") + ylab("Library size") + scale_color_brewer(palette="Paired",name = "Amplicon type") p2=ggplot(data=df[df$amplicon_type != "sample",], aes(x=Index, y=LibrarySize, colour = amplicon_type, label = sample_id)) + geom_point() + theme_stata() + ggtitle("Sequencing depth per blank") +xlab("Sample index") + ylab("Library size") + scale_color_brewer(palette="Paired",name = "Amplicon type") ggplotly(p1) ggplotly(p2) ``` ### Inspection of contamination #### Looking at reads and richness in blanks ```{r echo=FALSE, message=FALSE, warning=FALSE} ps_blanks = ps %>% microViz::ps_filter(amplicon_type != "sample") %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) ps_blanks # # # Creating a table Contamination = ps_blanks %>% sample_names() %>% as.data.frame(stringsAsFactors=F) names(Contamination) = "sample_name" # ## Samples sum Contamination$Read_count = ps_blanks %>% sample_sums() Contamination$Blank_type = ps_blanks@sam_data$amplicon_type # # ## Diversity R = estimate_richness(ps_blanks, measures="Observed") Contamination$Richness = R$Observed # # ## Ordering the table Contamination = Contamination %>% dplyr::arrange(Richness) # Contamination %>% datatable(class = 'cell-border stripe', caption = "Blanks and their read count and richness", options = list(pageLength = 100)) # dir.create("../Metabarcoding_data/Contamination") write.table(Contamination,"../Metabarcoding_data/Contamination/Contamination.txt", quote = F, sep = "\t", row.names = F) ``` #### Proportion of blank ASVs only in blanks ```{r echo=FALSE, message=FALSE, warning=FALSE, collapse=T} ASVs_in_Blanks = taxa_names(ps_blanks) ASVs_in_Blanks_only = ASVs_in_Blanks[which(ASVs_in_Blanks %ni% (ps %>%subset_samples(amplicon_type == "sample") %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) %>% taxa_names()) )] paste0("Percent of ASVs in blanks only found in blanks: ",round(length(ASVs_in_Blanks_only)/length(ASVs_in_Blanks)*100,2)," %") ``` #### Looking at the importance of ASVs found in contamination ```{r echo=FALSE, message=FALSE, warning=FALSE, collapse=T, fig.height=8, fig.width=10} ps_samples_RA = ps %>% subset_samples(amplicon_type == "sample") %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) %>% microbiome::transform(transform = "compositional") ps_samples_RA_contam = ps_samples_RA %>% subset_taxa(asv %in% ASVs_in_Blanks) tb = round(taxa_sums(ps_samples_RA_contam) / nsamples(ps_samples_RA_contam) *100,2) %>% as.data.frame() %>% setNames("Mean relative abundance per taxa (%)") %>% dplyr::arrange("Mean relative abundance per taxa (%)") tax = ps@tax_table@.Data %>% as.data.frame() tb$taxo = "" for (i in 1:nrow(tb)) { tb$taxo[i] = tax[tax$asv == rownames(tb)[i],] %>% unite('taxo', kingdom:genus, sep = ": ") } round(sample_sums(ps_samples_RA_contam) *100,2) %>% as.data.frame() %>% setNames("Mean relative abundance per sample (%)") %>% dplyr::arrange(desc(`Mean relative abundance per sample (%)`)) %>% datatable(class = 'cell-border stripe', caption = "Mean relative abundance of taxa found in blanks in samples", options = list(pageLength = 10)) df_ps_samples_RA_contam = ps_samples_RA_contam %>% psmelt() fig2 <- plot_ly(df_ps_samples_RA_contam, x = ~sample_id, y = ~Abundance*100, color = ~family, type = 'bar') %>% layout(yaxis = list(title = 'Relative abundance (%)'), barmode = 'stack') %>% layout(legend=list(title=list(text=' Family '))) fig2 ASVs_in_Blanks_only2 = psmelt(ps_samples_RA_contam) ASVs_in_Blanks_only2 = ASVs_in_Blanks_only2 %>% dplyr::group_by(superkingdom, kingdom, phylum, class, order, family, genus, species, asv) %>% dplyr::summarise(Mean_RA = round(mean(Abundance)*100,2), SD = sd(Abundance)*100 ) write.table(ASVs_in_Blanks_only2, "../Metabarcoding_data/Contamination/Taxo_blank_ASVs_in_samples.txt", quote = F, row.names = F, sep = "\t") ``` #### Effect of removing blanks following different methodologies ```{r echo=FALSE} cat("\nRemoving all taxa found in blanks\n") ps_trimmed = ps %>% biohelper::ps_decon(method = "complete_asv_removal") cat("\nRemoving reads from taxa found in blanks using max_v\n") ps_trimmed = ps %>% biohelper::ps_decon(method = "max_v") cat("\nRemoving reads from taxa found in blanks using microDecon\n") ps_trimmed = ps %>% biohelper::ps_decon(method = "microDecon", group = "matrix") # In case contamination was substantial and we decide to keep all reads/ASVs to avoid creating a large impact on the data ps_trimmed = ps %>% subset_samples(amplicon_type %in% c("sample","Sample")) %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) ``` ### Looking at eukaryotic and non-eukaryotic taxa ```{r echo=FALSE, fig.height=5, fig.width=10, collapse=TRUE} paste0("Number of reads from Eukaryotes: ",ps_trimmed %>% subset_taxa(superkingdom == "Eukaryota" | family %ni% "Hominidae") %>% sample_sums() %>% sum()) paste0("Number of reads from non-eukaryotes: ",ps_trimmed %>% subset_taxa(superkingdom != "Eukaryota" | family %in% "Hominidae")%>% sample_sums() %>% sum()) Non_euk_ASVs = ps_trimmed %>% subset_taxa(superkingdom != "Eukaryota" | family %in% "Hominidae") %>% taxa_names() ps_trimmed_non_euk = ps_trimmed %>% transform_sample_counts(function(x) 100 * x/sum(x)) %>% subset_taxa(superkingdom!="Eukaryota"| family %in% "Hominidae") ps_trimmed_non_euk@tax_table@.Data = ps_trimmed_non_euk@tax_table@.Data %>% as.data.frame() %>% replace(is.na(.), "NA") %>% as.matrix() tax2 = ps_trimmed_non_euk@tax_table@.Data %>% as.data.frame() plot_bar(ps_trimmed_non_euk, 'sample_id', fill = "phylum", title = "Non-eukaryotes reads") + ylab("Reads (%)") + theme_calc() + theme(axis.text.x = element_text(angle = 90, size=4)) + xlab("Sample unique ID") # Looking at percent of ASVs belonging to non-Eukaryota Non_euk_ASVs_nb = ps_trimmed %>% subset_taxa(superkingdom != "Eukaryota" | family %in% "Hominidae") %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) %>% taxa_names() %>% length() Euk_ASVs_nb = ps_trimmed %>% subset_taxa(superkingdom == "Eukaryota" | family %ni% "Hominidae") %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) %>% taxa_names() %>% length() paste0("Percent of non-eukaryotes ASVs: ",round(Non_euk_ASVs_nb/Euk_ASVs_nb *100,2)," %") # Looking at percent of reads belonging to non-Eukaryota Non_euk_reads_nb = ps_trimmed %>% subset_taxa(superkingdom != "Eukaryota" | family %in% "Hominidae") %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) %>% sample_sums() %>% sum() Euk_reads_nb = ps_trimmed %>% subset_taxa(superkingdom == "Eukaryota" | family %ni% "Hominidae") %>% phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) %>% sample_sums() %>% sum() paste0("Percent of non-eukaryotes reads: ",round(Non_euk_reads_nb/Euk_reads_nb *100,5)," %") ``` #### Removing non-eukaryotic ASVs (including homonidae) ```{r echo=FALSE, message=FALSE, warning=FALSE, collapse=T} ps_trimmed_euk = ps_trimmed %>% subset_taxa(superkingdom=="Eukaryota" & family %ni% "Hominidae" & class %ni% "Aves") # # Looking at read counts # cat("\nBefore data\n") # ps_trimmed # paste0("Total number of reads: ",ps_trimmed %>% sample_sums() %>% sum()) # cat("\nAfter data\n") # ps_trimmed_euk # paste0("Total number of reads: ",ps_trimmed_euk %>% sample_sums() %>% sum()) # Adding reads lost for sediment samples (processed in another script) before = ps_trimmed %>% sample_sums() %>% sum() after = ps_trimmed_euk %>% sample_sums() %>% sum() before_r = ps_trimmed %>% taxa_names() %>% length() after_r = ps_trimmed_euk %>% taxa_names() %>% length() cat("\nTotal number of lost ASVs and reads after non-eukaryote ASVs removal\n") paste0("Lost reads: ", before-after) paste0("Lost ASVs: ", before_r-after_r) cat("\nPercent of lost ASVs and reads after non-eukaryote ASVs removal\n") paste0("Lost reads: ",round(100-(after/before * 100),2)," %") paste0("Lost ASVs: ",round(100-(after_r/before_r * 100),2)," %") ``` ### Looking at number of rare ASVs ```{r echo=FALSE, fig.height=6, fig.width=10, message=FALSE, warning=FALSE, collapse=TRUE} TS = sort(taxa_sums(ps_trimmed_euk)) %>% as.data.frame() %>% rownames_to_column("ASV") p1= ggplot(TS, aes(.)) + geom_histogram() + theme_stata() + scale_x_sqrt(limits= c(0,10000), breaks = c(1,5,10,500,2500,5000,7500,10000)) + scale_y_sqrt(breaks = c(100, 1000, 5000,10000,25000,50000)) + xlab("Reads/ASV") + ylab("Number of ASVs") + ggtitle("ASVs distribution") cat("\nProportions of rare ASVs in samples\n") paste0("Singletons: ",taxa_sums(ps_trimmed_euk)[taxa_sums(ps_trimmed_euk) == 1] %>% length(),": ",round(taxa_sums(ps_trimmed_euk)[taxa_sums(ps_trimmed_euk) == 1] %>% length()/taxa_sums(ps_trimmed_euk)%>% length()*100,2)," %") paste0("Doubletons: ",taxa_sums(ps_trimmed_euk)[taxa_sums(ps_trimmed_euk) == 2] %>% length(),": ",round(taxa_sums(ps_trimmed_euk)[taxa_sums(ps_trimmed_euk) == 2] %>% length()/taxa_sums(ps_trimmed_euk)%>% length()*100,2)," %") paste0("Less than 10 reads: ",taxa_sums(ps_trimmed_euk)[taxa_sums(ps_trimmed_euk) <10] %>% length(),": ",round(taxa_sums(ps_trimmed_euk)[taxa_sums(ps_trimmed_euk) <10] %>% length()/taxa_sums(ps_trimmed_euk)%>% length()*100,2)," %") p1 ``` #### Effect of removing rare ASVs (ASVs with less than 2 reads in a minimum of 3 samples) \*This is not performed. It is just to see the impact. ```{r #### Removing taxa with less than 2 reads in a minimum of 3 samples (not performed at the moment), echo=FALSE, message=FALSE, warning=FALSE, collapse=TRUE} # Filtering taxa ------------------------------------------------- ps_trimmed_euk_min = filter_taxa(ps_trimmed_euk, function(x) sum(x > 2) > (3), TRUE) # Adding reads lost for sediment samples (processed in another script) before = ps_trimmed_euk %>% sample_sums() %>% sum() after = ps_trimmed_euk_min %>% sample_sums() %>% sum() cat("\nPercent of lost reads after rare ASVs removal\n") paste0("Lost reads: ",round(100-(after/before * 100),2)," %") cat("\nPercent of lost ASVs after rare ASVs removal\n") paste0("Lost ASVs: ",round(100-(ntaxa(ps_trimmed_euk_min)/ntaxa(ps_trimmed_euk) * 100),2)," %") ``` ### Looking at sequencing depth per sample with rarefaction curves ```{r , message=FALSE, warning=FALSE, collapse=TRUE, include=FALSE} ps_trimmed_euk_min = ps_trimmed_euk dir.create("../Metabarcoding_data/Cleaned_data") dir.create("../Metabarcoding_data/Cleaned_data/Sequencing_depth") s = sample_sums(ps_trimmed_euk_min) %>% sort() x = split(s, ceiling(seq_along(s)/10)) xx <- plyr::ldply(x, function(x){ plyr::rename(ldply(x, function(z) {data.frame(read_count=z)}),c(".id" = ".id2")) }) xx =xx %>% dplyr::group_by(.id) %>% dplyr::mutate(.id3 = max(read_count)) xx = xx[match(sample_names(ps_trimmed_euk_min),xx$.id2),] ps_trimmed_euk_min@sam_data$read_count_gr = paste0("Group_",xx$.id," (<=",xx$.id3,")") ps_trimmed_euk_min@sam_data$read_count_gr = factor(ps_trimmed_euk_min@sam_data$read_count_gr, levels = gtools::mixedsort(ps_trimmed_euk_min@sam_data$read_count_gr %>% unique())) p <- ranacapa::ggrare(ps_trimmed_euk_min, step = 1000, color = "read_count_gr", se = FALSE) p <- p + facet_wrap(~read_count_gr,scales = "free_x") + theme_bw() + ylab("ASV richness") + xlab("Sequencing depth") + guides(color=guide_legend(title="Sequencing depth")) + theme(axis.text.x = element_text(angle = 45)) ggsave(plot = p, filename = "rarefaction_curves.pdf", path = "../Metabarcoding_data/Cleaned_data/Sequencing_depth/", device = "pdf", width = 12, height = 8) ``` ```{r echo=FALSE, fig.height= 8, fig.width=16} ggplotly(p) ``` ### Looking at read count per sample and removing those with less than 1000 reads. ```{r echo=FALSE, message=FALSE, warning=FALSE} df_read_count = ps_trimmed_euk_min %>% phyloseq::sample_sums() %>% sort() %>% as.data.frame(col.names = "Reads") colnames(df_read_count) = "Read count" df_read_count %>% datatable(class = 'cell-border stripe', caption = "Read count per sample after decontamination", options = list(pageLength = 10)) # Filtering samples ------------------------------------------------- ps_trimmed_euk_min_min = phyloseq::prune_samples(phyloseq::sample_sums(ps_trimmed_euk_min) >= 1000, ps_trimmed_euk_min) ``` ### Saving results ```{r message=FALSE, warning=FALSE, code_folding=TRUE} saveRDS(ps_trimmed_euk_min_min,"../Metabarcoding_data/Cleaned_data/ps_clean.rds") # # Txt tables ---------------------------------------------------- ASV_table = ps_trimmed_euk_min_min %>% pstoveg_otu() %>% t() %>% as.data.frame() %>% rownames_to_column('feature-id') write.table(ASV_table,'../Metabarcoding_data/Cleaned_data/ASV_table.txt', quote = F, row.names = F, sep = "\t") Taxo_table = ps_trimmed_euk_min_min@tax_table@.Data %>% as.data.frame() Taxo_table = Taxo_table %>% rownames_to_column('feature-id') write.table(Taxo_table,"../Metabarcoding_data/Cleaned_data/taxonomy_table.txt", quote = F, row.names = F, sep = "\t") Metadata = ps_trimmed_euk_min_min@sam_data %>% data.frame() Metadata = Metadata %>% rownames_to_column('sample-id') write.table(Metadata,"../Metabarcoding_data/Cleaned_data/metadata_table.txt", quote = F, row.names = F, sep = "\t") writeXStringSet(ps_trimmed_euk_min_min@refseq, "../Metabarcoding_data/Cleaned_data/ref_seq.fasta") ``` --- title: "Marsden Cove data analysis" --- ### Loading libraries ```{r Loading libraries, message=FALSE, warning=FALSE, code_folding=TRUE} library(biohelper) library(ggplot2) library(dplyr) library(data.table) library(Biostrings) library(janitor) library(ggthemes) library(DT) library(tibble) library(tidyverse) library(UpSetR) library(vegan) library(plotly) library(crosstalk) library(ranacapa) library(plyr) library(glue) library(RColorBrewer) library(DECIPHER) library(UpSetR) library(theseus) library(microViz) library(grid) library(ggpubr) library(pracma) library(ggbreak) library(patchwork) library(twoddpcr) library(ggpubr) library(pairwiseAdonis) library(metagMisc) # options(rgl.useNULL=TRUE) # library(dpcR) dir.create("../Results_data_analysis") time_func = function(df){ df$time2 = 0 for (i in 1:nrow(df)) { if(df$time[i] == 0.1){ df$time2[i] = 10 }else if(df$time[i] == 0.3){ df$time2[i] = 30 }else if(df$time[i] == 1.0){ df$time2[i] = 60 }else if(df$time[i] == 3.0){ df$time2[i] = 180 }else if(df$time[i] == 6.0){ df$time2[i] = 360 }else if(df$time[i] == 12.0){ df$time2[i] = 720 } } return(df) } ``` ### ddPCR analyses #### Loading data Metadata ```{r} metadata <- read.csv("../Metadata/metadataMarsden.csv") %>% janitor::clean_names() %>% time_func() ``` Sabella ```{r} sabella <- read.csv("../ddPCR_info/preliminary analysis_Sab/PassiveSabella_cleaned.csv", h=T, row.names=1,stringsAsFactors = F) %>% janitor::clean_names() %>% time_func() #Sabella <- Sabella[-c(79,77),] # Why are these being removed? Suspected too high?? meta_s = sabella %>% dplyr::select(c(id,matrix, time, replicates, extraction,time2)) ``` Begula ```{r} # begula_r4 <- readxl::read_xlsx("../ddPCR_data/Begula/Run 4/ddPCR Begula neritinia Run 4 Results Marsden cove 2022-11-02.xlsx") %>% # janitor::clean_names() # # begula_r5 <- readxl::read_xlsx("../ddPCR_data/Begula/Run 5/ddPCR Begula neritinia Run 5 Results Marsden cove 2022-11-03.xlsx") %>% # janitor::clean_names() # # begula = bind_rows(begula_r4 %>% dplyr::select(-well), begula_r5 %>% dplyr::select(-well)) # rm(begula_r4,begula_r5) # colnames(begula)[3] = "conc_copies_u_l" # begula$id = begula$sample # write.csv(begula,"../ddPCR_data/Begula/ddPCR_results.csv",row.names = F) begula = read_csv("../ddPCR_data/Bugula/ddPCR_results.csv") begula = left_join(begula, metadata, by = "id") %>% dplyr::group_by(sample,id, target, matrix, time, time2, replicates) %>% dplyr::summarise( copies_u_l = mean(conc_copies_u_l), accepted_droplets = mean(accepted_droplets), positives = mean(positives), negatives = mean(negatives)) %>% dplyr::mutate(target = "Begula neritina") %>% dplyr::filter(!is.na(matrix)) %>% dplyr::filter(!is.na(copies_u_l)) %>% dplyr::filter(matrix != "field blank") ``` Styela ```{r} # styela_r1 <- readxl::read_xlsx("../ddPCR_data/Styela/Run 1/ddPCR Run 1 Results Styela Marsden cove passive sampling 2022-10-18.xlsx") %>% # janitor::clean_names() # # styela_r2 <- readxl::read_xlsx("../ddPCR_data/Styela/Run 2/ddCR Run 2 Styela Marsden cove passsive sampling copy number Results 22-10-19.xlsx") %>% # janitor::clean_names() # # styela_r3 <- readxl::read_xlsx("../ddPCR_data/Styela/Run 3/ddCR Run 3 Results Styela Marsden cove passsive sampling copy number.xlsx") %>% # janitor::clean_names() %>% # dplyr::mutate(conc_copies_m_l = as.numeric(conc_copies_m_l)) # # styela = bind_rows(styela_r2 %>% dplyr::select(-well), styela_r3 %>% dplyr::select(-well)) # rm(styela_r1,styela_r2,styela_r3) # colnames(styela)[3] = "conc_copies_u_l" # styela$id = styela$sample # write.csv(styela,"../ddPCR_data/Styela/ddPCR_results.csv",row.names = F) styela = read_csv("../ddPCR_data/Styela/ddPCR_results.csv") styela = left_join(styela, metadata, by = "id") %>% dplyr::group_by(sample,id, target, matrix, time, time2, replicates) %>% dplyr::summarise( copies_u_l = mean(conc_copies_u_l), accepted_droplets = mean(accepted_droplets), positives = mean(positives), negatives = mean(negatives)) %>% dplyr::mutate(target = "Styela") %>% dplyr::filter(!is.na(matrix)) %>% dplyr::filter(!is.na(copies_u_l)) %>% dplyr::filter(matrix != "field blank") ``` Undaria ```{r} # undaria_r6 <- readxl::read_xlsx("../ddPCR_data/Undaria/Run 6/ddPCR Run 6 Undaria new probe copies per uL Marsden cove 2022-11-23 Results.xlsx") %>% # janitor::clean_names() # # undaria_r7 <- readxl::read_xlsx("../ddPCR_data/Undaria/Run 7/ddPCR Run 7 Undaria new probe Marsden cove samles copies per uL 2022-06-12 Results.xlsx") %>% # janitor::clean_names() %>% # dplyr::mutate(conc_copies_m_l = as.numeric(conc_copies_m_l)) # # undaria_r8 <- readxl::read_xlsx("../ddPCR_data/Undaria/Run 8/ddPCR Run 8 Undaria new probe Marsden cove samles copies per uL 2022-12-07 Results.xlsx") %>% # janitor::clean_names() %>% # dplyr::mutate(conc_copies_m_l = as.numeric(conc_copies_m_l)) # # undaria = bind_rows(undaria_r7 %>% dplyr::select(-well), undaria_r8 %>% dplyr::select(-well)) # rm(undaria_r6,undaria_r7,undaria_r8) # colnames(undaria)[3] = "conc_copies_u_l" # undaria$id = undaria$sample # write.csv(undaria,"../ddPCR_data/Undaria/ddPCR_results.csv",row.names = F) undaria = read_csv("../ddPCR_data/Undaria/ddPCR_results.csv") undaria = left_join(undaria, metadata, by = "id") %>% dplyr::group_by(sample,id, target, matrix, time, time2, replicates) %>% dplyr::summarise( copies_u_l = mean(conc_copies_u_l), accepted_droplets = mean(accepted_droplets), positives = mean(positives), negatives = mean(negatives)) %>% dplyr::mutate(target = "Undaria") %>% dplyr::filter(!is.na(matrix)) %>% dplyr::filter(!is.na(copies_u_l)) %>% dplyr::filter(matrix != "field blank") ``` #### Creating functions ```{r} reg_plot_func = function(df){ df = df %>% dplyr::mutate("sqrt_copies_uL" = sqrt(copies_u_l), "fthrt_copies_uL" = nthroot(copies_u_l, 4)) ref = df %>% dplyr::filter(matrix == "active") %>% dplyr::pull(sqrt_copies_uL) m_ref = mean(ref) top_ref = m_ref + sd(ref) bottom_ref = m_ref - sd(ref) if(bottom_ref<0){ bottom_ref = 0 } p = ggscatter( df %>% dplyr::filter(matrix != "active"), x = "time2", y = "sqrt_copies_uL", color = "matrix", palette = "jco", add = "loess", #loess conf.int = F, conf.int.level = 0.95 ) + geom_hline(yintercept =top_ref ,alpha=1, linetype = "dotted")+ geom_hline(yintercept =m_ref ,alpha=1, linetype = "twodash")+ geom_hline(yintercept =bottom_ref ,alpha=1, linetype = "dotted")+ facet_wrap(~matrix, scales = "free", nrow = 1) + stat_cor()+ ylab("copies/ul (sqrt)")+ xlab("Submersion time (min)")+ scale_x_continuous(breaks =c(0,10,30,60,180,360,720)) + guides(color=guide_legend(title="Matrix")) return(p) } barplot_func = function(df, sb = NA){ df = df %>% dplyr::mutate("sqrt_copies_uL" = sqrt(copies_u_l), "fthrt_copies_uL" = nthroot(copies_u_l, 4)) data_summary <- function(data, varname, groupnames){ summary_func <- function(x, col){ c(mean = mean(x[[col]], na.rm=TRUE), sd = sd(x[[col]], na.rm=TRUE)) } data_sum<-ddply(data, groupnames, .fun=summary_func,varname) data_sum <- rename(data_sum, c("mean" = varname)) return(data_sum) } df2 <- data_summary(df %>% dplyr::filter(matrix != "active"), varname="sqrt_copies_uL", groupnames=c("matrix", "time2")) %>% dplyr::filter(time2!=0) for (i in 1:nrow(df2)) { df2$bottom_sd[i] = df2$sqrt_copies_uL[i] - df2$sd[i] if(is.na(df2$bottom_sd[i])){ df2$bottom_sd[i] = 0 } if(df2$bottom_sd[i]<0){ df2$bottom_sd[i] = 0 } } ref = df %>% dplyr::filter(matrix == "active") %>% dplyr::pull(sqrt_copies_uL) m_ref = mean(ref) top_ref = m_ref + sd(ref) bottom_ref = m_ref - sd(ref) if(bottom_ref<0){ bottom_ref = 0 } df2$time2 = factor(df2$time2,levels=c(10,30,60,180,360,720)) p<- ggplot(df2, aes(x=time2, y=sqrt_copies_uL, fill=matrix)) + geom_bar(stat="identity", color="black", position=position_dodge()) + geom_errorbar(aes(ymin=bottom_sd, ymax=sqrt_copies_uL+sd), width=.2,position=position_dodge(.9))+ geom_hline(yintercept =top_ref ,alpha=1, linetype = "dotted")+ geom_hline(yintercept =m_ref ,alpha=1, linetype = "twodash")+ geom_hline(yintercept =bottom_ref ,alpha=1, linetype = "dotted")+ theme_classic()+ #scale_y_continuous(breaks=c(0,0.5,1,1.5,2,4.5,5,7.5,8))+ labs(title=" ", x="Submersion time (min)", y = "copies/ul (sqrt)") + guides(fill=guide_legend(title="Matrix")) p = set_palette(p,palette = "jco") # if(all(!is.na(sb))){ # p = p + scale_y_break(breaks = sb) # } return(p) } ``` #### Performing plots and analyses Sabella spalenzanii ```{r} # Plot reg_plot_func(df = sabella)#+facet_wrap(~matrix, scales = "free") ggsave(filename = "../Results_data_analysis/Figures/reg_ss_ddpcr.pdf",width = 15,height = 4) barplot_func(df = sabella, sb = c(2,4.5,5,7.5)) ggsave(filename = "../Results_data_analysis/Figures/bp_ss_ddpcr.pdf",width = 7,height = 4) #Stats res.aov2 <- aov(copies_u_l ~ matrix * time, data = sabella %>% dplyr::filter(matrix!="active")) summary(res.aov2) ``` Begula ```{r} # Plot reg_plot_func(df = begula)#+facet_wrap(~matrix, scales = "free") ggsave(filename = "../Results_data_analysis/Figures/reg_beg_ddpcr.pdf",width = 15,height = 4) barplot_func(df = begula, sb = NA) ggsave(filename = "../Results_data_analysis/Figures/bp_beg_ddpcr.pdf",width = 7,height = 4) #Stats res.aov2 <- aov(copies_u_l ~ matrix * time, data = begula %>% dplyr::filter(matrix!="active")) summary(res.aov2) ``` Styela ```{r} # Plot reg_plot_func(df = styela)#+facet_wrap(~matrix, scales = "free") ggsave(filename = "../Results_data_analysis/Figures/reg_sty_ddpcr.pdf",width = 15,height = 4) barplot_func(df = styela, sb = NA) ggsave(filename = "../Results_data_analysis/Figures/bp_sty_ddpcr.pdf",width = 7,height = 4) #Stats res.aov2 <- aov(copies_u_l ~ matrix * time, data = styela %>% dplyr::filter(matrix!="active")) summary(res.aov2) ``` Undaria ```{r} # Plot reg_plot_func(df = undaria)#+facet_wrap(~matrix, scales = "free") ggsave(filename = "../Results_data_analysis/Figures/reg_und_ddpcr.pdf",width = 15,height = 4) barplot_func(df = undaria, sb = NA) ggsave(filename = "../Results_data_analysis/Figures/bp_und_ddpcr.pdf",width = 7,height = 4) #Stats res.aov2 <- aov(copies_u_l ~ matrix * time, data = undaria %>% dplyr::filter(matrix!="active")) summary(res.aov2) ``` ###scatter plot with exponential model ```{r} # Required libraries library(ggplot2) library(dplyr) library(ggpubr) library(rcompanion) # for nthroot function # Function to plot GLM with exponential curve and annotate p-values reg_plot_func = function(df){ df = df %>% dplyr::mutate( "sqrt_copies_uL" = sqrt(copies_u_l), "fthrt_copies_uL" = nthroot(copies_u_l, 4) ) ref = df %>% dplyr::filter(matrix == "active") %>% dplyr::pull(sqrt_copies_uL) m_ref = mean(ref) top_ref = m_ref + sd(ref) bottom_ref = m_ref - sd(ref) if(bottom_ref < 0){ bottom_ref = 0 } # Calculate the first and last three unique values for x-axis breaks x_breaks = sort(unique(df$time2)) x_breaks = c(x_breaks[1], x_breaks[(length(x_breaks)-2):length(x_breaks)]) p = ggscatter( df %>% dplyr::filter(matrix != "active"), x = "time2", y = "sqrt_copies_uL", color = "matrix", palette = "jco", add = "none", # Remove loess conf.int = F ) + geom_hline(yintercept = top_ref, alpha = 1, linetype = "dotted") + geom_hline(yintercept = m_ref, alpha = 1, linetype = "twodash") + geom_hline(yintercept = bottom_ref, alpha = 1, linetype = "dotted") + facet_wrap(~matrix, scales = "free", nrow = 1) + ylab("copies/ul (sqrt)") + xlab("Submersion time (min)") + scale_x_continuous(breaks = x_breaks) + scale_y_continuous(limits = c(0, 5)) + # Limit the y-axis to 5 guides(color = guide_legend(title = "Matrix")) + theme(axis.text.x = element_text(size = 8), # Adjust x-axis label size axis.text.y = element_text(size = 8), # Adjust y-axis label size axis.title = element_text(size = 10), # Adjust axis title size plot.margin = margin(1, 1, 1, 1, "cm")) # Adjust plot margins for annotations glm_p_values = list() # List to store p-values for each matrix level # Adding GLM fit with exponential curve and calculating p-values for (matrix_level in unique(df$matrix[df$matrix != "active"])) { df_subset = df %>% dplyr::filter(matrix == matrix_level) # Ensure all response values are positive by adding a small constant if necessary df_subset$sqrt_copies_uL = ifelse(df_subset$sqrt_copies_uL <= 0, 1e-6, df_subset$sqrt_copies_uL) glm_model = glm(sqrt_copies_uL ~ time2, data = df_subset, family = gaussian(link = "log")) pred = predict(glm_model, newdata = data.frame(time2 = seq(min(df_subset$time2), max(df_subset$time2), length.out = 100)), type = "response") p = p + geom_line(data = data.frame(time2 = seq(min(df_subset$time2), max(df_subset$time2), length.out = 100), pred = pred, matrix = matrix_level), aes(x = time2, y = pred, color = matrix), size = 1) # Extract p-value from GLM summary glm_summary = summary(glm_model) p_value = coef(glm_summary)["time2", "Pr(>|t|)"] glm_p_values[[matrix_level]] = p_value # Annotate p-value on the plot within its corresponding facet p = p + geom_text(data = data.frame(matrix = matrix_level, label = paste0("p-value: ", round(p_value, 4))), aes(x = min(df_subset$time2), y = 4.5, label = label), # Adjust y position if needed color = "black", size = 3, hjust = 0) } return(p) } # Using the function reg_plot_func(df = sabella) library(ggplot2) library(dplyr) library(ggpubr) library(rcompanion) # for nthroot function # Function to plot GLM with exponential curve and annotate p-values and R values reg_plot_func = function(df){ df = df %>% dplyr::mutate( "sqrt_copies_uL" = sqrt(copies_u_l), "fthrt_copies_uL" = nthroot(copies_u_l, 4) ) ref = df %>% dplyr::filter(matrix == "active") %>% dplyr::pull(sqrt_copies_uL) m_ref = mean(ref) top_ref = m_ref + sd(ref) bottom_ref = m_ref - sd(ref) if(bottom_ref < 0){ bottom_ref = 0 } # Calculate the first and last three unique values for x-axis breaks x_breaks = sort(unique(df$time2)) x_breaks = c(x_breaks[1], x_breaks[(length(x_breaks)-2):length(x_breaks)]) # Determine y-axis limits for styela matrix y_max_styela = ifelse("styela" %in% df$matrix, 1, 1) p = ggscatter( df %>% dplyr::filter(matrix != "active"), x = "time2", y = "sqrt_copies_uL", color = "matrix", palette = "jco", add = "none", # Remove loess conf.int = F ) + geom_hline(yintercept = top_ref, alpha = 1, linetype = "dotted") + geom_hline(yintercept = m_ref, alpha = 1, linetype = "twodash") + geom_hline(yintercept = bottom_ref, alpha = 1, linetype = "dotted") + facet_wrap(~matrix, scales = "free", nrow = 1) + ylab("copies/ul (sqrt)") + xlab("Submersion time (min)") + scale_x_continuous(breaks = x_breaks) + scale_y_continuous(limits = c(0, y_max_styela)) + # Limit the y-axis to 1 for styela and 5 for others guides(color = guide_legend(title = "Matrix")) + theme(axis.text.x = element_text(size = 8), # Adjust x-axis label size axis.text.y = element_text(size = 8), # Adjust y-axis label size axis.title = element_text(size = 10), # Adjust axis title size plot.margin = margin(1, 1, 1, 1, "cm")) # Adjust plot margins for annotations glm_p_values = list() # List to store p-values for each matrix level # Adding GLM fit with exponential curve and calculating p-values and R values for (matrix_level in unique(df$matrix[df$matrix != "active"])) { df_subset = df %>% dplyr::filter(matrix == matrix_level) # Ensure all response values are positive by adding a small constant if necessary df_subset$sqrt_copies_uL = ifelse(df_subset$sqrt_copies_uL <= 0, 1e-6, df_subset$sqrt_copies_uL) glm_model = glm(sqrt_copies_uL ~ time2, data = df_subset, family = gaussian(link = "log")) pred = predict(glm_model, newdata = data.frame(time2 = seq(min(df_subset$time2), max(df_subset$time2), length.out = 100)), type = "response") p = p + geom_line(data = data.frame(time2 = seq(min(df_subset$time2), max(df_subset$time2), length.out = 100), pred = pred, matrix = matrix_level), aes(x = time2, y = pred, color = matrix), size = 1) # Extract p-value and R-squared from GLM summary glm_summary = summary(glm_model) p_value = coef(glm_summary)["time2", "Pr(>|t|)"] glm_p_values[[matrix_level]] = p_value r_squared = 1 - glm_summary$deviance / glm_summary$null.deviance r_value = sqrt(r_squared) # Annotate R value and p-value on the plot within its corresponding facet p = p + geom_text(data = data.frame(matrix = matrix_level, label = paste0("R: ", round(r_value, 4), "\np-value: ", round(p_value, 4))), aes(x = min(df_subset$time2), y = y_max_styela * 0.9, label = label), # Adjust y position if needed color = "black", size = 3, hjust = 0) } return(p) } reg_plot_func(df = styela) reg_plot_func(df = begula) # Save the plot ggsave(filename = "../Results_data_analysis/Figures/reg_ss_ddpcr.pdf", width = 15, height = 4) ``` ### Metabarcoding analyses #### Loading the data ```{r} # 18S ############################################################# # Load the RDS file ps <- readRDS("../Metabarcoding_data/Cleaned_data/ps_clean.rds") m = ps %>% pstoveg_sample() ``` #### Taxonomic composition (per time per membrane) Do we capture any of the four NIS with 18S? ```{r} ps_RA = ps %>% tax_fix(unknowns = c("Unknown Family")) %>% microViz::tax_agg(rank = "family") %>% microbiome::transform(transform = "compositional") ps_RA@sam_data$time = factor(ps_RA@sam_data$time,levels = ps_RA@sam_data$time %>% unique()) ps_RA@sam_data$sample_id = factor(ps_RA@sam_data$sample_id, levels = ps_RA %>% pstoveg_sample() %>% dplyr::arrange(time) %>% dplyr::pull(sample_id)) l = levels(ps_RA@sam_data$sample_id) names(l) = 1:length(l) sample_names(ps_RA) = names(l[ps_RA@sam_data$sample_id]) ps_RA = microViz::phyloseq_validate(ps_RA) library(speedyseq) p = taxo_bar_plot(ps_RA, rank1 = "phylum", rank2 = "family", colors=c("cyan","palegreen","dodgerblue","white","yellow","deeppink","lightsalmon"), alpha_num_ordering = T, n_rank1 = NA, n_rank2 = 6) p = p + scale_x_discrete(breaks=sample_names(ps_RA), labels=ps_RA@sam_data$time) pp = p +facet_grid(matrix~., scales="free", space="free") pp = p +facet_wrap(.~matrix, scales="free") pp dir.create("../Results_data_analysis") ggsave(pp + theme(legend.position="none"),filename = "../Results_data_analysis/Figures/taxo_barplot.pdf",width = 10, height = 8) legend <- cowplot::get_legend(pp) ggsave(legend,filename = "../Results_data_analysis/Figures/taxo_barplot_leg.pdf",width = 8, height = 6) ``` #### Identify putative NIS ```{r} nis = readxl::read_xlsx("../Metabarcoding_data/Cleaned_data/NIS.xlsx") %>% dplyr::filter(pident==100) %>% dplyr::arrange(qseqid, desc(pident), desc(length), evalue) %>% dplyr::group_by(qseqid) %>% dplyr::slice_head(n = 1) %>% dplyr::mutate(species2 = gsub(".*;(.*)","\\1",sseqid), species2 = gsub("_"," ",species2)) %>% as.matrix() %>% as.data.frame() names(nis)[1] = "asv" ps_RA = ps %>% microbiome::transform(transform = "compositional") %>% prune_taxa(taxa = nis$asv) tax = ps_RA@tax_table@.Data %>% as.data.frame() tax = left_join(tax, nis %>% dplyr::select(asv, species2)) tax$genus = gsub("(.*) .*","\\1", tax$species2) tax$species = tax$species2 tax$species2=NULL ps_RA@tax_table@.Data = tax %>% as.matrix() rownames(ps_RA@tax_table@.Data) = tax$asv ps_RA@sam_data$time = factor(ps_RA@sam_data$time,levels = ps_RA@sam_data$time %>% unique()) ps_RA@sam_data$sample_id = factor(ps_RA@sam_data$sample_id, levels = ps_RA %>% pstoveg_sample() %>% dplyr::arrange(time) %>% dplyr::pull(sample_id)) l = levels(ps_RA@sam_data$sample_id) names(l) = 1:length(l) sample_names(ps_RA) = names(l[ps_RA@sam_data$sample_id]) ps_RA = microViz::phyloseq_validate(ps_RA) p = taxo_bar_plot(ps_RA, rank1 = "genus", rank2 = "species", colors=c("cyan","palegreen","dodgerblue","white","yellow","deeppink","lightsalmon"), alpha_num_ordering = T, n_rank1 = NA, n_rank2 = 8) p = p + scale_x_discrete(breaks=sample_names(ps_RA), labels=ps_RA@sam_data$time) pp = p +facet_grid(matrix~., scales="free", space="free") pp = p +facet_wrap(.~matrix, scales="free") pp dir.create("../Results_data_analysis") ggsave(pp + theme(legend.position="none"),filename = "../Results_data_analysis/Figures/taxo_barplot_nis.pdf",width = 10, height = 8) legend <- cowplot::get_legend(pp) ggsave(legend,filename = "../Results_data_analysis/Figures/taxo_barplot_leg_nis.pdf",width = 8, height = 6) ``` #### Heatmap of NIS I think there is too many categories to make a nice heatmap. Perhaps the barplot above is best? ```{r} #heatmaps for metazoa/NIS from PAT for matrix as columns or time, ps_RA_nis = ps_RA #ps_temp = ps_RA_nis nis_time_func = function(ps_temp){ ps_temp@sam_data$time = as.factor(ps_temp@sam_data$time) ps_temp@tax_table@.Data[,2] = NA ps_temp@tax_table@.Data[,3] = NA ps_temp@tax_table@.Data[,4] = NA ps_temp@tax_table@.Data[,5] = NA ps_temp@tax_table@.Data[,6] = NA ps_temp@tax_table@.Data[,7] = NA ps_temp = ps_temp%>% tax_glom(taxrank = "species") ps_temp_active = ps_temp %>% subset_samples(matrix == "active") %>% merge_samples(group = "matrix") ps_temp_active@sam_data$time = 0 ps_temp = ps_temp %>% subset_samples(matrix != "active") %>% merge_samples(group = "time") ps_temp = merge_phyloseq(ps_temp,ps_temp_active) %>% phyloseq::filter_taxa(function(x) sum(x) >0, TRUE) taxa_names(ps_temp) = ps_temp@tax_table@.Data[,"species"] %>% as.character() ps_temp_df = ps_temp %>% pstoveg_otu() %>% pracma::nthroot(4) %>% t() hmRA = ComplexHeatmap::Heatmap(ps_temp_df, cluster_columns = F, column_split = factor(colnames(ps_temp_df), levels = gtools::mixedsort(colnames(ps_temp_df) %>% unique())), column_title = NULL, col = hcl.colors(palette="viridis",n=5), row_title_rot = 0, column_title_rot = 0, heatmap_legend_param = list(title = "4th root transformed abundance",title_gp = grid::gpar(fontsize = 8),title_position = "lefttop-rot", labels_gp = grid::gpar(fontsize = 8)), column_names_rot = 35, row_names_gp = grid::gpar(fontsize = 8), column_names_gp = grid::gpar(fontsize = 10))#,column_order = gtools::mixedsort(colnames(ps_g_trimmed) %>% unique())) return(hmRA) } res_sponge = nis_time_func(ps = ps_RA_nis %>% subset_samples(matrix %in% c("sponge","active"))) res_spongeW = nis_time_func(ps = ps_RA_nis %>% subset_samples(matrix %in% c("sponge water","active"))) res_nylon = nis_time_func(ps = ps_RA_nis %>% subset_samples(matrix %in% c("nylon","active"))) res_nylon_disc = nis_time_func(ps = ps_RA_nis %>% subset_samples(matrix %in% c("nylon disc","active"))) res_nylon_mesh = nis_time_func(ps = ps_RA_nis %>% subset_samples(matrix %in% c("nylon mesh","active"))) ``` #### Richness (per time per membrane) Overall richness ```{r} reg_plot_func2 = function(ps_temp){ ps_temp@sam_data$richness = rarefy_even_depth(ps_temp, rngseed=11, sample.size= min(sample_sums(ps_temp)), replace=F) %>% phyloseq::estimate_richness(measures = "Observed") %>% dplyr::pull(Observed) m_temp = ps_temp %>% pstoveg_sample() ref = m_temp %>% dplyr::filter(matrix == "active") %>% dplyr::pull(richness) m_ref = mean(ref) top_ref = m_ref + sd(ref) bottom_ref = m_ref - sd(ref) # if(bottom_ref<0){ # bottom_ref = 0 # } p = ggscatter( m_temp %>% dplyr::filter(matrix != "active"), x = "time2", y = "richness", color = "matrix", palette = "jco", add = "reg", #loess conf.int = F, conf.int.level = 0.95 ) + geom_hline(yintercept =top_ref ,alpha=1, linetype = "dotted")+ geom_hline(yintercept =m_ref ,alpha=1, linetype = "twodash")+ geom_hline(yintercept =bottom_ref ,alpha=1, linetype = "dotted")+ facet_wrap(~matrix, scales = "fixed", nrow = 1) + stat_cor()+ ylab("Richness")+ xlab("Submersion time (min)")+ scale_x_continuous(breaks =c(0,10,30,60,180,360,720)) + guides(color=guide_legend(title="Matrix")) return(p) } ``` ```{r} #####with exponential model######### library(ggplot2) library(dplyr) library(ggpubr) library(phyloseq) library(vegan) reg_plot_func2 = function(ps_temp) { # Estimate richness ps_temp@sam_data$richness = rarefy_even_depth(ps_temp, rngseed = 11, sample.size = min(sample_sums(ps_temp)), replace = F) %>% phyloseq::estimate_richness(measures = "Observed") %>% dplyr::pull(Observed) # Convert to data frame m_temp = ps_temp %>% pstoveg_sample() # Calculate reference lines ref = m_temp %>% dplyr::filter(matrix == "active") %>% dplyr::pull(richness) m_ref = mean(ref) top_ref = m_ref + sd(ref) bottom_ref = m_ref - sd(ref) p = ggscatter( m_temp %>% dplyr::filter(matrix != "active"), x = "time2", y = "richness", color = "matrix", palette = "jco", add = "none", # Remove loess, add = "reg.line" removed for manual addition conf.int = F, conf.int.level = 0.95 ) + geom_hline(yintercept = top_ref, alpha = 1, linetype = "dotted") + geom_hline(yintercept = m_ref, alpha = 1, linetype = "twodash") + geom_hline(yintercept = bottom_ref, alpha = 1, linetype = "dotted") + facet_wrap(~matrix, scales = "fixed", nrow = 1) + ylab("Richness") + xlab("Submersion time (min)") + scale_x_continuous(breaks = c(10, 180, 360, 720)) + guides(color = guide_legend(title = "Matrix")) + theme(axis.text.x = element_text(size = 8), # Adjust x-axis label size axis.text.y = element_text(size = 8), # Adjust y-axis label size axis.title = element_text(size = 10)) # Adjust axis title size glm_p_values = list() # List to store p-values for each matrix level # Adding GLM fit with exponential curve and calculating p-values and R-squared values for (matrix_level in unique(m_temp$matrix[m_temp$matrix != "active"])) { df_subset = m_temp %>% dplyr::filter(matrix == matrix_level) # Ensure all response values are positive by adding a small constant if necessary df_subset$richness = ifelse(df_subset$richness <= 0, 1e-6, df_subset$richness) # Fit GLM with log link glm_model = glm(richness ~ time2, data = df_subset, family = poisson(link = "log")) pred = predict(glm_model, newdata = data.frame(time2 = seq(min(df_subset$time2), max(df_subset$time2), length.out = 100)), type = "response") # Add GLM regression line to plot p = p + geom_line(data = data.frame(time2 = seq(min(df_subset$time2), max(df_subset$time2), length.out = 100), richness = pred, matrix = matrix_level), aes(x = time2, y = richness, color = matrix), size = 1) # Extract p-value and R-squared from the model summary glm_summary = summary(glm_model) p_value = coef(glm_summary)["time2", "Pr(>|z|)"] glm_p_values[[matrix_level]] = p_value r_squared = 1 - glm_summary$deviance / glm_summary$null.deviance r_value = sqrt(r_squared) # Annotate R-squared and p-value on the plot within its corresponding facet p = p + geom_text(data = data.frame(matrix = matrix_level, label = paste0("R²: ", round(r_squared, 4), "\np-value: ", round(p_value, 4))), aes(x = min(df_subset$time2), y = max(df_subset$richness) * 0.9, label = label), color = "black", size = 3, hjust = 0) } return(p) } ``` ```{r} p_r = reg_plot_func2(ps_temp = ps) ggsave(plot = p_r, filename = "../Results_data_analysis/Figures/richness_nis.pdf",width = 12, height = 5) p_r ``` NIS richness / abundance ```{r} p_rnis = reg_plot_func2(ps_temp = ps %>% prune_taxa(taxa = nis$asv) ) ggsave(plot = p_rnis, filename = "../Results_data_analysis/Figures/richness_nis.pdf",width = 12, height = 5) p_rnis ``` #### AMCOM-BC Investigating whether the capture of some NIS taxa is associated with time and/or matrix NOT RUN at the moment because not sure it is really worth for the MS ```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE} pst = ps %>% prune_taxa(taxa = nis$asv) pst@sam_data$matrix = factor(pst@sam_data$matrix, levels = c("active", "sponge", "sponge water","nylon","nylon mesh","nylon disc")) library(ANCOMBC) res_matrix = ancombc2(data = pst, #%>% subset_samples(time2 %in% c(0,720)) fix_formula = "matrix + time", group = "matrix", p_adj_method = "BH", alpha = 0.05, neg_lb = T, global = TRUE, pairwise = TRUE, dunnet = TRUE) ancom_res_matrix = res_matrix$res %>% pivot_longer(cols = starts_with("diff_matrix"), names_to = "matrix", values_to = "diff") %>% dplyr::filter(diff == T) %>% dplyr::select(taxon,contains("matrix")) %>% dplyr::mutate(variable = "matrix") ancom_res_time = res_matrix$res %>% pivot_longer(cols = starts_with("diff_time"), names_to = "time", values_to = "diff") %>% dplyr::filter(diff == T) %>% dplyr::select(taxon,contains("time")) %>% dplyr::mutate(variable = "time") #--------------------------------------------------------- # prep_func = function(resdf, name, tax){ # tax_rank_func = function(res){ # taxrank = c("Phylum","Class","Order","Family","Genus","Species") # res$rank="" # for (i in 1:nrow(res)) { # if(grepl(x = res$taxon[i],pattern=str_c(taxrank, collapse="|"))){ # res$rank[i] = unlist(str_extract_all(str = res$taxon[i], str_c(taxrank, collapse="|"))) # }else{ # res$rank[i]="ASV" # } # } # return(res) # } # resdf = tax_rank_func(resdf) # if(nrow(resdf)>0){ # #resdf$lfc_n = paste0("lfc_",resdf$variable) # resdf = resdf %>% pivot_longer(cols= starts_with("lfc_"), values_to = "lfc") # #dplyr::filter(lfc_n == name) # df = resdf %>% # dplyr::arrange(desc(lfc)) %>% # dplyr::mutate(direct = ifelse(lfc > 0, "Possitively associated with variable", "Negatively associated with variable")) #%>% dplyr::mutate(taxon = gsub(x = taxon, " Genus","")) # # df$taxon = factor(df$taxon, levels = df$taxon) # df$direct = factor(df$direct, # levels = c("Possitively associated with variable", "Negatively associated with variable")) # df = left_join(df, tax) # # # } # tax = # # # p1 = ggplot(data = df, # aes(x = taxon, y = lfc, fill = variable, color = variable)) + # geom_bar(stat = "identity", width = 0.7, # position = position_dodge(width = 0.4)) + # #geom_errorbar(aes(ymin = lfc - se_treatmentT1, ymax = lfc_treatmentT1 + se_treatmentT1), width = 0.2, # #position = position_dodge(0.05), color = "black") + # labs(x = NULL, y = "Log fold change", # title = "Log fold changes") + # scale_fill_discrete(name = "Variable") + # scale_color_discrete(name = NULL) + # ggthemes::theme_clean() + coord_flip() # mylist[["p"]] = p1 # } # } # mylist[["out"]] = out # return(mylist) # } ``` #### Beta-diversity ##### Ordination ```{r message=FALSE, warning=FALSE} # source("envtoverlay_oli.R") pord1 = envtoverlay_oli(ps, covariates="time2", ordmet = "PCA", size = 3, shape = "matrix") pord1 ggsave(plot = pord1, filename = "../Results_data_analysis/Figures/pca.pdf",width = 7, height = 7) ``` ##### PERMANOVA ```{r} perm_func = function(ps_temp, var){ ps_temp = ps_temp %>% microbiome::transform(transform = "compositional") m_temp = ps_temp %>% pstoveg_sample() dist = distance(ps_temp, method="bray") formula = stats::as.formula(sprintf("%s ~ %s", "dist", paste(var,collapse = " * "))) ado = adonis2(formula = formula, data = m_temp, permutations = 999) return(ado) } perm_func(ps_temp = ps, var = c("time2","matrix")) perm_func(ps_temp = ps %>% subset_samples(matrix=="nylon"),var = c("time2")) perm_func(ps_temp = ps %>% subset_samples(matrix=="nylon disc"),var = c("time2")) perm_func(ps_temp = ps %>% subset_samples(matrix=="nylon mesh"),var = c("time2")) perm_func(ps_temp = ps %>% subset_samples(matrix=="sponge"),var = c("time2")) perm_func(ps_temp = ps %>% subset_samples(matrix=="sponge water"),var = c("time2")) # Pairwise-PERMANOVA ppperm_func = function(ps_temp){ ps_temp = ps_temp %>% microbiome::transform(transform = "compositional") m_temp = ps_temp %>% pstoveg_sample() dist = distance(ps_temp, method="bray") perm = pairwise.adonis(dist, m_temp$matrix) return(perm) } ppperm_res_all = ppperm_func(ps) ppperm_res_12 = ppperm_func(ps %>% subset_samples(time%ni% c(0.1,0.3,1.0,3.0,6.0))) ppperm_res_6 = ppperm_func(ps %>% subset_samples(time%ni% c(0.1,0.3,1.0,3.0,12))) ppperm_res_3 = ppperm_func(ps %>% subset_samples(time%ni% c(0.1,0.3,1.0,6.0,12))) ppperm_res_1 = ppperm_func(ps %>% subset_samples(time%ni% c(0.1,0.3,3.0,6.0,12))) ppperm_res_03 = ppperm_func(ps %>% subset_samples(time%ni% c(0.1,1.0,3.0,6.0,12))) ppperm_res_01 = ppperm_func(ps %>% subset_samples(time%ni% c(0.3,1.0,3.0,6.0,12))) #networks for matrix community over time? #Indicatorspecies OR ANCOM-BC per matrix and time ``` ##### Evolution of beta-diversity similarity over time ```{r message=FALSE, warning=FALSE, include=FALSE} similarity_time_func = function(ps, group, method, earliest_time){ ps_trimmed = ps %>% subset_samples(matrix=="nylon") %>% ps_mutate(time2 = as.factor(time2)) ps_rar = ps_trimmed %>% rarefy_even_depth(sample.size = min(sample_sums(ps)), rngseed = 31, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) ps_rar_clr <- ps_rar %>% microbiome::transform("compositional") temp = phyloseq_group_dissimilarity(ps_rar_clr, group = group, method = method, between_groups = T, justDF = T) %>% subset(Comparison =="between-groups") nylon = temp[gsub("(^.*)-.*$","\\1",temp$Group) == as.character(earliest_time),] %>% dplyr::mutate(matrix="nylon") ps_trimmed = ps %>% subset_samples(matrix=="nylon disc") %>% ps_mutate(time2 = as.factor(time2)) ps_rar = ps_trimmed %>% rarefy_even_depth(sample.size = min(sample_sums(ps)), rngseed = 31, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) ps_rar_clr <- ps_rar %>% microbiome::transform("compositional") temp = phyloseq_group_dissimilarity(ps_rar_clr, group = group, method = method, between_groups = T, justDF = T) %>% subset(Comparison =="between-groups") nylon_disc = temp[gsub("(^.*)-.*$","\\1",temp$Group) == as.character(earliest_time),] %>% dplyr::mutate(matrix="nylon disc") ps_trimmed = ps %>% subset_samples(matrix=="nylon mesh") %>% ps_mutate(time2 = as.factor(time2)) ps_rar = ps_trimmed %>% rarefy_even_depth(sample.size = min(sample_sums(ps)), rngseed = 31, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) ps_rar_clr <- ps_rar %>% microbiome::transform("compositional") temp = phyloseq_group_dissimilarity(ps_rar_clr, group = group, method = method, between_groups = T, justDF = T) %>% subset(Comparison =="between-groups") nylon_mesh = temp[gsub("(^.*)-.*$","\\1",temp$Group) == as.character(earliest_time),] %>% dplyr::mutate(matrix="nylon mesh") ps_trimmed = ps %>% subset_samples(matrix=="sponge") %>% ps_mutate(time2 = as.factor(time2)) ps_rar = ps_trimmed %>% rarefy_even_depth(sample.size = min(sample_sums(ps)), rngseed = 31, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) ps_rar_clr <- ps_rar %>% microbiome::transform("compositional") temp = phyloseq_group_dissimilarity(ps_rar_clr, group = group, method = method, between_groups = T, justDF = T) %>% subset(Comparison =="between-groups") sponge = temp[gsub("(^.*)-.*$","\\1",temp$Group) == as.character(earliest_time),] %>% dplyr::mutate(matrix="sponge") ps_trimmed = ps %>% subset_samples(matrix=="sponge water") %>% ps_mutate(time2 = as.factor(time2)) ps_rar = ps_trimmed %>% rarefy_even_depth(sample.size = min(sample_sums(ps)), rngseed = 31, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) ps_rar_clr <- ps_rar %>% microbiome::transform("compositional") temp = phyloseq_group_dissimilarity(ps_rar_clr, group = group, method = method, between_groups = T, justDF = T) %>% subset(Comparison =="between-groups") sponge_water = temp[gsub("(^.*)-.*$","\\1",temp$Group) == as.character(earliest_time),] %>% dplyr::mutate(matrix="sponge water") beta_all = bind_rows(nylon, nylon_disc, nylon_mesh, sponge, sponge_water) beta_all$Group = as.character(beta_all$Group) beta_all = beta_all %>% dplyr::mutate(Group = str_replace(Group, '10-30', "30")) %>% dplyr::mutate(Group = str_replace(Group, '10-60', "60")) %>% dplyr::mutate(Group = str_replace(Group, '10-180', "180")) %>% dplyr::mutate(Group = str_replace(Group, '10-360', "360")) %>% dplyr::mutate(Group = str_replace(Group, '10-720', "720")) %>% dplyr::mutate(Group = as.numeric(Group)) sbreaks = c(30,60,180,360,720) df.summary <- beta_all %>% dplyr::group_by(Group,matrix) %>% dplyr::summarise( sd = sd(Dist, na.rm = TRUE), Dist = mean(Dist) ) pdiss <- ggplot(beta_all, aes(x = Group, y = Dist, color = matrix, fill = matrix)) + #geom_jitter(size = 0.05, color="gray") + geom_line(size=2, data = df.summary) + #geom_errorbar( aes(ymin = Dist-sd, ymax = Dist+sd), data = df.summary, width = 0.25,size=0.8)+ labs(x = "Deployment time (min)", y = "Beta diversity dissimilarity") + scale_fill_viridis_d(guide = guide_legend(title = "Matrix")) + scale_color_viridis_d(guide = guide_legend(title = "Matrix")) + ggthemes::theme_clean()+ scale_x_continuous(breaks=sbreaks) # + #scale_y_continuous(limits = c(0, NA)) return(pdiss) } pdiss = similarity_time_func(ps = ps , group="time2", method="bray", earliest_time=10) pdiss = set_palette(pdiss,palette = "jco") ggsave(plot = pdiss, filename = paste0("../Results_data_analysis/Figures/Time_dissimilarity.pdf"),width = 9,height = 7,device = "pdf") dd = pdiss$data# %>% dplyr::mutate(deployment="ltd") # smooth_pp = ggplot(dd, aes(x=Group %>% as.numeric(), y=Dist)) + # #geom_smooth(method="gam", formula = y ~ s(x, bs = "cs", k=6),aes(color=location))+ # geom_smooth(method="lm",aes(color=location,linetype = deployment))+ # facet_wrap(.~location, drop = TRUE, scale="free", ncol=3)+ # scale_color_brewer(palette="Paired") + # ggthemes::theme_clean() + # scale_x_continuous(breaks = c(2,6,12,26,39,52))+ # xlab("Settlement time (week)")+ # ylab("Dissimilarity")+ # theme(legend.position="none") # # smooth_pp # ggsave(plot = smooth_pp, filename = paste0("Figures/Time_dissimilarity2.pdf"),width = 12,height = 4,device = "pdf") ```