# ==============================================================================
# Metagenomic Statistical Analysis and Visualization Script
# Version: 2.0
# Author: Bioinformatician
# Date: 2024-01-20
# ==============================================================================

# Install necessary R packages
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("vegan")) install.packages("vegan")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("ggpubr")) install.packages("ggpubr")
if (!require("reshape2")) install.packages("reshape2")
if (!require("RColorBrewer")) install.packages("RColorBrewer")
if (!require("pheatmap")) install.packages("pheatmap")
if (!require("corrplot")) install.packages("corrplot")
if (!require("DESeq2")) {
  if (!require("BiocManager")) install.packages("BiocManager")
  BiocManager::install("DESeq2")
}

# Load packages
library(tidyverse)
library(vegan)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(RColorBrewer)
library(pheatmap)
library(corrplot)
library(DESeq2)

# Set theme and colors
theme_set(theme_bw(base_size = 14))
my_colors <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# Set random seed to ensure reproducible results
set.seed(123)

# ==============================================================================
# Data Loading and Preprocessing
# ==============================================================================

#' Load and preprocess metagenomic data
#' 
#' @param species_file Path to species abundance file
#' @param function_file Path to functional abundance file  
#' @param metadata_file Path to metadata file
#' @return List containing processed data frames
load_metagenomic_data <- function(species_file = "species_abundance.csv",
                                  function_file = "function_abundance.csv",
                                  metadata_file = "metadata.csv") {
  
  # Read data files
  species_abundance <- read.csv(species_file, row.names = 1, header = TRUE, check.names = FALSE)
  function_abundance <- read.csv(function_file, row.names = 1, header = TRUE, check.names = FALSE)
  metadata <- read.csv(metadata_file, row.names = 1, header = TRUE)
  
  # Ensure data consistency
  common_samples <- intersect(colnames(species_abundance), rownames(metadata))
  species_abundance <- species_abundance[, common_samples]
  function_abundance <- function_abundance[, common_samples]
  metadata <- metadata[common_samples, , drop = FALSE]
  
  # Filter low-abundance features
  filter_low_abundance <- function(abundance_table, threshold = 0.001) {
    mean_abundance <- apply(abundance_table, 1, mean)
    filtered_table <- abundance_table[mean_abundance > threshold, ]
    return(filtered_table)
  }
  
  species_filtered <- filter_low_abundance(species_abundance)
  function_filtered <- filter_low_abundance(function_abundance)
  
  # Normalize to relative abundance
  normalize_relative <- function(abundance_table) {
    relative_abundance <- apply(abundance_table, 2, function(x) x/sum(x))
    return(as.data.frame(t(relative_abundance)))
  }
  
  species_relative <- normalize_relative(species_filtered)
  function_relative <- normalize_relative(function_filtered)
  
  return(list(
    species_abundance = species_abundance,
    function_abundance = function_abundance,
    species_relative = species_relative,
    function_relative = function_relative,
    metadata = metadata
  ))
}

# Load data
data_list <- load_metagenomic_data()
species_relative <- data_list$species_relative
function_relative <- data_list$function_relative
species_filtered <- data_list$species_abundance
metadata <- data_list$metadata

# ==============================================================================
# Species Composition Analysis Visualization
# ==============================================================================

#' Create stacked bar plot of top taxa composition
plot_taxa_barplot <- function(abundance_data, metadata, top_n = 10, group_var = "Group") {
  
  # Calculate mean abundance and select top taxa
  mean_abundance <- apply(abundance_data, 2, mean)
  top_taxa <- names(sort(mean_abundance, decreasing = TRUE))[1:top_n]
  
  # Extract top taxa data
  top_data <- abundance_data[, top_taxa, drop = FALSE]
  top_data$Others <- 1 - rowSums(top_data)
  top_data$Sample <- rownames(top_data)
  
  # Merge with metadata
  top_data <- merge(top_data, metadata, by.x = "Sample", by.y = "row.names")
  
  # Convert to long format
  top_data_long <- melt(top_data, id.vars = c("Sample", group_var), 
                        variable.name = "Taxa", value.name = "Abundance")
  
  # Create plot
  p <- ggplot(top_data_long, aes_string(x = "Sample", y = "Abundance", fill = "Taxa")) +
    geom_bar(stat = "identity", position = "stack") +
    facet_grid(as.formula(paste(".~", group_var)), scales = "free_x", space = "free") +
    labs(x = "Sample", y = "Relative Abundance", title = paste("Top", top_n, "Taxa Composition")) +
    scale_fill_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(top_n + 1)) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "right",
      legend.text = element_text(size = 8)
    )
  
  return(p)
}

#' Create box plot for specific taxon across groups
plot_taxa_boxplot <- function(abundance_data, metadata, taxa, group_var = "Group") {
  
  plot_data <- data.frame(
    Abundance = abundance_data[, taxa],
    Sample = rownames(abundance_data)
  )
  plot_data <- merge(plot_data, metadata, by.x = "Sample", by.y = "row.names")
  
  p <- ggplot(plot_data, aes_string(x = group_var, y = "Abundance", fill = group_var)) +
    geom_boxplot(alpha = 0.7, outlier.shape = NA) +
    geom_jitter(position = position_jitter(width = 0.2), size = 2, alpha = 0.7) +
    labs(x = group_var, y = "Relative Abundance", title = paste("Abundance of", taxa)) +
    stat_compare_means(method = "kruskal.test", label = "p.format") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  return(p)
}

# Generate species composition plots
taxa_barplot <- plot_taxa_barplot(species_relative, metadata, top_n = 10, group_var = "Group")
taxa_boxplot <- plot_taxa_boxplot(species_relative, metadata, "Bacteroides", "Group")

print(taxa_barplot)
print(taxa_boxplot)

# ==============================================================================
# Functional Composition Analysis Visualization
# ==============================================================================

#' Create heatmap of functional pathway abundance
plot_function_heatmap <- function(abundance_data, metadata, top_n = 20, group_var = "Group") {
  
  mean_abundance <- apply(abundance_data, 2, mean)
  top_functions <- names(sort(mean_abundance, decreasing = TRUE))[1:top_n]
  top_data <- abundance_data[, top_functions]
  
  # Create annotation data frame
  annotation_df <- data.frame(Group = metadata[, group_var])
  rownames(annotation_df) <- rownames(metadata)
  
  # Create heatmap
  pheatmap(t(as.matrix(top_data)),
           scale = "row",
           clustering_distance_rows = "euclidean",
           clustering_distance_cols = "euclidean",
           clustering_method = "complete",
           annotation_col = annotation_df,
           show_colnames = FALSE,
           main = paste("Top", top_n, "Functional Pathways Heatmap"),
           color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
           fontsize_row = 8)
}

#' Compare specific functional pathways across groups
plot_function_comparison <- function(abundance_data, metadata, function_id, group_var = "Group") {
  
  plot_data <- data.frame(
    Abundance = abundance_data[, function_id],
    Sample = rownames(abundance_data)
  )
  plot_data <- merge(plot_data, metadata, by.x = "Sample", by.y = "row.names")
  
  p <- ggplot(plot_data, aes_string(x = group_var, y = "Abundance", fill = group_var)) +
    geom_boxplot(alpha = 0.7, outlier.shape = NA) +
    geom_jitter(position = position_jitter(width = 0.2), size = 2, alpha = 0.7) +
    labs(x = group_var, y = "Relative Abundance", title = paste("Abundance of", function_id)) +
    stat_compare_means(method = "kruskal.test", label = "p.format") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  return(p)
}

# Generate functional analysis plots
function_heatmap <- plot_function_heatmap(function_relative, metadata, top_n = 20, group_var = "Group")
function_comparison <- plot_function_comparison(function_relative, metadata, "K00001", "Group")

# ==============================================================================
# Alpha Diversity Analysis
# ==============================================================================

#' Calculate alpha diversity indices
calculate_alpha_diversity <- function(abundance_data) {
  
  data_matrix <- as.matrix(abundance_data)
  
  alpha_df <- data.frame(
    Sample = rownames(abundance_data),
    Shannon = diversity(data_matrix, index = "shannon"),
    Simpson = diversity(data_matrix, index = "simpson"),
    Richness = apply(data_matrix > 0, 1, sum),
    Chao1 = estimateR(data_matrix)[2, ],  # Chao1 richness estimator
    ACE = estimateR(data_matrix)[4, ]     # ACE richness estimator
  )
  
  # Calculate Evenness (Pielou's evenness)
  alpha_df$Evenness <- alpha_df$Shannon / log(alpha_df$Richness)
  
  return(alpha_df)
}

#' Visualize alpha diversity across groups
plot_alpha_diversity <- function(alpha_df, group_var = "Group") {
  
  alpha_long <- melt(alpha_df, id.vars = c("Sample", group_var), 
                     variable.name = "Index", value.name = "Value")
  
  p <- ggplot(alpha_long, aes_string(x = group_var, y = "Value", fill = group_var)) +
    geom_boxplot(alpha = 0.7, outlier.shape = NA) +
    geom_jitter(position = position_jitter(width = 0.2), size = 1.5, alpha = 0.7) +
    facet_wrap(~ Index, scales = "free_y", ncol = 3) +
    labs(x = group_var, y = "Diversity Index Value", title = "Alpha Diversity Indices") +
    stat_compare_means(method = "kruskal.test", label = "p.format", size = 3) +
    scale_fill_manual(values = my_colors) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  return(p)
}

# Calculate and visualize alpha diversity
alpha_diversity <- calculate_alpha_diversity(species_relative)
alpha_diversity <- merge(alpha_diversity, metadata, by.x = "Sample", by.y = "row.names")
alpha_plot <- plot_alpha_diversity(alpha_diversity, "Group")

print(alpha_plot)

# ==============================================================================
# Beta Diversity Analysis
# ==============================================================================

#' Calculate beta diversity distance matrix
calculate_beta_diversity <- function(abundance_data, method = "bray") {
  dist_matrix <- vegdist(abundance_data, method = method)
  return(dist_matrix)
}

#' Perform PCoA analysis
perform_pcoa <- function(dist_matrix) {
  pcoa <- cmdscale(dist_matrix, k = 3, eig = TRUE)
  variance_exp <- pcoa$eig / sum(pcoa$eig) * 100
  pcoa_df <- data.frame(
    Sample = rownames(pcoa$points),
    PCo1 = pcoa$points[, 1],
    PCo2 = pcoa$points[, 2],
    PCo3 = pcoa$points[, 3]
  )
  return(list(pcoa_df = pcoa_df, variance_exp = variance_exp))
}

#' Plot PCoA results
plot_pcoa <- function(pcoa_df, variance_exp, group_var = "Group", show_ellipses = TRUE) {
  
  p <- ggplot(pcoa_df, aes_string(x = "PCo1", y = "PCo2", color = group_var)) +
    geom_point(size = 3, alpha = 0.7) +
    labs(
      x = paste0("PCo1 (", round(variance_exp[1], 2), "%)"),
      y = paste0("PCo2 (", round(variance_exp[2], 2), "%)"),
      title = "Principal Coordinates Analysis (PCoA)",
      color = group_var
    ) +
    scale_color_manual(values = my_colors) +
    theme(legend.position = "right")
  
  if (show_ellipses) {
    p <- p + stat_ellipse(level = 0.95, linetype = 2, alpha = 0.5)
  }
  
  return(p)
}

#' Perform PERMANOVA analysis
perform_permanova <- function(dist_matrix, metadata, formula_str, permutations = 999) {
  permanova <- adonis2(
    as.formula(formula_str),
    data = metadata,
    permutations = permutations,
    method = "bray"
  )
  return(permanova)
}

# Calculate beta diversity and perform PCoA
bray_dist <- calculate_beta_diversity(species_relative)
pcoa_result <- perform_pcoa(bray_dist)
pcoa_df <- merge(pcoa_result$pcoa_df, metadata, by.x = "Sample", by.y = "row.names")
pcoa_plot <- plot_pcoa(pcoa_df, pcoa_result$variance_exp, "Group")

# Perform PERMANOVA
permanova_result <- perform_permanova(bray_dist, metadata, "bray_dist ~ Group")

print(pcoa_plot)
print(permanova_result)

# ==============================================================================
# Differential Abundance Analysis
# ==============================================================================

#' Perform differential abundance analysis using DESeq2
perform_deseq2 <- function(abundance_data, metadata, group_var = "Group", reference = NULL) {
  
  # Convert to integer counts (pseudo-counts)
  count_data <- round(abundance_data * 10000)
  count_data[count_data < 0] <- 0  # Ensure non-negative
  
  # Create DESeq2 object
  dds <- DESeqDataSetFromMatrix(
    countData = count_data,
    colData = metadata,
    design = as.formula(paste("~", group_var))
  )
  
  # Set reference level if provided
  if (!is.null(reference)) {
    dds[[group_var]] <- relevel(dds[[group_var]], ref = reference)
  }
  
  # Run DESeq2 analysis
  dds <- DESeq(dds)
  res <- results(dds)
  res_df <- as.data.frame(res)
  res_df$Feature <- rownames(res_df)
  
  return(list(results = res_df, dds = dds))
}

#' Create volcano plot for differential abundance results
plot_volcano <- function(deseq_results, p_cutoff = 0.05, fc_cutoff = 2, top_n = 10) {
  
  volcano_data <- deseq_results %>%
    mutate(
      Significance = case_when(
        padj < p_cutoff & log2FoldChange > log2(fc_cutoff) ~ "Up-regulated",
        padj < p_cutoff & log2FoldChange < -log2(fc_cutoff) ~ "Down-regulated",
        TRUE ~ "Not significant"
      ),
      Label = ifelse(-log10(padj) > 10 & abs(log2FoldChange) > 2, Feature, "")
    )
  
  p <- ggplot(volcano_data, aes(x = log2FoldChange, y = -log10(padj), color = Significance)) +
    geom_point(alpha = 0.6, size = 1.5) +
    geom_hline(yintercept = -log10(p_cutoff), linetype = "dashed", color = "black", alpha = 0.5) +
    geom_vline(xintercept = c(-log2(fc_cutoff), log2(fc_cutoff)), linetype = "dashed", color = "black", alpha = 0.5) +
    scale_color_manual(values = c("Down-regulated" = "blue", "Not significant" = "gray", "Up-regulated" = "red")) +
    labs(x = "log2(Fold Change)", y = "-log10(Adjusted p-value)", title = "Volcano Plot - Differential Abundance") +
    theme(legend.position = "right") +
    geom_text(aes(label = Label), vjust = 1.5, hjust = 0.5, size = 3, check_overlap = TRUE)
  
  return(p)
}

#' Create heatmap of differentially abundant features
plot_differential_heatmap <- function(abundance_data, deseq_results, metadata, 
                                      p_cutoff = 0.05, fc_cutoff = 2, top_n = 25,
                                      group_var = "Group") {
  
  # Filter significant features
  sig_features <- deseq_results %>%
    filter(padj < p_cutoff & abs(log2FoldChange) > log2(fc_cutoff)) %>%
    arrange(padj) %>%
    head(top_n) %>%
    pull(Feature)
  
  if (length(sig_features) == 0) {
    warning("No significant features found for the given thresholds.")
    return(NULL)
  }
  
  # Extract significant data
  sig_data <- abundance_data[, sig_features, drop = FALSE]
  
  # Create annotation
  annotation_df <- data.frame(Group = metadata[, group_var])
  rownames(annotation_df) <- rownames(metadata)
  
  # Create heatmap
  pheatmap(t(as.matrix(sig_data)),
           scale = "row",
           clustering_distance_rows = "euclidean",
           clustering_distance_cols = "euclidean",
           clustering_method = "complete",
           annotation_col = annotation_df,
           show_colnames = FALSE,
           main = paste("Differentially Abundant Features (FDR <", p_cutoff, "& |FC| >", fc_cutoff, ")"),
           color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
           fontsize_row = 8)
}

# Perform differential abundance analysis
deseq_results <- perform_deseq2(species_filtered, metadata, "Group", reference = "Control")
volcano_plot <- plot_volcano(deseq_results$results)
differential_heatmap <- plot_differential_heatmap(species_relative, deseq_results$results, metadata)

print(volcano_plot)

# ==============================================================================
# Functional Enrichment Analysis
# ==============================================================================

#' Perform functional enrichment analysis
perform_enrichment_analysis <- function(deseq_results, annotation_file = NULL, p_cutoff = 0.05, fc_cutoff = 2) {
  
  # Filter significant features
  sig_features <- deseq_results %>%
    filter(padj < p_cutoff & abs(log2FoldChange) > log2(fc_cutoff)) %>%
    pull(Feature)
  
  if (is.null(annotation_file)) {
    # Return basic statistics if no annotation file provided
    return(data.frame(
      Significant_Features = length(sig_features),
      Upregulated = sum(deseq_results$padj < p_cutoff & deseq_results$log2FoldChange > log2(fc_cutoff), na.rm = TRUE),
      Downregulated = sum(deseq_results$padj < p_cutoff & deseq_results$log2FoldChange < -log2(fc_cutoff), na.rm = TRUE)
    ))
  }
  
  # Load pathway annotations (example structure)
  pathway_annotation <- read.csv(annotation_file, header = TRUE)
  
  # Perform enrichment analysis
  sig_pathways <- pathway_annotation %>%
    filter(Feature %in% sig_features) %>%
    group_by(Pathway) %>%
    summarize(Count = n(), .groups = 'drop') %>%
    arrange(desc(Count))
  
  return(sig_pathways)
}

#' Plot enrichment results
plot_enrichment <- function(enrichment_results, top_n = 15, title = "Pathway Enrichment Analysis") {
  
  if (nrow(enrichment_results) == 0) {
    message("No enrichment results to plot.")
    return(NULL)
  }
  
  top_pathways <- enrichment_results %>%
    head(top_n)
  
  p <- ggplot(top_pathways, aes(x = reorder(Pathway, Count), y = Count)) +
    geom_bar(stat = "identity", fill = "steelblue", alpha = 0.7) +
    coord_flip() +
    labs(x = "Pathway", y = "Number of Features", title = title) +
    theme_minimal() +
    theme(axis.text.y = element_text(size = 10))
  
  return(p)
}

# Example enrichment analysis (requires annotation file)
# enrichment_results <- perform_enrichment_analysis(deseq_results$results, "pathway_annotation.csv")
# enrichment_plot <- plot_enrichment(enrichment_results)

# ==============================================================================
# Correlation Analysis
# ==============================================================================

#' Calculate correlations between features and environmental factors
calculate_correlations <- function(abundance_data, env_data, method = "spearman") {
  cor_matrix <- cor(abundance_data, env_data, method = method)
  return(cor_matrix)
}

#' Plot correlation heatmap
plot_correlation_heatmap <- function(cor_matrix, p_matrix = NULL, title = "Correlation Heatmap") {
  
  corrplot(cor_matrix,
           method = "color",
           type = "full",
           order = "hclust",
           tl.cex = 0.7,
           tl.col = "black",
           col = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
           main = title)
  
  if (!is.null(p_matrix)) {
    corrplot(cor_matrix,
             p.mat = p_matrix,
             insig = "label_sig",
             sig.level = c(0.001, 0.01, 0.05),
             pch.cex = 0.8,
             pch.col = "white",
             add = TRUE)
  }
}

#' Perform Mantel test
perform_mantel_test <- function(species_data, env_data, method = "bray", permutations = 999) {
  species_dist <- vegdist(species_data, method = method)
  env_dist <- vegdist(env_data, method = "euclidean")
  mantel_test <- mantel(species_dist, env_dist, method = "spearman", permutations = permutations)
  return(mantel_test)
}

# Example correlation analysis (requires environmental data)
# env_data <- read.csv("environmental_factors.csv", row.names = 1, header = TRUE)
# cor_results <- calculate_correlations(species_relative, env_data)
# plot_correlation_heatmap(cor_results)

# ==============================================================================
# Save Results and Generate Report
# ==============================================================================

#' Save all analysis results
save_analysis_results <- function(output_dir = "analysis_results") {
  
  # Create output directory
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  
  # Save plots
  ggsave(file.path(output_dir, "taxa_composition.png"), taxa_barplot, 
         width = 12, height = 8, dpi = 300, bg = "white")
  ggsave(file.path(output_dir, "alpha_diversity.png"), alpha_plot, 
         width = 10, height = 6, dpi = 300, bg = "white")
  ggsave(file.path(output_dir, "beta_diversity_pcoa.png"), pcoa_plot, 
         width = 8, height = 6, dpi = 300, bg = "white")
  ggsave(file.path(output_dir, "volcano_plot.png"), volcano_plot, 
         width = 8, height = 6, dpi = 300, bg = "white")
  
  # Save data results
  write.csv(alpha_diversity, file.path(output_dir, "alpha_diversity_results.csv"), row.names = FALSE)
  write.csv(deseq_results$results, file.path(output_dir, "differential_abundance_results.csv"), row.names = FALSE)
  
  # Save statistical results
  sink(file.path(output_dir, "permanova_results.txt"))
  cat("PERMANOVA Results\n")
  cat("================\n\n")
  print(permanova_result)
  sink()
  
  # Save session information
  sink(file.path(output_dir, "session_info.txt"))
  cat("Session Information\n")
  cat("==================\n\n")
  print(sessionInfo())
  sink()
  
  # Generate summary report
  generate_summary_report(output_dir)
  
  message("Analysis results saved to: ", output_dir)
}

#' Generate summary report
generate_summary_report <- function(output_dir) {
  
  report_file <- file.path(output_dir, "analysis_summary_report.txt")
  
  sink(report_file)
  cat("METAGENOMIC ANALYSIS SUMMARY REPORT\n")
  cat("===================================\n\n")
  cat("Analysis Date:", date(), "\n")
  cat("Number of Samples:", nrow(metadata), "\n")
  cat("Number of Species Features:", ncol(species_relative), "\n")
  cat("Number of Functional Features:", ncol(function_relative), "\n\n")
  
  cat("ALPHA DIVERSITY SUMMARY\n")
  cat("-----------------------\n")
  cat("Average Shannon Index:", round(mean(alpha_diversity$Shannon, na.rm = TRUE), 3), "\n")
  cat("Average Simpson Index:", round(mean(alpha_diversity$Simpson, na.rm = TRUE), 3), "\n")
  cat("Average Richness:", round(mean(alpha_diversity$Richness, na.rm = TRUE), 1), "\n\n")
  
  cat("BETA DIVERSITY RESULTS\n")
  cat("----------------------\n")
  cat("PERMANOVA R-squared:", round(permanova_result$R2[1], 4), "\n")
  cat("PERMANOVA p-value:", permanova_result$`Pr(>F)`[1], "\n\n")
  
  cat("DIFFERENTIAL ABUNDANCE RESULTS\n")
  cat("------------------------------\n")
  sig_up <- sum(deseq_results$results$padj < 0.05 & deseq_results$results$log2FoldChange > 0, na.rm = TRUE)
  sig_down <- sum(deseq_results$results$padj < 0.05 & deseq_results$results$log2FoldChange < 0, na.rm = TRUE)
  cat("Significantly upregulated features:", sig_up, "\n")
  cat("Significantly downregulated features:", sig_down, "\n")
  cat("Total significant features:", sig_up + sig_down, "\n")
  
  sink()
}

# Save all results
save_analysis_results()

# ==============================================================================
# Main Analysis Workflow
# ==============================================================================

#' Main analysis workflow function
run_complete_analysis <- function() {
  
  message("Starting complete metagenomic analysis workflow...")
  
  # Load data
  message("1. Loading and preprocessing data...")
  data_list <- load_metagenomic_data()
  
  # Species composition analysis
  message("2. Performing species composition analysis...")
  taxa_barplot <- plot_taxa_barplot(data_list$species_relative, data_list$metadata)
  
  # Alpha diversity analysis
  message("3. Calculating alpha diversity...")
  alpha_div <- calculate_alpha_diversity(data_list$species_relative)
  alpha_div <- merge(alpha_div, data_list$metadata, by.x = "Sample", by.y = "row.names")
  alpha_plot <- plot_alpha_diversity(alpha_div)
  
  # Beta diversity analysis
  message("4. Performing beta diversity analysis...")
  bray_dist <- calculate_beta_diversity(data_list$species_relative)
  pcoa_result <- perform_pcoa(bray_dist)
  pcoa_df <- merge(pcoa_result$pcoa_df, data_list$metadata, by.x = "Sample", by.y = "row.names")
  pcoa_plot <- plot_pcoa(pcoa_df, pcoa_result$variance_exp)
  permanova_result <- perform_permanova(bray_dist, data_list$metadata, "bray_dist ~ Group")
  
  # Differential abundance analysis
  message("5. Performing differential abundance analysis...")
  deseq_results <- perform_deseq2(data_list$species_abundance, data_list$metadata)
  volcano_plot <- plot_volcano(deseq_results$results)
  
  # Save results
  message("6. Saving results...")
  save_analysis_results()
  
  message("Analysis workflow completed successfully!")
  
  return(list(
    taxa_barplot = taxa_barplot,
    alpha_plot = alpha_plot,
    pcoa_plot = pcoa_plot,
    volcano_plot = volcano_plot,
    permanova_result = permanova_result,
    deseq_results = deseq_results
  ))
}

# Uncomment the following line to run the complete analysis workflow
# results <- run_complete_analysis()

message("Metagenomic analysis script loaded successfully!")
message("Use run_complete_analysis() to execute the full workflow.")