setwd("/mnt/truedata/genome/stat/zjc") library(tidyverse) library(readxl) library(psych) library(agricolae) library(vegan) library(ggplot2) require(plyr) library(dplyr) library(stringr) library(rfPermute) ## Calculate Soil Fertility Index env <- read_xlsx("202410zjcenv.xlsx") names(env) # select the variables relating to soil fertility soilferti <- env[,12:18] # standardize the data (z-score) std_soilferti <- scale(soilferti,center = TRUE) # soil fertility index SFI <- rowMeans(std_soilferti) env$FI <- SFI # relationships between soil fertility variables and soil fertility index cor.test.df <- env[,c(12:18,22)] cor_test <- function(dataset1,dataset2) { #library(pacman) library(tidyverse) library(reshape) library(psych) pp <- corr.test(dataset1,dataset2,method="spearman") cor <- pp$r pvalue <- pp$p display <- pvalue l1 <- nrow(display);l2 <- ncol(display) for(i in 1:l1){ for(k in 1:l2){ a <- as.numeric(display[i,k]) if(a <= 0.001){ a <- "***" } if( 0.001 < a && a <= 0.01){ a <- "**" } if(0.01 < a && a < 0.05){ a <- "*" } if(a >= 0.05){ a <- "" } display[i,k] <- a } } heatmap_df <- melt(cor) %>% rename(replace=c("X1"="name_dataset1","X2"="name_dataset2", "value"="cor")) %>% mutate(pvalue=melt(pvalue)[,3]) %>% # using mutate function to add the melted data to previous dataframe mutate(display=melt(display)[,3]) return(heatmap_df) } cortest_res <- cor_test(cor.test.df,cor.test.df) custom_colors <- c('#619CFF', '#91CFFF', '#F0F8FF', '#FDB8B3', '#F8766D') custom_breaks <- c(-0.4,-0.2, 0.4, 0.6) Figure_corr_FI <- ggplot(cortest_res, aes(name_dataset1, name_dataset2, fill = cor)) + geom_tile() + theme_minimal() + scale_fill_gradientn(colors = custom_colors, values = scales::rescale(custom_breaks)) + geom_text(aes(label = display), size = 3, color = "black") + scale_y_discrete(position = "right") + xlab(NULL) + ylab(NULL) + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, face = "plain", colour = "black", size = 8), axis.text.y = element_text(face = "plain", colour = "black", size = 8), legend.text = element_text(face = "plain", family = "Times", colour = "black", size = 8)) + guides(fill = guide_colorbar(direction = "vertical", reverse = F, barwidth = unit(.3, "cm"), barheight = unit(5, "cm"))) + labs(fill = "") ggsave("FigureS_corr_FI.pdf",Figure_corr_FI, width = unit(6.8,"inch"),height = unit(4.5,"inch")) # Figure 1 difference of soil fertility index env$landtype_season <- paste0(env$landtype,env$season) Figure1_FI <- ggplot(data = env, aes(x = landtype_season, y = FI)) + geom_boxplot(aes(color=season,fill=season),size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Soil fertility (z-score)")+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) ggsave("Figure1_FI.pdf",Figure1_FI, width = unit(6.8,"inch"),height = unit(4.5,"inch")) Figure2a_shannon <- ggplot(data = env, aes(x = landtype_season, y = shannon)) + geom_boxplot(aes(color=season,fill=season),size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Bacterial shannon diversity")+ labs(title="(A)")+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) Figure2b_AWCD <- ggplot(data = env, aes(x = landtype_season, y = AWCD)) + geom_boxplot(aes(color=season,fill=season),size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Biolog incubation activity (AWCD)")+ labs(title="(B)")+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) Figure2 <- cowplot::plot_grid(Figure2a_shannon,Figure2b_AWCD,align = "hv", ncol = 2) ggsave("Figure2_Microbialdiversity.pdf",Figure2, width = unit(6.8,"inch"),height = unit(4.5,"inch")) #Significance test # Normal distribution of the data shapiro.test(env$FI) # P > 0.05 shapiro.test(env$AWCD) # P > 0.05 shapiro.test(env$shannon) # P< 0.01 # Using Kruskal-Wallis for all difference test # Seasons and landuse types #FI FI_diff_all_FI <- kruskal(env[["FI"]], env[["landtype_season"]]) FI_diff_all_stat_FI <- FI_diff_all_FI$statistics FI_diff_all_label_FI <- FI_diff_all_FI$groups # AWCD FI_diff_all_AWCD <- kruskal(env[["AWCD"]], env[["landtype_season"]]) FI_diff_all_stat_AWCD <- FI_diff_all_AWCD$statistics FI_diff_all_label_AWCD <- FI_diff_all_AWCD$groups # Shannon FI_diff_all_shannon <- kruskal(env[["shannon"]], env[["landtype_season"]]) FI_diff_all_stat_shannon <- FI_diff_all_shannon$statistics FI_diff_all_label_shannon <- FI_diff_all_shannon$groups # Specific landuse types df_env_slope <- filter(env,landtype=="P") df_env_flatland <- filter(env,landtype=="Q") df_env_forest <- filter(env,landtype=="S") ## FI # Sloping land FigureS2_FI_slope <- ggplot(data = df_env_slope, aes(x = plants, y = FI)) + geom_boxplot(aes(fill=season,linetype=fence),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Soil fertility (z-score)")+ labs(title="(A)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) # Flat land FigureS2_FI_flatland <- ggplot(data = df_env_flatland, aes(x = plants, y = FI)) + geom_boxplot(aes(fill=season),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Soil fertility (z-score)")+ labs(title="(B)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) # Forest FigureS2_FI_forest <- ggplot(data = df_env_forest, aes(x = plants, y = FI)) + geom_boxplot(aes(fill=season),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Soil fertility (z-score)")+ labs(title="(C)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) FigureS2 <- cowplot::plot_grid(FigureS2_FI_slope,FigureS2_FI_flatland,FigureS2_FI_forest,align = "hv", ncol = 3) ggsave("FigureS2_FI_landtypes.pdf",FigureS2, width = unit(7.5,"inch"),height = unit(3,"inch")) # Shannon FigureS3_shannon_slope <- ggplot(data = df_env_slope, aes(x = plants, y = shannon)) + geom_boxplot(aes(fill=season,linetype=fence),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Bacterial shannon diversity")+ labs(title="(A)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) # Flat land FigureS3_shannon_flatland <- ggplot(data = df_env_flatland, aes(x = plants, y = shannon)) + geom_boxplot(aes(fill=season),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Bacterial shannon diversity")+ labs(title="(B)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) # Forest FigureS3_shannon_forest <- ggplot(data = df_env_forest, aes(x = plants, y = shannon)) + geom_boxplot(aes(fill=season),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Bacterial shannon diversity")+ labs(title="(C)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) FigureS3 <- cowplot::plot_grid(FigureS3_shannon_slope,FigureS3_shannon_flatland,FigureS3_shannon_forest,align = "hv", ncol = 3) ggsave("FigureS3_shannon_landtypes.pdf",FigureS3, width = unit(7.5,"inch"),height = unit(3,"inch")) # AWCD FigureS5_AWCD_slope <- ggplot(data = df_env_slope, aes(x = plants, y = AWCD)) + geom_boxplot(aes(fill=season,linetype=fence),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Bacterial incubation activity (AWCD)")+ labs(title="(A)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) # Flat land FigureS5_AWCD_flatland <- ggplot(data = df_env_flatland, aes(x = plants, y = AWCD)) + geom_boxplot(aes(fill=season),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Bacterial incubation activity (AWCD)")+ labs(title="(B)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) # Forest FigureS5_AWCD_forest <- ggplot(data = df_env_forest, aes(x = plants, y = AWCD)) + geom_boxplot(aes(fill=season),color="black",size=0.2,alpha=0.5,outlier.fill="white",outlier.color="white") + #geom_jitter(alpha=0.5,width =0.3,shape = 1,size=0.2)+ ylab("Bacterial incubation activity (AWCD)")+ labs(title="(C)",)+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8),axis.title.x = element_blank(), axis.text = element_text(face="bold",size = 8),axis.text.x=element_text(angle=45,hjust=1,vjust=1))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) FigureS5 <- cowplot::plot_grid(FigureS5_AWCD_slope,FigureS5_AWCD_flatland,FigureS5_AWCD_forest,align = "hv", ncol = 3) ggsave("FigureS5_AWCD_landtypes.pdf",FigureS5, width = unit(7.5,"inch"),height = unit(3,"inch")) ## Significance test # Slopeland df_env_slope$plant_season_fence <- paste0(df_env_slope$plants,df_env_slope$season,df_env_slope$fence) # FI FI_diff_slope_FI <- kruskal(df_env_slope[["FI"]], df_env_slope[["plant_season_fence"]]) FI_diff_slope_stat_FI <- FI_diff_slope_FI$statistics FI_diff_slope_label_FI <- FI_diff_slope_FI$groups # AWCD FI_diff_slope_AWCD <- kruskal(df_env_slope[["AWCD"]], df_env_slope[["plant_season_fence"]]) FI_diff_slope_stat_AWCD <- FI_diff_slope_AWCD$statistics FI_diff_slope_label_AWCD <- FI_diff_slope_AWCD$groups # Shannon FI_diff_slope_shannon <- kruskal(df_env_slope[["shannon"]], df_env_slope[["plant_season_fence"]]) FI_diff_slope_stat_shannon <- FI_diff_slope_shannon$statistics FI_diff_slope_label_shannon <- FI_diff_slope_shannon$groups # Flatland df_env_flatland$plant_season <- paste0(df_env_flatland$plants,df_env_flatland$season) #FI FI_diff_flat_FI <- kruskal(df_env_flatland[["FI"]], df_env_flatland[["plant_season"]]) FI_diff_flat_stat_FI <- FI_diff_flat_FI$statistics FI_diff_flat_label_FI <- FI_diff_flat_FI$groups # Shannon FI_diff_flat_shannon <- kruskal(df_env_flatland[["shannon"]], df_env_flatland[["plant_season"]]) FI_diff_flat_stat_shannon <- FI_diff_flat_shannon$statistics FI_diff_flat_label_shannon <- FI_diff_flat_shannon$groups # AWCD FI_diff_flat_AWCD <- kruskal(df_env_flatland[["AWCD"]], df_env_flatland[["plant_season"]]) FI_diff_flat_stat_AWCD <- FI_diff_flat_AWCD$statistics FI_diff_flat_label_AWCD <- FI_diff_flat_AWCD$groups ## forest df_env_forest$plant_season <- paste0(df_env_forest$plants,df_env_forest$season) #FI FI_diff_forest_FI <- kruskal(df_env_forest[["FI"]], df_env_forest[["plant_season"]]) FI_diff_forest_stat_FI <- FI_diff_forest_FI$statistics FI_diff_forest_label_FI <- FI_diff_forest_FI$groups # Shannon FI_diff_forest_shannon <- kruskal(df_env_forest[["shannon"]], df_env_forest[["plant_season"]]) FI_diff_forest_stat_shannon <- FI_diff_forest_shannon$statistics FI_diff_forest_label_shannon <- FI_diff_forest_shannon$groups # AWCD FI_diff_forest_AWCD <- kruskal(df_env_forest[["AWCD"]], df_env_forest[["plant_season"]]) FI_diff_forest_stat_AWCD <- FI_diff_forest_AWCD$statistics FI_diff_forest_label_AWCD <- FI_diff_forest_AWCD$groups ## Microbial community composition # NMDS analysis otu <- read_tsv("03.feature-table/feature-table_rarefied.tsv",skip = 1) otu <- as.data.frame(otu) rownames(otu) <- otu[,1] otu <- otu[-1] totu <- t(otu) bray <- vegdist(totu,method = "bray") # calculate Bray-curtis dist # running NMDS model mod_NMDS <- metaMDS(bray) points <- mod_NMDS$points sample_ID <- names(otu) permanova <- adonis2(bray~season*landtype,data =env , permutations = 999) nmds_plot_df <- bind_cols(sample_ID,points) %>% set_names("Sample_name","NMDS1","NMDS2") write.csv(nmds_plot_df,"nmds_plot_df.csv") nmds_plot_df2 <- read.csv("nmds_plot_df.csv") nmds_plot_df3 <- nmds_plot_df2 %>% inner_join(env) library(viridis) Figure3_NMDS1<- ggplot(nmds_plot_df3,aes(NMDS1,NMDS2)) + geom_point(aes(color=season,shape=landtype))+ labs(color="Season",shape="Land use")+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title = element_text(face="bold",size = 9), axis.text = element_text(face="bold",size = 8))+ theme(legend.title = element_text(face="bold",size = 9),legend.text = element_text(face="bold",size = 7), legend.position = c(0.1,0.2),legend.key.height = unit(0.1, "cm")) Figure3_NMDS1 nmds_plot_df_autumn <- filter(nmds_plot_df3,season=="A") nmds_plot_df_spring <- filter(nmds_plot_df3,season=="S") group_border_autumn <- ddply(nmds_plot_df_autumn, 'landtype', function(df) df[chull(df[[3]], df[[4]]), ]) group_border_spring <- ddply(nmds_plot_df_spring, 'landtype', function(df) df[chull(df[[3]], df[[4]]), ]) Figure3_NMDS2<- ggplot(nmds_plot_df_autumn,aes(NMDS1,NMDS2)) + geom_point(aes(color=landtype,shape=landtype))+ geom_polygon(aes(fill=landtype),data = group_border_autumn, alpha = 0.5,show.legend = F)+ scale_color_viridis(discrete = T,option = "D")+ scale_fill_viridis(discrete = T,option = "D")+ labs(color="landtype",shape="Land use")+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title = element_text(face="bold",size = 9), axis.text = element_text(face="bold",size = 8))+ theme(legend.title = element_text(face="bold",size = 9),legend.text = element_text(face="bold",size = 7), legend.position = c(0.1,0.2),legend.key.height = unit(0.1, "cm")) Figure3_NMDS2 Figure3_NMDS3<- ggplot(nmds_plot_df_spring,aes(NMDS1,NMDS2)) + geom_point(aes(color=landtype,shape=landtype))+ geom_polygon(aes(fill=landtype),data = group_border_spring, alpha = 0.5,show.legend = F)+ scale_color_viridis(discrete = T,option = "D")+ scale_fill_viridis(discrete = T,option = "D")+ labs(color="landtype",shape="Land use")+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title = element_text(face="bold",size = 9), axis.text = element_text(face="bold",size = 8))+ theme(legend.title = element_text(face="bold",size = 9),legend.text = element_text(face="bold",size = 7), legend.position = c(0.8,0.8),legend.key.height = unit(0.1, "cm")) Figure3_NMDS3 Figure3_NMDS <- cowplot::plot_grid(Figure3_NMDS1,Figure3_NMDS2,Figure3_NMDS3,align = "hv", ncol=3) ggsave("Figure3_NMDS.pdf",Figure3_NMDS, width = unit(7.5,"inch"),height = unit(2.8,"inch")) # Community composition #phyla phyla <- read.csv("phyla.csv",row.names = 1) cleaned_phyla <- stringr::str_replace(names(phyla),".*p__","") names(phyla) <- cleaned_phyla phyla$Unclassified <- phyla$Unclassified <- rowSums(phyla %>% select(matches("Unassigned\\.__|d__Bacteria\\.__|d__Eukaryota\\.__"))) phyla <- phyla %>% select(-matches("Unassigned\\.__|d__Bacteria\\.__|d__Eukaryota\\.__")) average_abundance <- colMeans(phyla) sorted_names <- names(sort(average_abundance,decreasing = T)) sorted_phyla <- phyla[,sorted_names] write.csv(sorted_phyla,"sorted_phyla.csv") # top 10 phyla top_phyla <- sorted_phyla[,2:11] top_phyla <- top_phyla/9860 top_phyla$Sample_name <- rownames(top_phyla) if ("Sample_name" %in% colnames(top_phyla)) { top_phyla <- top_phyla[, c("Sample_name", setdiff(colnames(top_phyla), "Sample_name"))] } groups <- env[,c("Sample_name","season","landtype")] top_phyla <- inner_join(groups,top_phyla) plot_df_phyla_season <- top_phyla[,2:13] %>% pivot_longer(-c(season,landtype),names_to = "Phyla",values_to ="RelativeAbundance" ) %>% group_by(season,Phyla) %>% dplyr::summarise(avg_ra = mean(RelativeAbundance),se=sd(RelativeAbundance)/sqrt(n()),.groups = "drop") %>% arrange(Phyla,desc(avg_ra)) fig_4_1_comcomp_season_phyla <- ggplot(plot_df_phyla_season, aes(reorder(season, avgab, decreasing = TRUE), avgab * 100)) + geom_bar(aes(x = season, y = avg_ra * 100, fill = reorder(Phyla, avg_ra, decreasing = TRUE)), stat = "identity", width = 0.8) + ylab("Relative abundance (%)") + xlab("Season") + scale_fill_brewer(name = "Phyla", palette = "Set3") + theme_bw() + theme(panel.grid = element_blank()) + theme(legend.title = element_blank(), legend.text = element_text(face = "bold", size = 10)) + theme(axis.title.y = element_text(face = "bold", size = 12), axis.title.x = element_blank(), axis.text = element_text(face = "bold", size = 10), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + theme(legend.title = element_text(hjust = 0, face = "bold", size = 10), legend.text = element_text(face = "bold", size = 9), legend.position = "none", legend.key.height = unit(0.2, "cm")) fig_4_1_comcomp_season_phyla plot_df_phyla_landtype <- top_phyla[,2:13] %>% pivot_longer(-c(season,landtype),names_to = "Phyla",values_to ="RelativeAbundance" ) %>% group_by(landtype,Phyla) %>% dplyr::summarise(avg_ra = mean(RelativeAbundance),se=sd(RelativeAbundance)/sqrt(n()),.groups = "drop") %>% arrange(Phyla,desc(avg_ra)) fig_4_2_comcomp_landtype_phyla <- ggplot(plot_df_phyla_landtype, aes(reorder(landtype, avgab, decreasing = TRUE), avgab * 100)) + geom_bar(aes(x = landtype, y = avg_ra * 100, fill = reorder(Phyla, avg_ra, decreasing = TRUE)), stat = "identity", width = 0.8) + ylab("Relative abundance (%)") + xlab("Land use") + scale_fill_brewer(name = "Phyla", palette = "Set3") + theme_bw() + theme(panel.grid = element_blank()) + theme(legend.title = element_blank(), legend.text = element_text(face = "bold", size = 10)) + theme(axis.title.y = element_text(face = "bold", size = 12), axis.title.x = element_blank(), axis.text = element_text(face = "bold", size = 10), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + theme(legend.title = element_text(hjust = 0, face = "bold", size = 10), legend.text = element_text(face = "bold", size = 9), legend.position = "none", legend.key.height = unit(0.2, "cm")) fig_4_2_comcomp_landtype_phyla #genera genera <- read.csv("Genus.csv",row.names = 1) cleaned_genera <- str_replace(names(genera), "^.*g__(.*)", "\\1") cleaned_genera[cleaned_genera == names(genera)] <- "Unclassified" cleaned_genera <- ifelse(str_detect(names(genera), "Unassigned|uncultured"), "Unclassified", cleaned_genera) names(genera) <- cleaned_genera genera <- genera %>% select(-matches("Unclassified")) genera$Unclassified <- 9860-rowSums(genera) average_abundance_genera <- colMeans(genera) sorted_names_genera <- names(sort(average_abundance_genera,decreasing = T)) sorted_genera <- genera[,sorted_names_genera] names(sorted_genera)[2] <-"Acidobacteriota_Subgroup_2" write.csv(sorted_genera,"sorted_genera.csv") # top 10 genera top_genera <- sorted_genera[,2:11] top_genera <- top_genera/9860 top_genera$Sample_name <- rownames(top_genera) if ("Sample_name" %in% colnames(top_genera)) { top_genera <- top_genera[, c("Sample_name", setdiff(colnames(top_genera), "Sample_name"))] } groups <- env[,c("Sample_name","season","landtype")] top_genera <- inner_join(groups,top_genera) plot_df_genera_season <- top_genera[,2:13] %>% pivot_longer(-c(season,landtype),names_to = "Genera",values_to ="RelativeAbundance" ) %>% group_by(season,Genera) %>% dplyr::summarise(avg_ra = mean(RelativeAbundance),se=sd(RelativeAbundance)/sqrt(n()),.groups = "drop") %>% arrange(Genera,desc(avg_ra)) fig_4_3_comcomp_season_genera <- ggplot(plot_df_genera_season, aes(reorder(season, avgab, decreasing = TRUE), avgab * 100)) + geom_bar(aes(x = season, y = avg_ra * 100, fill = reorder(Genera, avg_ra, decreasing = TRUE)), stat = "identity", width = 0.8) + ylab("Relative abundance (%)") + xlab("Season") + scale_fill_brewer(name = "Genera", palette = "Paired") + theme_bw() + theme(panel.grid = element_blank()) + theme(legend.title = element_blank(), legend.text = element_text(face = "bold", size = 10)) + theme(axis.title.y = element_text(face = "bold", size = 12), axis.title.x = element_blank(), axis.text = element_text(face = "bold", size = 10), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + theme(legend.title = element_text(hjust = 0, face = "bold", size = 10), legend.text = element_text(face = "bold", size = 9), legend.position = "none", legend.key.height = unit(0.2, "cm")) fig_4_3_comcomp_season_genera plot_df_genera_landtype_genera <- top_genera[,2:13] %>% pivot_longer(-c(season,landtype),names_to = "Genera",values_to ="RelativeAbundance" ) %>% group_by(landtype,Genera) %>% dplyr::summarise(avg_ra = mean(RelativeAbundance),se=sd(RelativeAbundance)/sqrt(n()),.groups = "drop") %>% arrange(Genera,desc(avg_ra)) fig_4_4_comcomp_landtype_genera <- ggplot(plot_df_genera_landtype_genera, aes(reorder(landtype, avgab, decreasing = TRUE), avgab * 100)) + geom_bar(aes(x = landtype, y = avg_ra * 100, fill = reorder(Genera, avg_ra, decreasing = TRUE)), stat = "identity", width = 0.8) + ylab("Relative abundance (%)") + xlab("landtype") + scale_fill_brewer(name = "Genera", palette = "Paired") + theme_bw() + theme(panel.grid = element_blank()) + theme(legend.title = element_blank(), legend.text = element_text(face = "bold", size = 10)) + theme(axis.title.y = element_text(face = "bold", size = 12), axis.title.x = element_blank(), axis.text = element_text(face = "bold", size = 10), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + theme(legend.title = element_text(hjust = 0, face = "bold", size = 10), legend.text = element_text(face = "bold", size = 9), legend.position = "none", legend.key.height = unit(0.2, "cm")) fig_4_4_comcomp_landtype_genera Figure4_barPlot <- cowplot::plot_grid(fig_4_1_comcomp_season_phyla,fig_4_2_comcomp_landtype_phyla,fig_4_3_comcomp_season_genera, fig_4_4_comcomp_landtype_genera,align = "hv",ncol = 2) #ggsave("Figure4_commcomp.pdf",Figure4_barPlot, width = unit(5.5,"inch"),height = unit(5,"inch")) # Random forest model colMeans(top_phyla[,c(4:13)]) # Considering top phyla with relative abundances greater than 0.01% in random forest model datamain <- top_phyla[,c(4:11)] scaled_datamain <- as.data.frame(scale(datamain, center = TRUE, scale = TRUE)) axis.used.in.rf <- env$FI rfs.dat <- data.frame(value = axis.used.in.rf, scaled_datamain) trf <- randomForest::tuneRF(rfs.dat[, 2:ncol(rfs.dat)], rfs.dat[, "value"], ntreeTry = 9999, plot = FALSE) mt <- trf[which.min(trf[,2]), 1] rf.res <- randomForest::randomForest(value ~ ., data = rfs.dat, mtry = mt, importance = TRUE, na.action = na.omit,num.rep = 9999, ntree = 9999) rf.Pres <- rfPermute::rfPermute(value ~ ., data=rfs.dat,mtry = mt,importance=T,num.rep = 9999,num.cores = 60, ntree = 9999) importance.res <- as.data.frame(importance(rf.Pres, scale = T)) importance.res.sorted <- importance.res[order(importance.res$`%IncMSE`, decreasing = TRUE),] importance.res.sorted1 <- importance.res.sorted %>% mutate(Phyla=rownames(.)) xx <- capture.output(rf.res)[length(capture.output(rf.res))] exp.perc <- as.numeric(strsplit(xx, split = ': ', fixed = T)[[1]][2]) final_importance.res.sorted.df_commcomp <- importance.res.sorted1 for (j in seq_len(nrow(final_importance.res.sorted.df_commcomp))) { if (importance.res.sorted1[j, 2] <= 0.01) { final_importance.res.sorted.df_commcomp[j, 2] = "**" } else if (importance.res.sorted1[j, 2] <= 0.05 & importance.res.sorted1[j, 2] > 0.01) { final_importance.res.sorted.df_commcomp[j, 2] = "*" } else if (importance.res.sorted1[j, 2] > 0.05 &importance.res.sorted1[j, 2] <= 0.1) { final_importance.res.sorted.df_commcomp[j, 2] = "o" } else { final_importance.res.sorted.df_commcomp[j, 2] = "ns" } } final_importance.res.sorted.df_commcomp$Phyla <- factor(final_importance.res.sorted.df_commcomp$Phyla,levels = levels(reorder(final_importance.res.sorted.df_commcomp$Phyla,final_importance.res.sorted.df_commcomp$`%IncMSE`, decreasing = T))) fig_randomforest <- ggplot(final_importance.res.sorted.df_commcomp,aes(Phyla, `%IncMSE`)) + geom_bar(stat="identity",width = 0.8,fill = "grey")+ geom_text(aes(label=`%IncMSE.pval`,y=`%IncMSE`+0.4),position=position_dodge(width=0.8),size=4,color="black")+ #geom_text(x=12,y=70,label = str_c("phoD-harbouring community \n composition #1 R^2 = ", round(global_phoD$randomforest$plot_env_NMDS_explained$exp.perc[1]/100,3)),size = 3, fontface= "bold")+ ylab("Increase in MSE %")+xlab("Variables")+ theme_bw() + theme(panel.grid = element_blank())+ theme(legend.title = element_blank(),axis.title.x = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 9), axis.text.x = element_text(face="bold",size = 8,angle = 45,hjust = 1,vjust = 1), axis.text.y = element_text(face="bold",size = 8))+ theme(legend.position = "none" ) fig_randomforest ggsave("figure5_randomforest.pdf",fig_randomforest, width = unit(6.8,"inch"),height = unit(4.5,"inch")) marginal_significant_phyla <- top_phyla[,rownames(filter(final_importance.res.sorted.df_commcomp,`%IncMSE.pval`!="ns"))] df_corr_phyla_FI <- cbind(env$FI,marginal_significant_phyla) names(df_corr_phyla_FI)[1] <-"FI" cortest_res_phylaFI <- cor_test(df_corr_phyla_FI,df_corr_phyla_FI) custom_breaks <- c(-0.1,0, 0.2, 0.4,0.6) Figure_corr_phylaFI <- ggplot(cortest_res_phylaFI, aes(name_dataset1, name_dataset2, fill = cor)) + geom_tile() + theme_minimal() + scale_fill_gradientn(colors = custom_colors, values = scales::rescale(custom_breaks)) + geom_text(aes(label = display), size = 3, color = "black") + scale_y_discrete(position = "right") + xlab(NULL) + ylab(NULL) + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, face = "plain", colour = "black", size = 8), axis.text.y = element_text(face = "plain", colour = "black", size = 8), legend.text = element_text(face = "plain", family = "Times", colour = "black", size = 8)) + guides(fill = guide_colorbar(direction = "vertical", reverse = F, barwidth = unit(.3, "cm"), barheight = unit(5, "cm"))) + labs(fill = "") Figure_corr_phylaFI ggsave("Figure_corr_phylaFI.pdf",Figure_corr_phylaFI, width = unit(6.8,"inch"),height = unit(4.5,"inch")) res_Proteobacteria<-summary(lm(FI~poly(Proteobacteria,1),df_corr_phyla_FI)) res_Proteobacteria_cof <- data.frame(res_Proteobacteria$coefficients) Figure6Proteobacteria <- ggplot(df_corr_phyla_FI,aes(Proteobacteria*100,FI))+geom_point(pch=1)+geom_smooth(formula = y~poly(x,1),method = "lm")+ theme_bw() + theme(panel.grid = element_blank())+ geom_text(data = data.frame(x=20,y=1),aes(x,y,label=paste("R^2 = ",round(res_Proteobacteria$r.squared,3),"\nP = ",signif(res_Proteobacteria_cof$Pr...t..[2], digits = 3))), size=3,fontface= "bold", color="black")+ xlab("Relative abundances \n of Proteobacteria (%)")+ ylab("Soil fertility (z-score)")+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8), axis.text = element_text(face="bold",size = 8))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) Figure6Proteobacteria res_Acidobacteriota<-summary(lm(FI~poly(Acidobacteriota,1),df_corr_phyla_FI)) res_Acidobacteriota_cof <- data.frame(res_Acidobacteriota$coefficients) Figure6Acidobacteriota <- ggplot(df_corr_phyla_FI,aes(Acidobacteriota*100,FI))+geom_point(pch=1)+geom_smooth(formula = y~poly(x,1),method = "lm")+ theme_bw() + theme(panel.grid = element_blank())+ geom_text(data = data.frame(x=10,y=1),aes(x,y,label=paste("R^2 = ",round(res_Acidobacteriota$r.squared,3),"\nP = ",signif(res_Acidobacteriota_cof$Pr...t..[2], digits = 3))), size=3,fontface= "bold", color="black")+ xlab("Relative abundances \n of Acidobacteriota (%)")+ ylab("Soil fertility (z-score)")+ theme(legend.title = element_blank(), legend.text = element_text(face="bold",size = 8)) + theme(axis.title.y = element_text(face="bold",size = 8), axis.text = element_text(face="bold",size = 8))+ theme(legend.title = element_text(hjust=0,face="bold",size = 8), legend.text = element_text(face="bold",size = 8),legend.position = "none", legend.key.height = unit(0.15, "cm") ) Figure6Acidobacteriota Figure6b <- cowplot::plot_grid(Figure6Proteobacteria,Figure6Acidobacteriota, align = "hv",ncol = 2) ggsave("Figure6b_Abundance_FI.pdf",Figure6b, width = unit(5.5,"inch"),height = unit(3,"inch")) # Path modeling library(plspm) tran <- cbind(env[,c(2,9,10,21,20,19)],df_corr_phyla_FI[,c(1,2,6)]) tran <- tran[,c(setdiff(names(tran),"FI"),"FI")] attach(tran) names(tran) humanactivity=c(0,0,0,0,0,0,0) pH=c(1,0,0,0,0,0,0) plantdiversity=c(1,1,0,0,0,0,0) microbialactivity=c(1,1,1,0,0,0,0) microbialcomposition=c(1,1,1,0,0,0,0) Microbialdiversity=c(1,1,1,0,0,0,0) FI=c(1,1,1,1,1,1,0) tran_path=rbind(humanactivity,pH,plantdiversity,microbialactivity,microbialcomposition,Microbialdiversity,FI) innerplot(tran_path) colnames(tran_path) = rownames(tran_path) tran_blocks=list(2,3,4,6,7:8,5,9) tran_modes=rep("A",7) tran_pls=plspm(tran,tran_path,tran_blocks,modes=tran_modes,boot.val=T,br=999) plot(tran_pls,arr.pos=0.6) summary(tran_pls)