# Beringia divergence # load packages library(readxl) # read in data from excel sheet library(janitor) # cleans up input data names library(tidyverse) # for data preprocessing, ggplot and factor/string manipulation library(ggrepel) # provides the geom for Fig. 2's labels library(patchwork) # composes Fig. 2 ################################################################## ### Scale (Passeriform) Lerner et al. ND2 mutation rate to other orders using Arcones et al. rates # Get whole mitogenome mutation rates mito_rates <- read_csv("mito_mutation_rates.csv") %>% clean_names # read in divergence time estimates mitogenome_bp_Lerner <- 16604 #referenced from NC_009684.1 (Anas platyrhynchos mitogenome) mitogenome_bp_Arcones <- as.numeric(mito_rates %>% # The Arcones mutation rates only encompass certain parts of the genome - ~13000 / 16600 bp. filter(source == "Arcones") %>% summarise(total = sum(base_pairs) / length(unique(order)))) mito_rates <- mito_rates %>% mutate(weight = ifelse(source == "Lerner", base_pairs / mitogenome_bp_Lerner, base_pairs / mitogenome_bp_Arcones)) fullmito_rate_Lerner <- mito_rates %>% filter(source == "Lerner" & region != "ND2") %>% summarize(rate_substitutions_site_my = as.numeric(weighted.mean(rate_substitutions_site_my,weight))) fullmito_rate_Lerner$order = "Passeriformes" fullmito_rate_Lerner$source = "Lerner" fullmito_rate_Lerner$region = "Full_mitogenome" ND2_rate_Arcones <- mito_rates %>% # this is used for rate 3 for calculations further down - not important here select(order, rate_substitutions_site_my, source, region) %>% filter(source == "Arcones" & region == "ND2") fullmito_rate_Arcones <- mito_rates %>% filter(source == "Arcones") %>% group_by(order) %>% summarize(rate_substitutions_site_my = weighted.mean(rate_substitutions_site_my, weight)) fullmito_rate_Arcones$source <- rep("Arcones", length(fullmito_rate_Arcones$order)) fullmito_rate_Arcones$region <- rep("Full_mitogenome", length(fullmito_rate_Arcones$order)) mito_rates <- full_join(full_join(fullmito_rate_Lerner, fullmito_rate_Arcones), mito_rates) %>% filter(region == "ND2" | region == "Full_mitogenome") # binds the Arcones+Lerner full mitogenome rates + all rates, then filters to ND2 and full_mito only mito_rates <- full_join(full_join(fullmito_rate_Lerner, fullmito_rate_Arcones), mito_rates) %>% filter(region == "ND2" | region == "Full_mitogenome") %>% select(-base_pairs, -weight) # gets rid of base_pairs/weight columns passeriform_rates_ND2 <- mito_rates %>% filter(order == "Passeriformes" & region == "ND2") # Pull out all ND2 passeriform rates passeriform_rates_fullmito <- mito_rates %>% filter(order == "Passeriformes" & region == "Full_mitogenome") # Pull out all fullmito passeriform rates # create functions to correct the ND2 and full mito values correct_rate_ND2 <- function(focal, passeriform_rates = passeriform_rates_ND2) { # function divides focal rate by Arcones' passerine rate, and then multiplies by Lerner's passerine to scale it lerner <- as.numeric(passeriform_rates %>% filter(source == "Lerner") %>% select(rate_substitutions_site_my)) arcones <- as.numeric(passeriform_rates %>% filter(source == "Arcones") %>% select(rate_substitutions_site_my)) corrected <- (focal / arcones) * lerner return(corrected) } # function to scale rates correct_rate_fullmito <- function(focal, passeriform_rates = passeriform_rates_fullmito) { # function divides focal rate by Arcones' passerine rate, and then multiplies by Lerner's passerine to scale it lerner <- as.numeric(passeriform_rates %>% filter(source == "Lerner") %>% select(rate_substitutions_site_my)) arcones <- as.numeric(passeriform_rates %>% filter(source == "Arcones") %>% select(rate_substitutions_site_my)) corrected <- (focal / arcones) * lerner return(corrected) } mito_rates <- mito_rates %>% filter(source == "Arcones") %>% mutate(rate_corrected = ifelse(region == "ND2", as.numeric(map(rate_substitutions_site_my, correct_rate_ND2)), as.numeric(map(rate_substitutions_site_my, correct_rate_ND2)))) ################################################################## ### Get divergence times in Mya for all lineages # Rate 1 - Lerner et al. corrected using Arcones order-specific rates taxon_info <- read_csv("mito_divergence.csv") %>% clean_names %>% pivot_longer(6:9, names_to = "type") # get divergence times from percent divergence # create function to calculate distances get_dist_rate1 <- function(distance, order_int, region_int = "Full_mitogenome", mito_rates_int = mito_rates) { rate <- as.numeric(mito_rates_int %>% filter(order == order_int & region == region_int) %>% select(rate_corrected)) mya <- distance / (rate * 100) return(mya) } # Use get_dist_rate1() and mito_rates data frame to get divergence times in mya taxon_info <- taxon_info %>% mutate(mya = ifelse(str_detect(type, "fullmito"), as.numeric(map2(value, order, get_dist_rate1)), as.numeric(map2(value, order, get_dist_rate1, region_int = "ND2")))) # Rate 2 - Lerner et al. uncorrected rates, calculated based on passerine dataset ND2_rate_Lerner <- 0.029 # referenced from Lerner et al. supplementary materials fullmito_rate_Lerner <- as.numeric(fullmito_rate_Lerner) # calculated earlier; re-referenced here for ease of code comprehension taxon_info <- taxon_info %>% mutate(mya_uncorrected_Lerner = ifelse(str_detect(type, "fullmito"), value / (fullmito_rate_Lerner * 100), value / (ND2_rate_Lerner * 100))) # Rate 3 - Arcones et al. order-specific rates. mito_rates_Arcones <- full_join(fullmito_rate_Arcones, ND2_rate_Arcones) get_dist_rate3 <- function(distance, order_int, region_int = "Full_mitogenome", mito_rates_int = mito_rates_Arcones) { rate <- as.numeric(mito_rates_int %>% filter(order == order_int & region == region_int) %>% select(rate_substitutions_site_my)) mya <- distance / (rate * 100) return(mya) } taxon_info <- taxon_info %>% mutate(mya_uncorrected_Arcones = ifelse(str_detect(type, "fullmito"), as.numeric(map2(value, order, get_dist_rate3)), as.numeric(map2(value, order, get_dist_rate3, region_int = "ND2")))) ### Get tables 2 and 3 - sumstats for genetic distance (2) and divergence times (3) # JC distance used dist_divtime_sumstats <- taxon_info %>% filter(str_detect(type, "jc")) %>% group_by(type, taxonomic_depth) %>% summarize(mean_dist = mean(value), std_dist = sd(value), min_dist = min(value), max_dist = max(value), mean_divtime = mean(mya), std_divtime = sd(mya), min_divtime = min(mya), max_divtime = max(mya)) # very long rbind statement to add summaries of lineages in all taxonomic depths to dist_divtime_sumstats dist_divtime_sumstats <- rbind(dist_divtime_sumstats, taxon_info %>% filter(str_detect(type, "jc")) %>% group_by(type) %>% summarize(mean_dist = mean(value), std_dist = sd(value), min_dist = min(value), max_dist = max(value), mean_divtime = mean(mya), std_divtime = sd(mya), min_divtime = min(mya), max_divtime = max(mya)) %>% mutate(taxonomic_depth = "all") ) ################################################################## ### Plot distribution of full mitogenome divergence events (Figure 2) # Figure 2 - Distribution of mtDNA divergence events fig_2 <- taxon_info %>% filter(str_detect(type, "jc")) %>% select(taxon_id, taxonomic_depth, order, type, value) %>% mutate(taxonomic_depth = factor(taxonomic_depth, c("Population", "Subspecies", "Species"))) # ggplot the figure_2 dataset #use full mitogenome JC estimates, rate 1 plot_divtimes <- function(data, region, title) { text_theme <- theme(axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1.0))) plot <- data %>% filter(str_detect(type, region)) %>% ggplot(aes(x = value, y = taxonomic_depth, fill = taxonomic_depth, label = taxon_id)) + theme_classic() + text_theme + geom_point(size = rel(3), shape = 21, color = "black") + geom_text_repel(max.overlaps = Inf) + scale_y_discrete(limits = rev(levels(data$taxonomic_depth))) + stat_summary( geom = "point", fun = "mean", col = "black", size = 3, shape = 24, fill = "red", #show.legend = TRUE ) + xlab("Time (mya)") + ylab("Taxonomic depth") + #ggtitle(title) + guides(fill = guide_legend(title="Legend")) return(plot) } fullmito_plot <- plot_divtimes(fig_2, "fullmito", "Full mitogenome") nd2_plot <- plot_divtimes(fig_2, "nd2", "ND2") # PATCHWORK THE TWO ON TOP OF EACH OTHER fig_2_fullmito_nd2 <- (fullmito_plot / nd2_plot) + plot_annotation(tag_levels = 'A') # Save all figures ggsave("Figure_2.png", plot = fig_2_fullmito_nd2, device = "png", height = 1500, width = 3000, units = "px") # Figure 2 ggsave("Figure_2_fullmito.png", plot = fullmito_plot, device = "png", height = 1500, width = 3000, units = "px") # Figure S1 ggsave("Figure_2_ND2.png", plot = nd2_plot, device = "png", height = 1500, width = 3000, units = "px") ################################################################## ### Create Network Diagrams in R # Figure 3 ################################################################## ### Re-format msBayes plots (Fig. 4 & Fig. 5) # msBayes uses R under-the-hood to generate images. However, the visualization module is ensconced in its own script. # In lieu of breaking into Naoki's code and reverse-engineering it, we use magick and a little bit of command-line work to format the figures # 1. Install magick # (brew install magick) # 2. Use Preview (or Acrobat, or whatever else. I'm not your dad.) GUI to pull individual .tiff images out of msBayes output. # Code not shown; it's a GUI. # 3. Use magick convert to split the images in half horizontally. Remove the first image (we're not interested in it at the moment) # magick convert ND2_psi.tiff -crop 1x2@ +repage ND2_psi_sep.tiff # magick convert ND2_density.tiff -crop 1x2@ +repage ND2_density_sep.tiff # magick convert 3_gene_density.tiff -crop 1x2@ +repage out.tiff # magick convert 3_gene_psi.tiff -crop 1x2@ +repage 3_gene_psi_sep.tiff # 4. Use magick -shave/-crop to remove whitespace and titles that don't correspond to PeerJ formatting # magick ND2_psi_sep.tiff -crop +250+190 +repage ND2_psi_crop.tiff # magick 3_gene_psi_sep.tiff -crop +250+190 +repage 3_gene_psi_crop.tiff # magick ND2_psi_crop.tiff -crop -230-280 +repage ND2_psi_crop2.tiff # magick 3_gene_psi_crop.tiff -crop -230-280 +repage 3_gene_psi_crop2.tiff # Density takes two crop commands instead # magick ND2_density_sep.tiff -crop +160+300 +repage ND2_density_crop.tiff # magick 3_gene_density_sep.tiff -crop +160+300 +repage 3_gene_density_crop.tiff # magick ND2_density_crop.tiff -crop -100-160 +repage ND2_density_crop2.tiff # magick 3_gene_density_crop.tiff -crop -100-80 +repage 3_gene_density_crop2.tiff # 5. Convert images to combined .tiff. # magick convert -append ND2_psi_shaved.tiff 3_gene_psi_shaved.tiff Fig_4_psi.tiff # magick convert -append ND2_density_crop2.tiff 3_gene_density_crop2.tiff Fig_5_density.tiff # 6. Add labels (A, B) to combined .tiff in Preview. # Code not shown. Again, it's a GUI.