### R Script for Chunco et al., Population isolation in the Plains spadefoot toad: Causes and conservation implications ###
##
##### Load required packages #####
library(ape)
library(pegas)
library(haplotypes)
library(vegan)
#
### Input cytochrome b data file, and format as needed.
pops<-read.csv("Sbombifrons_cytb.seqs_maxpops.csv")
pops$pop<-as.factor(pops$pop)

### Will need sequence data in "Dna object" format
# To make Dna object: Split cytb sequence character vector pops$cytb into individual characters
cytbsplit<-strsplit(pops$cytb, split="")
cytbdna<-as.dna(cytbsplit)
cytbdna@seqnames<-pops$ind.names

### Will also need data in "DNAbin" format
cytbDNAbin<-haplotypes::as.DNAbin(cytbsplit)

### Construct haplotype network (Fig. 5) ###
cytb_matrix<-as.matrix(cytbDNAbin)
Haps<-pegas::haplotype(cytb_matrix) #Gives summary of haplotype sample sizes (>Haps)
reduced_cytbNet<-haploNet(Haps) 
summary(Haps)
(reduced_cytbNet.labs<-attr(reduced_cytbNet, "labels"))
sz<-summary(Haps)
sz<-sz[reduced_cytbNet.labs] 
reduced_region<-c(rep("L.South Texas", 13), rep("I.Texas Panhandle", 22), rep("E.South Central Kansas", 14), rep("J.West Texas", 3), rep("H.East Central New Mexico", 6), rep("A.East Colorado", 51), rep("B.Northwest Kansas", 3), rep("G.Central Oklahoma", 10), rep("F.Northwest Oklahoma", 7), rep("C.Northeast Kansas", 3), rep("D.Southwest Kansas", 6), rep("K.Southeast Arizona", 64))
R<-haploFreq(cytb_matrix, fac=reduced_region, haplo=Haps) #Computing frequencies by region
R<-R[reduced_cytbNet.labs,]
## Plot network (Fig. 5)
xy<-list(x=c(19.02, 13.01, 0.00, -9.58, -12.77, -14.60, -7.69, -6.37, -14.96, -3.41, -13.65, -0.29, -12.35, -12.23, 0.42, 2.88, -25.28, -4.10, 5.74, 7.76, 8.21, -7.99, -11.88, 8.20, -6.29, 7.33, 12.00, 11.60, 4.69, 10.76, 6.45, 2.31), y=c(9.12e+00, 9.75e+00, 0.00e+00, -1.17e-15, 5.28e+00, 3.01e+00, -2.88e+00, -7.33e+00, 1.33e+00, -7.47e+00, -3.64e+00, -8.21e+00, -6.88e+00, 9.05e+00, 1.19e+01, -7.69e+00, -3.46e+00, 1.09e+01, -6.16e+00, 4.40e+00, -2.87e-01, 8.57e+00, -9.69e+00,  2.06e+00, -1.33e+01, -3.93e+00,  6.74e+00,  1.35e+01, 1.03e+01,  1.60e+01,  1.77e+01, 1.75e+01))
plot(reduced_cytbNet, size=sqrt(sz), pie=R, legend=c(11.5, 3), bg=c("#882255", "#999933", "#661100","#88CCEE", "#CC6677", "#DDCC77", "#332288", "#AA4499", "#44AA99", "#888888", "#6699CC", "#117733"), scale.ratio=3.5, show.mutation=2, cex=0.6, lwd=1, threshold=c(0,1), xy=xy) 

### Calculate pairwise PhiST (Table 3) ###
phist<-pairPhiST(cytbdna, populations=pops$pop, nperm=9999)

### Input pairwise PhiST values (from Table 3), and format into a distance matrix. ###
pst<-read.csv("Sb_PhiST_Pairwise.csv") #reads in as a data frame
pst_m<-as.matrix(pst[1:12, 2:13]) #converts to matrix format
pst_dist<-as.dist(pst_m) #converts to dist object

### Input pairwise geographic distances (from Table 1), and format into a distance matrix. ###
geo<-read.csv("Sb_GeoDist_Pairwise.csv") #reads in as a data frame
geo_m<-as.matrix(geo[1:12, 2:13]) #converts to matrix format
geo_dist<-as.dist(geo_m) #converts to dist object

### Run Principal coordinate analysis with PhiST values to look at population structuring ###
rn<-c("E_CO", "NS_KS", "NE_KS", "SW_KS", "SCent_KS", "NW_OK", "Cent_OK", "ECent_NM", "TX_Pan", "West_TX", "SE_AZ", "STexas")
struc<-pcoa(pst_dist, rn=rn)
struc$values #displays PCoA eigenvalues
# Create plot (Fig. 6)
biplot(struc, main="", xlab="Axis 1", ylab="Axis 2", bty="n") 

### IBD analysis. Run both with and without S Texas pop comparisons ###
IBD_all<-vegan::mantel(xdis=geo_dist, ydis=pst_dist, method="pearson", permutations=9999)
IBD_all 
# Removing S Texas points
pst_m_11<-as.matrix(pst[1:11, 2:12]) #converts to matrix format and drops S Texas
pst_11_dist<-as.dist(pst_m_11)
geo_m_11<-as.matrix(geo[1:11, 2:12]) #converts to matrix format and drops S Texas
geo_11_dist<-as.dist(geo_m_11)
# IBD without S Texas
IBD_11<-vegan::mantel(xdis=geo_11_dist, ydis=pst_11_dist, method="pearson", permutations=9999)
IBD_11 

### IBD figure (Fig. 7) ###
# Calculate slope and intercept of two IBD lines (with and without S Texas points)
regIBDall<-lm(pst_dist~geo_dist)
regIBDall #slope = 0.0006474, intercept = -0.1085541
regIBD_11<-lm(pst_11_dist~geo_11_dist)
regIBD_11 #slope = 0.0005356, intercept = -0.0713701
# Construct IBD Plot #
y<-expression(Genetic~Distance~(Pairwise~Phi[ST]))
plot(geo_dist, pst_dist, pch=1, cex=1, cex.lab=0.9, cex.axis=0.9, xlab="Geographic Distance (km)", ylab=y, xlim=c(0, 1600), ylim=c(0,1.0), bty="n")
points(x=geo[12,2:12], y=pst[12,2:12], pch=19, cex=1) #fills in S Texas points
line_allIBD<-0.0006474*(190:1510)-0.1085541 #defines regression line using all populations, values from regIBDall
line_noSTX<-0.0005356*(190:1510)-0.0713701 #defines regression line using all populations EXCEPT South Texas, values from regIBD_11
lines(x=190:1510, y=line_allIBD, lty=1, lwd=1.5) #plots IBD line with all
lines(x=190:1510, y=line_noSTX, lty=3, lwd=1.5) #plots IBD line without S Texas
legend(x=c(900, 1550), y=c(0.225, 0.10), legend=c("All comparisons", "S. Texas removed"), col="black", lty=c(1, 3), box.lty=0, lwd=1.5, cex=0.75)
legend(x=c(900, 1550), y=c(0.35, 0.225), legend=c("Comparisons involving S. Texas", "Other comparisons"), col="black", pch=c(19, 1), box.lty=0, cex=0.75)
rect(xleft=900, ybottom=0.065, xright=1575, ytop=0.35)