# Marker characterization setwd("/Users/christineewers/Dropbox/Chelonibia/1.5_Microsat_development/Data_files") ### table S1: locus information #### Locus <- c("Ctest2", "Ctest7", "Ctest9", "Ctest10", "Ctest11", "Ctest12", "Ctest16", "Ctest18", "Ctest31", "Ctest32", "Ctest36", "Ctest47") F_primer_sequence <- c("ACACACATCACTGGACTCG", "GTTATCCGTCATTCCATCC", "AACAGATGTGACATTGATGC","ATACGCACAAACTCACACC","GTGTCCACCTTTATGTCTGG","AACTGGTGGACAGTCTGG","TCAGGTACAGCATTATCGC","TTCATGAATCACTTCCTGG","GTACGCCGAAAGTAAAGC","AGAAATCCATAATCGTCTGG","AGATATTGGTGGAACGAGC","GTTGACACGATGACATAACG") R_primer_sequence <- c("CAGTAAGCAGCTCTGTTCG", "GACGTAACCACCTTGTCG", "TTGTACTGTCCTTGTAACGC","TGTCCTCTTACAGAGATCGG","AGTTGAAAATACGCACGC","CATCTTTATGAGTAGCGAGG","CAAGGACCATCAATTACCC","GTAATCAAATAAGGCGATGC","AGCTCTGACAAAGTTATGCC","ATAACGACGTAATCAGCACC","CACAACATACTCAACGAACG","ACAATTCCAGCTCTGTTAGC") Multiplex_rxn <- c("BB", "AA","BB","BB","CC","AA","CC","AA","DD","CC","DD","DD") Dye <- c("NED", "6-FAM", "6-FAM", "HEX", "HEX", "HEX", "6-FAM", "NED", "6-FAM", "NED", "HEX", "NED") markers <- data.frame(Locus, F_primer_sequence, R_primer_sequence, Dye, Multiplex_rxn) markers write.table(markers, "/Users/christineewers/Dropbox/Chelonibia/1.5_Microsat_development/MS/table1_locus_info.csv", sep=",") ### make genind object with all potentially important information as pop slots #### sap <- read.table("sap.csv", head=T) pac <- read.table("pac.csv", head=T) #colnames(sap) genotypes <- as.data.frame(sap[,c(43:53)]) #head(genotypes) rownames(genotypes) <- sap[,1] library(adegenet) s <- df2genind(genotypes, ncode=3, ploidy=2, type="codom", pop=rep("Sap", times=nrow(sap))) sumS <- summary(s) s@other$year <- sap$year genoPac <- as.data.frame(pac[,c(43:53)]) rownames(genoPac) <- pac[,1] genoPac <- genoPac[!rowSums(is.na(genoPac)) == 11,] p <- df2genind(genoPac, ncode=3, ploidy=2, type="codom", pop=pac$state) geno2 <- rbind(genoPac, genotypes) all <- df2genind(geno2, ncode=3, ploidy=2, type="codom") pop(all) <- c(rep("Pac", times=nrow(genoPac)), rep("Sap", times=nrow(genotypes))) #mAP = all ### HWE #### library(pegas) sl <- genind2loci(s) HWE.table <- hw.test(sl, B=1000) HWE.table # chi^2 df Pr(chi^2 >) Pr.exact #loc7 787.71773 253 0.000000e+00 0.039 #loc9 492.72480 28 0.000000e+00 0.037 #loc10 384.96101 28 0.000000e+00 0.006 #loc11 4503.47720 630 0.000000e+00 0.119 #loc12 479.10266 435 7.079422e-02 0.028 #loc16 38.49817 6 8.975667e-07 0.004 #loc18 454.74979 36 0.000000e+00 0.074 #loc31 38361.69613 55 0.000000e+00 0.000 #loc32 55.54200 6 3.602575e-10 0.418 #loc36 1093.35273 465 0.000000e+00 0.000 #loc47 1141.67978 66 0.000000e+00 0.079 # this is what I got with the function HWE.test.genind, which no longer exists # P1=Blue, P2=Horse, P3=Logger # locus16 locus11 locus32 locus7 locus12 locus18 locus9 locus10 locus31 locus36 locus47 #P1 0.3155 0.1850 0.462 5e-04 0.0215 0.0005 0.0005 0.0570 5e-04 5e-04 0.242 #P2 0.0595 0.0010 0.003 5e-04 0.0005 0.0005 0.0005 0.2485 5e-04 5e-04 0.067 #P3 0.2335 0.0055 0.147 5e-04 0.0005 0.0075 0.0085 1.0000 1e-03 5e-04 0.369 ### Null alleles #### # Brookfield # p(null allele) = (Hexp - Hobs) / (Hexp +1) s.null <- (sumS$Hexp - sumS$Hobs) / (sumS$Hexp + 1) s.null[s.null < 0] <- 0 ### informativeness = allelic richness #### library(PopGenReport) richS <- allel.rich(s) str(richS) richS$mean.richness #14.37431 richS$sum.richness #158.1174 # From the description: # To account for differences in sample sizes and genotyping success, rarefication is used in the calculation. # Allelic richness was calculated using the methods of Mousadik and Petit (1996) which are in turn based upon the work of Hurlbert (1971). ### genotyping error #### dif <- read.table("genotyping_error.csv", sep=",", head=T) # unproblematic cases: cases without genotyping error divided by all cases (minus cases without information = NA) # genoyping error: 1 - unproblematic cases dif <- as.data.frame(dif) nrow(dif) #81 error <- c( 1-nrow(subset(dif, locus7...1 == 0 | locus7...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus7...1)))), 1-nrow(subset(dif, locus9...1 == 0 | locus9...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus9...1)))), 1-nrow(subset(dif, locus10...1 == 0 | locus10...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus10...1)))), 1-nrow(subset(dif, locus11...1 == 0 | locus11...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus11...1)))), 1-nrow(subset(dif, locus12...1 == 0 | locus12...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus12...1)))), 1-nrow(subset(dif, locus16...1 == 0 | locus16...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus16...1)))), 1-nrow(subset(dif, locus18...1 == 0 | locus18...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus18...1)))), 1-nrow(subset(dif, locus31...1 == 0 | locus31...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus31...1)))), 1-nrow(subset(dif, locus32...1 == 0 | locus32...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus32...1)))), 1-nrow(subset(dif, locus36...1 == 0 | locus36...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus36...1)))), 1-nrow(subset(dif, locus47...1 == 0 | locus47...2 == 0))/(nrow(dif)-nrow(subset(dif, is.na(locus47...1))))) error ### Pacific samples #### # heterozygosity sumP <- summary(p) # allelic richness richP <- allel.rich(p) sumP$Hexp sumS$Hexp # null alleles p.null <- (sumP$Hexp - sumP$Hobs) / (sumP$Hexp + 1) p.null[p.null < 0] <- 0 # HWE pl <- genind2loci(p) HWE.table.pac <- hw.test(pl, B=1000) HWE.table.pac ### table: locus characterization #### kmer <- c(4, 4, 2, 2, 4, 4, 5, 4, 4, 4, 5) range.min <- sapply(s@all.names, min) range.max <- sapply(s@all.names, max) # locus name | kmer | range | number alleles | Hobs | Hexp | p-value of HWE options(digits=3) table2 <- data.frame(Locus = c("Ctest7", "Ctest9", "Ctest10", "Ctest11", "Ctest12", "Ctest16", "Ctest18", "Ctest31", "Ctest32", "Ctest36", "Ctest47"), Kmer = kmer, n = richS$pop.sizes[,1]/2, Range_min = sapply(s@all.names, min), Range_max = sapply(s@all.names, max), Number_alleles = sumS$loc.nall, Hobs = sumS$Hobs, Hexp = sumS$Hexp, HWE_p = HWE.table[,4], Allelic_richness = richS$all.richness[,1], Genotyping_error_rate = error, Percent_null_alleles = s.null*100) table2 write.table(table2, "/Users/christineewers/Dropbox/Chelonibia/1.5_Microsat_development/MS/table2_Sapelo_char.csv", row.names=F) # Pacific samples table3 <- data.frame(Locus = c("Ctest7", "Ctest9", "Ctest10", "Ctest11", "Ctest12", "Ctest16", "Ctest18", "Ctest31", "Ctest32", "Ctest36", "Ctest47"), n = richP$pop.sizes[,1]/2, Range_min = sapply(p@all.names, min), Range_max = sapply(p@all.names, max), Number_alleles = sumP$loc.nall, Hobs = sumP$Hobs, Hexp = sumP$Hexp, HWE_p = HWE.table.pac[,4], Allelic_richness = richP$all.richness[,1], Percent_null_alleles = p.null*100) table3 write.table(table3, "/Users/christineewers/Dropbox/Chelonibia/1.5_Microsat_development/MS/table3_Pac_char.csv", row.names=F) ### plot allele freq #### #plot genotype in adegenet manual a1 <- colSums(p@tab, na.rm=T) par(mfcol=c(3,4)) par(mar=c(5,5,1,1)) for (i in 1:11) { barplot(a1[p@loc.fac == p@loc.names[i]]) } ### dryad table #### both <- rbind(sap, pac) colnames(sap) aux <- both[,c(1, 3, 43:53)] colnames(aux) <- c("ID", "Date", "Ctest7" , "Ctest9" , "Ctest10" ,"Ctest11", "Ctest12", "Ctest16", "Ctest18", "Ctest31", "Ctest32", "Ctest36", "Ctest47") levels(aux$Date) <- c("05/01/2014", "04/21/2012", "05/24/2013", "06/01/2012" ,"06/02/2012", "06/21/2013","11/09/2012") aux$Date[is.na(aux$Date)] <- "05/01/2014" aux write.table(aux, "/Users/christineewers/Dropbox/Chelonibia/1.5_Microsat_development/MS/Dryad_genotypes.csv", row.names=F)