setwd("/Users/christineewers/Dropbox/Chelonibia/1.5_Microsat_development/Data_files/1_microsat-development/palfinder-output/061413_palfinder_output") table <- read.table("Ctest_PAL_summary.txt", sep="\t", header=T) nrow(table) #540,412 microsat loci total head(table) table2 <- subset(table, Primers.found..1.y.0.n. == 1) nrow(table2) #29,627 microsat loci with primers nrow(table2)/nrow(table) #0.05482299 roughly 5% of identified repeats have potentially amplifiable primers (PALs) hist(as.numeric(table2$Occurances.of.Amplifiable.Primer.Pair.in.PALs), xlab="Occurances of Amplifiable Primer Pair in PALs", breaks=8, main="") #roughly 30,000 PALs have one occurance of Amplifiable Primer Pair in PALs table3 <- subset(table2, Occurances.of.Amplifiable.Primer.Pair.in.PALs == 1) nrow(table3) #28,897 PALs with one occurance in all PALs, still a lot... hist(as.numeric(table3$Occurances.of.Amplifiable.Primer.Pair.in.Reads), ylim=c(0,50)) table4 <- subset(table3, Occurances.of.Amplifiable.Primer.Pair.in.Reads <= 10 & Occurances.of.Amplifiable.Primer.Pair.in.Reads > 1) #10x is average genome coverage, if primer pair is only once in all reads, it may be sequencing error nrow(table4) #11,260 hist(as.numeric(table4$Occurances.of.Forward.Primer.in.Reads), ylim=c(0,200)) table5 <- subset(table4, Occurances.of.Forward.Primer.in.Reads <= 10 & Occurances.of.Forward.Primer.in.Reads > 1) nrow(table5) #7,214 PALs table6 <- subset(table5, Occurances.of.Reverse.Primer.in.Reads <= 10 & Occurances.of.Reverse.Primer.in.Reads > 1) nrow(table6) #5,391 PALs write.table(table6, "PALs2.txt", sep="\t")