setwd("/Users/christineewers/Dropbox/Chelonibia/1.5_Microsat_development/Data_files/1_microsat-development/palfinder-output/061413_palfinder_output")
table <- read.table("Ctest_PAL_summary.txt", sep="\t", header=T)
nrow(table) #540,412 microsat loci total
head(table)
table2 <- subset(table, Primers.found..1.y.0.n. == 1)
nrow(table2) #29,627 microsat loci with primers
nrow(table2)/nrow(table) #0.05482299 roughly 5% of identified repeats have potentially amplifiable primers (PALs)
hist(as.numeric(table2$Occurances.of.Amplifiable.Primer.Pair.in.PALs), xlab="Occurances of Amplifiable Primer Pair in PALs", breaks=8,  main="") #roughly 30,000 PALs have one occurance of Amplifiable Primer Pair in PALs
table3 <- subset(table2, Occurances.of.Amplifiable.Primer.Pair.in.PALs == 1)
nrow(table3) #28,897 PALs with one occurance in all PALs, still a lot...
hist(as.numeric(table3$Occurances.of.Amplifiable.Primer.Pair.in.Reads), ylim=c(0,50))
table4 <- subset(table3, Occurances.of.Amplifiable.Primer.Pair.in.Reads <= 10 & Occurances.of.Amplifiable.Primer.Pair.in.Reads > 1) #10x is average genome coverage, if primer pair is only once in all reads, it may be sequencing error
nrow(table4) #11,260
hist(as.numeric(table4$Occurances.of.Forward.Primer.in.Reads), ylim=c(0,200))
table5 <- subset(table4, Occurances.of.Forward.Primer.in.Reads <= 10 & Occurances.of.Forward.Primer.in.Reads > 1)
nrow(table5) #7,214 PALs
table6 <- subset(table5, Occurances.of.Reverse.Primer.in.Reads <= 10 & Occurances.of.Reverse.Primer.in.Reads > 1)
nrow(table6) #5,391 PALs
write.table(table6, "PALs2.txt", sep="\t")