# 200405 rarefaction

setwd("~/Documents/ZFMK/2020 lysis + primer test/R scripts/1 rarefaction")

# processed table - limited to 0.01%, pick neg control filtered table instead.
#data <- read.csv("~/Google Drive/WritePapers/2020 lysis poolig/1 supporting info/Tab S1 OTU table/Lysis_OTUs_16_256_v3_rel.csv", stringsAsFactors=F)


data <- read.csv("~/Documents/ZFMK/2020 lysis + primer test/JAMP/data_cleanup/C substr negatives.csv", stringsAsFactors=F)


cbind(names(data))
row.names(data) <- data$ID
data <- data[, c(1:2, 12:71)]


data2 <- data
# pool replicates! and convert to rel abundance

data2 <- data[,1:2]

cbind(names(data))

for (i in seq(3, 62, 2)){
# merge
temp <- data[,i]+ data[, i+1]
# rel abundance
temp <- round(temp/sum(temp)*100, 6)

# add to data frame
data2 <- data.frame(data2, temp, stringsAsFactors=F)
names(data2)[ncol(data2)] <- sub("A$", "", names(data[i]))
}


nrow(data2)


data <- data2[,-c(1,2)]


tail(data)
head(data)


# 0.01% = 10.000 sequences
# 0.02% = 5.000 sequences
# 0.1% = 1.000 sequences

depth <- c(rev(seq(10000, 50000, by=10000)), rev(seq(1000, 10000, by=1000)), rev(seq(500, 900, by=100)))


all <- NULL

for(i in 100/depth){
exp <- NULL

for (j in 1:ncol(data)){
exp[j] <- sum(data[,j]> i)
}

all <- cbind(all, exp)
}


temp <- data.frame("sample"=names(data), all, stringsAsFactors=F)

names(temp) <- c("sample", paste("V", depth, sep=""))


write.csv(temp, "sequdepth.csv")


temp <- temp[-c(2:5, 12:15, 22:25), ]

col <- rep(c("Red", "Darkgray", "Green", "Black", "Blue", "Orange"), 3)


temp3 <- temp[c(1:6), ]
pdf("L1.pdf", height=9, width=6)
plot(NULL, log="x", ylim=c(0, 1600), xlim=c(500, 70000))

for (i in 1:nrow(temp3)){
lines(depth, temp3[i,-1], col=col[i], lwd=2)
text(55000, temp3[i,2], sub("L._", "", temp3$sample[i]), adj=0, cex=1, col=col[i])
}
dev.off()


temp3 <- temp[c(7:12), ]
pdf("L2.pdf", height=9, width=6)
plot(NULL, log="x", ylim=c(0, 1600), xlim=c(500, 70000))

for (i in 1:nrow(temp3)){
lines(depth, temp3[i,-1], col=col[i], lwd=2)
text(55000, temp3[i,2], sub("L._", "", temp3$sample[i]), adj=0, cex=1, col=col[i])
}
dev.off()


temp3 <- temp[c(13:18), ]
pdf("L3.pdf", height=9, width=6)
plot(NULL, log="x", ylim=c(0, 1600), xlim=c(500, 70000))

for (i in 1:nrow(temp3)){
lines(depth, temp3[i,-1], col=col[i], lwd=2)
text(55000, temp3[i,2], sub("L._", "", temp3$sample[i]), adj=0, cex=1, col=col[i])
}
dev.off()