library(vegan, permute, lattice)
library(analogue)
library(pheatmap)
library(dplyr)
setwd("C:\\mothur\\2014_2016_ITS_combine\\Bioinformatic analysis\\R analysis  14 16 subsample\\Fungal OTU filter")
data=read.delim("resident.otu_table.2.txt", header=TRUE, row.name=1)
str(data)
RA=read.delim(file="RA.resident.no.SWonlyOTU.txt", header=TRUE, row.names=1)
RA=t(RA)
rr=tran(RA, method="rootroot")
rr=t(rr)
#transform with relative abundance, need to cange the data samples to be in rows.
data=t(data)
RA=tran(data, method="percent")
RA=tran(RA, "pcent2prop")
PA=tran(RA, method="pa")

#transform the samples back into columns and save the table
RA=t(RA)
write.table(RA, file="RA.resident.txt", sep="\t")
#root root transformation of the RA data
rr=tran(RA, method="rootroot")
rr=t(rr)
write.table(rr, file="rootrootRA.resident.txt", sep="\t")
#make pretty heatmaps
factors=read.delim("factors.txt", header=TRUE, row.names=1 )
factorsDS= select(factors, Time, Sample_type)
str(factors)

pheatmap(rr, annotation_col = factorsDS)
#changing the colours
factorsDS$Time=factor(factorsDS$Time, levels=c("Y_2014", "Y_2016"))
TimeCol=c("pink", "green")
names(TimeCol)= levels(factorsDS$Time)

factorsDS$Sample_type=factor(factorsDS$Sample_type, levels=c("C. concentrica", "Scopalina sp.", "Sea water", "T. anhelans"))
SampleCol=c("purple", "orange", "blue", "red")
names(SampleCol)=levels(factorsDS$Sample_type)
AnnColour=list(Time= TimeCol, Sample_type=SampleCol)
AnnColour
png(filename="rr.heatmap.resident.png", width=850, height=1000)
pheatmap(rr, annotation_col = factorsDS, annotation_colors = AnnColour,
         cutree_cols= TRUE,  treeheight_col = 100, fontsize=10,
         clustering_method = "average", fontsize_col=0, fontsize_row=12)
dev.off()

png(filename="rr.heatmap.resident.blackwhite.png", width=850, height=1000)
pheatmap(rr, annotation_col = factorsDS, annotation_colors = AnnColour,
         cutree_cols= TRUE, cutree_rows=NA, treeheight_col = 100, fontsize=10,
         colorRampPalette(c("white", "black"))(100),
         clustering_method = "average", fontsize_col=0, fontsize_row=12)
dev.off()
#Graph resident fungal without sea water only OTUs
ra=read.delim(file="RA.resident.no.SWonlyOTU.txt", header=TRUE, row.names=1)
rra=tran(ra, method="rootroot")

png(filename="rr.noSW.resident.blackwhite.png", width=850, height=1000)
pheatmap(rra, annotation_col = factorsDS, annotation_colors = AnnColour,
         cutree_cols= TRUE, cutree_rows=NA, treeheight_col = 100, fontsize=12,
         colorRampPalette(c("white", "black"))(100),
         clustering_method = "average", fontsize_col=10, fontsize_row=12)
dev.off()
png(filename="rr.noSW.resident.png", width=850, height=1000)
pheatmap(rra, annotation_col = factorsDS, annotation_colors = AnnColour,
         cutree_cols= TRUE,  treeheight_col = 100, fontsize=14,
         clustering_method = "average", fontsize_col=10, fontsize_row=14)
dev.off()
#beta diversity with RA data
#compare the 2014 and 2016
samples=factor(c("14" , "16" , "14", "16" , "14" , "16" , "14",  "16",  "14",
                 "16",  "14" , "16" , "14", "16", "14", "16", "14","16",
                 "14", "16" , "14",  "16","14",  "16"))
#cocatonate the sponges samples with the tomperal time
s2=factor(c("C_14",  "C_16",  "C_14",  "C_16", "C_14", "C_16", "S_14",  "S_16", "S_14" ,
            "S_16", "S_14", "S_16",  "SW_14", "SW_16", "SW_14", "SW_16", "SW_14","SW_16",
            "T_14","T_16", "T_14", "T_16", "T_16", "T_16" ))
#PERANOVA analysis of different sponge species using both 2014 and 2016 samples
s3=factor(c("C" , "C",  "C" , "C" , "C" , "C",  "S",  "S" , "S",
            "S", "S" , "S",  "SW", "SW","SW" ,"SW", "SW", "SW",
            "T", "T",  "T" , "T", "T", "T" ))
RA=t(RA)
ord=metaMDS(RA)
plot(ord, display = "sites", type="points")
png(filename="NMDS.otu.png", width=800, height=800)
plot(ord, display = "sites", type="t")
dev.off()

s2
cols=c("green","dark green", "yellow", "orange", "sky blue"," dark blue", "red", "dark red")
png(filename="NMDS.resident2.png", width=800, height=800)
plot(ord, display = "sites", type="t")
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[1], show.groups="C_14", border=cols[1])
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[2], show.groups="C_16", border=cols[2])
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[3], show.groups="S_14", border=cols[3])
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[4], show.groups="S_16", border=cols[4])
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[5], show.groups="SW_14", border=cols[5])
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[6], show.groups="SW_16", border=cols[6])
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[7], show.groups="T_14", border=cols[7])
ordiellipse(ord, groups=s2, display = "sites", kind="se", conf=0.95, lwd=2, draw = "polygon", col=cols[8], show.groups="T_16", border=cols[8])
dev.off()

#PERMANOVA with full set of data on the RA data set
adonis(RA~samples)
#analysis of RA transformed data noly between samples sponges spicies and different time points
C14C16=s2=="C_14"|s2=="C_16"
adonis(RA[C14C16,]~s2[C14C16])

S14S16=s2=="S_14"|s2=="S_16"
adonis(RA[S14S16,]~s2[S14S16])

T14T16=s2=="T_14"|s2=="T_16"
adonis(RA[T14T16,]~s2[T14T16])

SW14SW16=s2=="SW_14"|s2=="SW_16"
adonis(RA[SW14SW16,]~s2[SW14SW16])
#Adonis using raw data OTU-level PERMANOVA
adonis(data~samples)
C14C16=s2=="C_14"|s2=="C_16"
adonis(data[C14C16,]~s2[C14C16])

S14S16=s2=="S_14"|s2=="S_16"
adonis(data[S14S16,]~s2[S14S16])

T14T16=s2=="T_14"|s2=="T_16"
adonis(data[T14T16,]~s2[T14T16])

SW14SW16=s2=="SW_14"|s2=="SW_16"
adonis(data[SW14SW16,]~s2[SW14SW16])
#Adonis using PA trnsformaiton OTU-level PERMANOVA
adonis(PA~samples)
C14C16=s2=="C_14"|s2=="C_16"
adonis(PA[C14C16,]~s2[C14C16])

S14S16=s2=="S_14"|s2=="S_16"
adonis(PA[S14S16,]~s2[S14S16])

T14T16=s2=="T_14"|s2=="T_16"
adonis(PA[T14T16,]~s2[T14T16])

SW14SW16=s2=="SW_14"|s2=="SW_16"
adonis(PA[SW14SW16,]~s2[SW14SW16])
#PERANOVA analysis of different sponge species using both 2014 and 2016 samples
s3
CS=s3=="C"|s3=="S"
adonis(RA[CS,]~s3[CS])

CT=s3=="C"|s3=="T"
adonis(RA[CT,]~s3[CT])

CSW=s3=="C"|s3=="SW"
adonis(RA[CSW,]~s3[CSW])

ST=s3=="S"|s3=="T"
adonis(RA[ST,]~s3[ST])

SSW=s3=="S"|s3=="SW"
adonis(RA[SSW,]~s3[SSW])

TSW=s3=="T"|s3=="SW"
adonis(RA[TSW,]~s3[TSW])
# Adonis on raw counts
CS=s3=="C"|s3=="S"
adonis(data[CS,]~s3[CS])

CT=s3=="C"|s3=="T"
adonis(data[CT,]~s3[CT])

CSW=s3=="C"|s3=="SW"
adonis(data[CSW,]~s3[CSW])

ST=s3=="S"|s3=="T"
adonis(data[ST,]~s3[ST])

SSW=s3=="S"|s3=="SW"
adonis(data[SSW,]~s3[SSW])

TSW=s3=="T"|s3=="SW"
adonis(data[TSW,]~s3[TSW])
# Adonis on PA transformaiton
CS=s3=="C"|s3=="S"
adonis(PA[CS,]~s3[CS])

CT=s3=="C"|s3=="T"
adonis(PA[CT,]~s3[CT])

CSW=s3=="C"|s3=="SW"
adonis(PA[CSW,]~s3[CSW])

ST=s3=="S"|s3=="T"
adonis(PA[ST,]~s3[ST])

SSW=s3=="S"|s3=="SW"
adonis(PA[SSW,]~s3[SSW])

TSW=s3=="T"|s3=="SW"
adonis(PA[TSW,]~s3[TSW])
#subsampled data for shannon's index (alpha diversity)
ds=read.delim(file="resident.otu_table.2.subsample.txt", header=TRUE, row.name=1)
ds=t(ds)
dsra=tran(ds, method="percent")
dsra=tran(dsra, "pcent2prop")
dspa=tran(dsra, method="pa")
#make sure the sample name are in the cols
dsra=t(dsra)
H<-diversity(dsra)
#make new factors for analysis and adonis for the new sub-sampled dataset, removed 6 samples
s4=factor(c("14" , "16" , "14", "16" , "14" , "16" ,  "16",
             "16",  "16","14" , "16" , "14", "16", "14", "16",
             "14", "16" , "16"))

s5=factor(c("C_14",  "C_16",  "C_14",  "C_16", "C_14", "C_16",  "S_16",
                       "S_16",  "SW_16", "SW_14", "SW_16",  "SW_14","SW_16",
                       "T_14","T_16", "T_14", "T_16", "T_16" ))


s6=factor(c("C" , "C",  "C" , "C" , "C" , "C",  "S",  "S" ,
              "SW","SW" ,"SW", "SW", "SW",
           "T",  "T" , "T", "T", "T" ))

plot(H~s4, ylab="Shannon's index", xlab="Year", cex=1)
plot(H~s5, ylab="Shannon's index", xlab="Sponge samples", cex=1)
plot(H~s6, ylab="Shannon's index", xlab="Sponge species", cex=1)

png(filename="shannon's index.RA.otu.sampletype.png", width=800, height=550)
plot(H~s6, ylab="Shannon's index.", xlab="Year", cex=2)
dev.off()

png(filename="shannon's index.RA.otu.samples.png", width=800, height=550)
plot(H~s5, ylab="Shannon's index", xlab="Sponge samples", cex=5)
dev.off()
#actual shannon index values
tapply(H,s4,mean)
tapply (H, s4,sd)

tapply(H,s5,mean)
tapply (H, s5,sd)

tapply(H,s6,mean)
tapply (H, s6,sd)
#PERANOVA with subsampled data dsra raw counts temporal
adonis(ds~s4)
C14C16=s5=="C_14"|s5=="C_16"
adonis(ds[C14C16,]~s4[C14C16])

S14S16=s5=="S_14"|s5=="S_16"
adonis(ds[S14S16,]~s5[S14S16])

T14T16=s5=="T_14"|s5=="T_16"
adonis(ds[T14T16,]~s5[T14T16])

SW14SW16=s5=="SW_14"|s5=="SW_16"
adonis(ds[SW14SW16,]~s5[SW14SW16])
#PERANOVA with subsampled data dsra relative abundance temporal
adonis(dsra~s4)
C14C16=s5=="C_14"|s5=="C_16"
adonis(dsra[C14C16,]~s4[C14C16])

S14S16=s5=="S_14"|s5=="S_16"
adonis(dsra[S14S16,]~s5[S14S16])

T14T16=s5=="T_14"|s5=="T_16"
adonis(dsra[T14T16,]~s5[T14T16])

SW14SW16=s5=="SW_14"|s5=="SW_16"
adonis(dsra[SW14SW16,]~s5[SW14SW16])
#PERANOVA with subsampled data dspa presence-absence temporal
adonis(dspa~s4)
C14C16=s5=="C_14"|s5=="C_16"
adonis(dspa[C14C16,]~s4[C14C16])

S14S16=s5=="S_14"|s5=="S_16"
adonis(dspa[S14S16,]~s5[S14S16])

T14T16=s5=="T_14"|s5=="T_16"
adonis(dspa[T14T16,]~s5[T14T16])

SW14SW16=s5=="SW_14"|s5=="SW_16"
adonis(dspa[SW14SW16,]~s5[SW14SW16])
#adonis between sponge species raw counts
CS=s6=="C"|s6=="S"
adonis(ds[CS,]~s6[CS])

CT=s6=="C"|s6=="T"
adonis(ds[CT,]~s6[CT])

CSW=s6=="C"|s6=="SW"
adonis(ds[CSW,]~s6[CSW])

ST=s6=="S"|s6=="T"
adonis(ds[ST,]~s6[ST])

SSW=s6=="S"|s6=="SW"
adonis(ds[SSW,]~s6[SSW])

TSW=s6=="T"|s6=="SW"
adonis(ds[TSW,]~s6[TSW])
#PERMANOVA of sample type (sub sampled dataset)
adonis(dsra~s5)
adonis(dsra~s6)
CS=s6=="C"|s6=="S"
adonis(dsra[CS,]~s6[CS])

CT=s6=="C"|s6=="T"
adonis(dsra[CT,]~s6[CT])

CSW=s6=="C"|s6=="SW"
adonis(dsra[CSW,]~s6[CSW])

ST=s6=="S"|s6=="T"
adonis(dsra[ST,]~s6[ST])

SSW=s6=="S"|s6=="SW"
adonis(dsra[SSW,]~s6[SSW])

TSW=s6=="T"|s6=="SW"
adonis(dsra[TSW,]~s6[TSW])
#PERMANOVA of sample type (sub sampled dataset) PA transformation
CS=s6=="C"|s6=="S"
adonis(dsra[CS,]~s6[CS])

CT=s6=="C"|s6=="T"
adonis(dspa[CT,]~s6[CT])

CSW=s6=="C"|s6=="SW"
adonis(dspa[CSW,]~s6[CSW])

ST=s6=="S"|s6=="T"
adonis(dspa[ST,]~s6[ST])

SSW=s6=="S"|s6=="SW"
adonis(dspa[SSW,]~s6[SSW])

TSW=s6=="T"|s6=="SW"
adonis(dspa[TSW,]~s6[TSW])
#Shannon's index, mean and sd, data must be in a data.frame
m=tapply(H,s3,mean)
s=tapply (H, s3,sd)
oneway.test(H~s3)
#data has to be a data.frame
dat=as.data.frame(dsra)
aov.out=aov(H~s4, data=dat)
summary(aov.out)
aov.out2=aov(H~s5, data=dat)
summary(aov.out2)
TukeyHSD(aov.out2)
aov.out3=aov(H~s6, data=dat)
summary(aov.out3)
TukeyHSD(aov.out3)

summary.lm(aov.out)
kruskal.test(H~s3, data=dat)
#cluster dendrogram of the rr data to get the scale 
rra=read.delim(file ="RA.resident.no.SWonlyOTU.txt", header=TRUE, row.names=1)
rra=t(rra)
hrr=tran(rra, method="rootroot")

#samples must be in cols
hrr=t(hrr)
data.bc=vegdist(hrr,method = "bray")
col.clust=hclust(data.bc, method="average")
plot(col.clust, cex=1)
png(filename="hrr.heatmap.braycurtis2.png", width = 800, height = 800)        
pheatmap(as.matrix(hrr), rowv=NA, colorRampPalette(c("white", "black"))(10),
         colv = as.dendrogram(col.clust), annotation_col = factorsDS, 
         annotation_colors = AnnColour, fontsize =14, treeheight_col = 100,
         fontsize_col = 10, fontsize_row = 14)
dev.off()
png(filename="clust.den.hrr.png", width=800, height=550)
plot(col.clust, cex=1.5)
dev.off()
#PERMDISP
data.bc2=vegdist(rra,method = "bray")
h.clust=hclust(data.bc2, method="average")
scores(data.bc, display=c("site"))
scores(data.bc, choices=c(1,2))
bdm=betadisper(data.bc2, s3, type=c("median"), bias.adjust= FALSE)
betadisper(data.bc2, s3, type=c("centroid"), bias.adjust= FALSE)
TukeyHSD(bdm, which = "s3", order =FALSE, conf.level=0.95)
#16s bacteria
setwd("C:\\mothur\\fungal_16s\\cluster_97/16s all sponges")
dat16s=read.delim(file="relative.abund.sub.sample.filter.txt", header= TRUE, row.names=1)
dat16s.bc=vegdist(dat16s, method="bray")
s16=factor(c("Sw", "Sw", "Sw", "S",  "S",  "S",  "T",  "T",  "T",  "C",  "C",  "C"))
betadisper(dat16s.bc, s16, type=c("median"), bias.adjust=FALSE)
betadisper(dat16s.bc, s16, type=c("centroid"), bias.adjust=FALSE)
#PERMDISP of fungal only 2014 samples
fun=read.delim(file="PERMdisp.fungal.RA.txt", header=TRUE, row.names=1)
fun=t(fun)
s1=factor(c( "C",  "C",  "C",  "S",  "S",  "S", "Sw", "Sw", "Sw", "T",  "T",  "T"))
f.bc=vegdist(fun, method="bray")
betadisper(f.bc, s1, type=c("median"), bias.adjust= FALSE)
betadisper(f.bc, s1, type=c("centroid"), bias.adjust= FALSE)
mod=betadisper(f.bc, s1)
mod
anova(mod)