##############################################################
##  Example.                                                ##
##  xcms package for metabolomics raw data preprocessing    ##
##                                                          ##
##  Salva Casani                                            ##
##############################################################
library(multtest)
library(xcms)
library(CAMERA)
library(ropls)
library(ggplot2)
library(reshape2)
#######################################################################
### In order to work with the xcms package, the raw data needs to be
### in AIA/ANDI, NetCDF, mzXML, or mzData format. If your data is not
### in any of those formats, the documentation of your instrument's
### software may be able to help. Here we load the raw data files found
### under the directory set below (its path points at an mzXML folder).
#######################################################################
# Root directory of the positive-mode raw data.
# NOTE(review): presumably holds one sub-folder per sample class ("A", "B"),
# since the report below compares those two classes -- confirm.
cdfpath <- "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/pos/AB"
list.files(cdfpath)

# Keep raw data files only. The xcms report (afterxcms_pos.csv) is written into
# this same directory further down, so an unfiltered recursive listing would
# pass that CSV to xcmsSet() the next time the script is run.
raw_pattern <- "\\.(cdf|nc|mzxml|mzml|mzdata|xml)$"
cdffiles <- list.files(cdfpath, pattern = raw_pattern, recursive = TRUE,
                       full.names = TRUE, ignore.case = TRUE)

# Fail early with a readable message instead of an obscure xcmsSet() error.
if (length(cdffiles) == 0) {
  stop("No raw data files found under: ", cdfpath, call. = FALSE)
}

# The data viewer is only useful (and only reliably available) interactively.
if (interactive()) {
  View(cdffiles)
}
#######################################################################
### Peak Identification. Usage of xcmsSet to store the peaks into a
### data object. It provides methods to work with the peaks (group,
### align,...).
### The findPeaks method can use different peak detection algorithms;
### the one to use is specified through the `method` argument.
######################################################################
# Peak detection on every file in `cdffiles` using the centWave algorithm.
xset <- xcmsSet(
  cdffiles,
  method = "centWave",
  ppm = 5,
  peakwidth = c(5, 20),
  snthresh = 3,
  prefilter = c(3, 1000),
  mzCenterFun = "wMean",
  integrate = 1,
  mzdiff = -0.001,
  noise = 10000,
  verbose.columns = TRUE,
  fitgauss = FALSE
)

# First grouping pass with the default settings.
xset <- group(xset)

# Retention time correction (obiwarp), followed by a second grouping pass
# with explicit bandwidth, m/z width and minimum-fraction settings.
xset2 <- retcor(xset, method = "obiwarp", profStep = 0.1)
xset2 <- group(xset2, bw = 5, mzwid = 0.015, minfrac = 0.5)

# Fill in peaks that are missing from the groups.
xset3 <- fillPeaks(xset2)

# Report comparing class "A" against class "B"; rows are not sorted by p-value.
reporttab <- diffreport(xset3, "A", "B", sortpval = FALSE)

# Save the positive-mode report next to the raw data.
write.csv(
  reporttab,
  file = "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/pos/AB/afterxcms_pos.csv"
)

#################################################
#neg
# Root directory of the negative-mode raw data.
# NOTE(review): presumably holds one sub-folder per sample class ("A", "B"),
# since the report below compares those two classes -- confirm.
cdfpath <- "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/neg/AB"
list.files(cdfpath)

# Keep raw data files only. The xcms report (afterxcms_neg.csv) is written into
# this same directory further down, so an unfiltered recursive listing would
# pass that CSV to xcmsSet() the next time the script is run.
raw_pattern <- "\\.(cdf|nc|mzxml|mzml|mzdata|xml)$"
cdffiles <- list.files(cdfpath, pattern = raw_pattern, recursive = TRUE,
                       full.names = TRUE, ignore.case = TRUE)

# Fail early with a readable message instead of an obscure xcmsSet() error.
if (length(cdffiles) == 0) {
  stop("No raw data files found under: ", cdfpath, call. = FALSE)
}

# The data viewer is only useful (and only reliably available) interactively.
if (interactive()) {
  View(cdffiles)
}

#######################################################################
### Peak Identification. Usage of xcmsSet to store the peaks into a
### data object. It provides methods to work with the peaks (group,
### align,...).
### The findPeaks method can use different peak detection algorithms;
### the one to use is specified through the `method` argument.
######################################################################
# Peak detection on every file in `cdffiles` using the centWave algorithm.
xset <- xcmsSet(
  cdffiles,
  method = "centWave",
  ppm = 5,
  peakwidth = c(5, 20),
  snthresh = 3,
  prefilter = c(3, 1000),
  mzCenterFun = "wMean",
  integrate = 1,
  mzdiff = -0.001,
  noise = 10000,
  verbose.columns = TRUE,
  fitgauss = FALSE
)

# First grouping pass with the default settings.
xset <- group(xset)

# Retention time correction (obiwarp), followed by a second grouping pass
# with explicit bandwidth, m/z width and minimum-fraction settings.
xset2 <- retcor(xset, method = "obiwarp", profStep = 0.1)
xset2 <- group(xset2, bw = 5, mzwid = 0.015, minfrac = 0.5)

# Fill in peaks that are missing from the groups.
xset3 <- fillPeaks(xset2)

# Report comparing class "A" against class "B"; rows are not sorted by p-value.
reporttab <- diffreport(xset3, "A", "B", sortpval = FALSE)

# Save the negative-mode report next to the raw data.
write.csv(
  reporttab,
  file = "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/neg/AB/afterxcms_neg.csv"
)

##################################################
#QC based correction
#pos
#neg
##################################################
library(statTarget) 
#I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection
# Positive mode: QC-based signal correction with statTarget::shiftCor().
# The working directory is switched first; the corrected table is later read
# back from a statTarget/ folder located under this directory.
setwd("I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/pos")

# Input files handed to shiftCor() (passed as paths, not as data frames).
samPeno <- "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/pos/samPeno.csv"
samFile <- "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/pos/samFilepos.csv"

shiftCor(
  samPeno,
  samFile,
  Frule = 0.8,
  MLmethod = "QCRFSC",
  QCspan = 0,
  imputeM = "KNN"
)

# Negative mode: QC-based signal correction with statTarget::shiftCor().
# The working directory is switched first; the corrected table is later read
# back from a statTarget/ folder located under this directory.
setwd("I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/neg")

# Input files handed to shiftCor() (passed as paths, not as data frames).
samPeno <- "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/neg/samPeno.csv"
samFile <- "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/neg/samFileneg.csv"

shiftCor(
  samPeno,
  samFile,
  Frule = 0.8,
  MLmethod = "QCRFSC",
  QCspan = 0,
  imputeM = "KNN"
)
###########################################################
###########################################################
#########################################################
# Row-wise normalization of the corrected tables (each row scaled to sum to 100000)
# Positive mode: load the table produced by the QC-based correction step.
posdt <- read.csv("I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/pos/statTarget/shiftCor/After_shiftCor/shift_all_cor.csv")

# Scale every row of the numeric part so that it sums to 100000.
# The first two columns are carried over untouched
# (presumably sample identifier and class -- TODO confirm).
temp <- prop.table(as.matrix(posdt[, -c(1, 2)]), margin = 1) * 100000
posdt2 <- cbind(posdt[, c(1, 2)], temp)

# Feature annotation (name, median m/z, median retention time) from the xcms
# report.
# NOTE(review): this file was originally read from drive "G:" while every other
# path in the script uses "I:"; the drive letter is aligned here on the
# assumption that "G:" was a stale mapping -- confirm the file location.
xcmspos <- read.csv("I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/pos/afterxcms_pos.csv")
xcmspos <- xcmspos[, c("name", "mzmed", "rtmed")]

# Left disabled, as in the original script:
#metaPosdt <- merge(data.frame(name = names(posdt2)[-c(1,2)]),xcmspos,by.x = "name",all.x = TRUE)

# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
write.csv(posdt2, file = "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/pos/statTarget/shiftCor/After_shiftCor/shift_all_cor_normalized.csv", row.names = FALSE)

# Negative mode: load the table produced by the QC-based correction step.
negdt <- read.csv("I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/neg/statTarget/shiftCor/After_shiftCor/shift_all_cor.csv")

# Scale every row of the numeric part so that it sums to 100000.
# The first two columns are carried over untouched
# (presumably sample identifier and class -- TODO confirm).
temp <- prop.table(as.matrix(negdt[, -c(1, 2)]), margin = 1) * 100000
negdt2 <- cbind(negdt[, c(1, 2)], temp)

# Feature annotation (name, median m/z, median retention time) from the xcms
# report.
# NOTE(review): this file was originally read from drive "G:" while every other
# path in the script uses "I:"; the drive letter is aligned here on the
# assumption that "G:" was a stale mapping -- confirm the file location.
xcmsneg <- read.csv("I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/neg/afterxcms_neg.csv")
xcmsneg <- xcmsneg[, c("name", "mzmed", "rtmed")]

# Attach m/z and retention time to each feature column of the normalized
# table; features without a match in the xcms report are kept (all.x = TRUE).
# The result is only held in memory; nothing in this section writes it out.
metaNegdt <- merge(data.frame(name = names(negdt2)[-c(1, 2)]), xcmsneg,
                   by = "name", all.x = TRUE)

# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
write.csv(negdt2, file = "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection/neg/statTarget/shiftCor/After_shiftCor/shift_all_cor_normalized.csv", row.names = FALSE)


################################################################
################################################################