##############################################################
## Example.                                                 ##
## xcms package for metabolomics raw data preprocessing     ##
##                                                          ##
## Salva Casani                                             ##
##############################################################

library(multtest)
library(xcms)
library(CAMERA)
library(ropls)
library(ggplot2)
library(reshape2)
# Loaded up front so a missing package fails before the long xcms run.
library(statTarget)

#######################################################################
### In order to work with the xcms package, the raw data needs to be
### in AIA/ANDI, NetCDF, mzXML, mzData formats. In case your data
### is not in any of those formats, documentation of your instrument's
### software may be able to help. The raw files used here live under
### an "mzXML" directory tree.
#######################################################################

# Root of the raw-data tree; every "I:" path below is built from it.
raw_root <- "I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML"

# Working area for the QC-based correction (one sub-directory per mode).
qc_root <- file.path(raw_root, "QCbasedcorrection")

# Location the xcms reports are read back from in the normalisation step.
# NOTE(review): this is on drive "G:" while everything else is on "I:",
# and the reports are written to <raw_root>/<mode>/AB, not to this
# directory. Kept byte-identical to the original paths -- confirm whether
# the reports are copied here by hand or whether this is a stale path.
xcms_report_root <-
  "G:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection"

# Extensions accepted as raw data. The diffreport CSV is written into the
# raw-data directory, so an unfiltered recursive listing would feed that
# CSV to xcmsSet() on any re-run.
raw_file_pattern <- "\\.(cdf|nc|(mz)?xml|mzdata|mzml)$"

# Ionisation modes processed by every stage, in the original order.
ion_modes <- c("pos", "neg")

#######################################################################
### Peak Identification. Usage of xcmsSet to store the peaks into a
### data object. It provides methods to work with the peaks (group,
### align,...).
### The method find peaks can make use of different algorithms, which
### can be specified.
######################################################################

# Both modes run the same pipeline with the same parameters. The loop runs
# at top level, so cdfpath, cdffiles, xset, xset2, xset3 and reporttab
# remain global and hold the negative-mode results once it finishes, as in
# the original script.
for (ion_mode in ion_modes) {
  cdfpath <- file.path(raw_root, ion_mode, "AB")
  print(list.files(cdfpath))

  cdffiles <- list.files(
    cdfpath,
    pattern = raw_file_pattern,
    recursive = TRUE,
    full.names = TRUE,
    ignore.case = TRUE
  )
  if (length(cdffiles) == 0L) {
    stop("No raw data files found under ", cdfpath, call. = FALSE)
  }
  # View() is an interactive inspection aid only.
  if (interactive()) {
    View(cdffiles)
  }

  # Peak detection (centWave), followed by an initial grouping.
  xset <- xcmsSet(
    cdffiles,
    method = "centWave",
    ppm = 5,
    peakwidth = c(5, 20),
    snthresh = 3,
    prefilter = c(3, 1000),
    mzCenterFun = "wMean",
    integrate = 1,
    mzdiff = -0.001,
    noise = 10000,
    verbose.columns = TRUE,
    fitgauss = FALSE
  )
  xset <- group(xset)

  # Retention-time correction (obiwarp), then regroup with the final
  # grouping parameters.
  xset2 <- retcor(xset, method = "obiwarp", profStep = 0.1)
  xset2 <- group(xset2, bw = 5, mzwid = 0.015, minfrac = 0.5)

  xset3 <- fillPeaks(xset2)

  # Report comparing sample classes "A" and "B", left unsorted.
  reporttab <- diffreport(xset3, "A", "B", sortpval = FALSE)
  write.csv(
    reporttab,
    file = file.path(cdfpath, paste0("afterxcms_", ion_mode, ".csv"))
  )
}

##################################################
# QC based correction
# pos
# neg
##################################################

# I:/ESCC+lipidomics/8-7-2020ESCC lipidomics plasma/mzXML/QCbasedcorrection
for (ion_mode in ion_modes) {
  qc_dir <- file.path(qc_root, ion_mode)

  # The working directory is switched to the per-mode QC directory before
  # shiftCor() runs; the normalisation step below reads the result from
  # <qc_dir>/statTarget/shiftCor/After_shiftCor.
  # NOTE(review): the working directory is left changed afterwards, as in
  # the original script.
  setwd(qc_dir)

  samPeno <- file.path(qc_dir, "samPeno.csv")
  samFile <- file.path(qc_dir, paste0("samFile", ion_mode, ".csv"))

  shiftCor(
    samPeno,
    samFile,
    Frule = 0.8,
    MLmethod = "QCRFSC",
    QCspan = 0,
    imputeM = "KNN"
  )
}

###########################################################
# Normalisation of the QC-corrected tables
###########################################################

# Rescale each row of `dt` so its non-identifier columns sum to `total`.
#
# dt      data frame read from a shiftCor output file.
# id_cols positions of the leading identifier columns, carried over
#         unchanged (the first two columns by default).
# total   value each row sums to after scaling.
#
# Returns the identifier columns bound to the rescaled values.
# NOTE(review): presumably one row per sample and one column per feature;
# confirm against the statTarget output layout.
normalize_rows <- function(dt, id_cols = c(1, 2), total = 100000) {
  # drop = FALSE keeps the column name when only one value column exists.
  intensities <- as.matrix(dt[, -id_cols, drop = FALSE])
  scaled <- prop.table(intensities, margin = 1) * total
  cbind(dt[, id_cols], scaled)
}

# Directory holding the shiftCor result for one ionisation mode.
shift_cor_dir <- function(mode) {
  file.path(qc_root, mode, "statTarget", "shiftCor", "After_shiftCor")
}

# pos
posdt <- read.csv(file.path(shift_cor_dir("pos"), "shift_all_cor.csv"))
posdt2 <- normalize_rows(posdt)
# Written before the annotation file is touched, so a missing "G:" drive
# cannot prevent the normalised table from being saved.
write.csv(
  posdt2,
  file = file.path(shift_cor_dir("pos"), "shift_all_cor_normalized.csv"),
  row.names = FALSE
)

xcmspos <- read.csv(file.path(xcms_report_root, "pos", "afterxcms_pos.csv"))
xcmspos <- xcmspos[, c("name", "mzmed", "rtmed")]
# The positive-mode merge was disabled in the original script:
# metaPosdt <- merge(data.frame(name = names(posdt2)[-c(1, 2)]), xcmspos,
#                    by = "name", all.x = TRUE)

# neg
negdt <- read.csv(file.path(shift_cor_dir("neg"), "shift_all_cor.csv"))
negdt2 <- normalize_rows(negdt)
write.csv(
  negdt2,
  file = file.path(shift_cor_dir("neg"), "shift_all_cor_normalized.csv"),
  row.names = FALSE
)

xcmsneg <- read.csv(file.path(xcms_report_root, "neg", "afterxcms_neg.csv"))
xcmsneg <- xcmsneg[, c("name", "mzmed", "rtmed")]
# m/z and retention-time medians for every normalised feature column;
# features missing from the xcms report are kept with NA values.
metaNegdt <- merge(
  data.frame(name = names(negdt2)[-c(1, 2)]),
  xcmsneg,
  by = "name",
  all.x = TRUE
)

################################################################
################################################################