rm(list = ls())
library(dplyr)
library(caret)
library(naivebayes)
library(rpart)
library(e1071)
library(ranger)
library(signal)
library(import)
library(rpart.plot)
library(treeheatr)
library(prospectr)
library(xgboost)
library(rstatix)
# Load the raw spectra table. The first two columns are kept as identifiers
# (later code refers to them as File.Name and Class); every other column is
# treated as one spectral variable.
rawdata <- read.csv(
  "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/raw.csv",
  check.names = FALSE
)

# Rename the spectral columns with the numeric wavenumber parsed from the
# first 7 characters of each header (the original note says characters 2-8
# were needed on another platform, 1-7 here).
spectral_cols <- 3:ncol(rawdata)
wavelengths <- as.numeric(substr(colnames(rawdata)[spectral_cols], 1, 7))
colnames(rawdata)[spectral_cols] <- wavelengths
# Optional manual outlier removal (disabled):
# rawdata <- rawdata %>% .[-c(38,58,63,75,80),]

# Keep only the spectral columns whose values are all <= 3.
spectra_only <- rawdata[, -c(1:2)]
within_limit <- apply(spectra_only, 2, function(x) all(x <= 3))
filtered_raw <- spectra_only[, within_limit]
rawdata <- cbind(rawdata[, c(1:2)], filtered_raw)

# Reproducible train/test split: 62 training row indices drawn from 1..94.
# NOTE(review): 94 is hard-coded; rows beyond 94 (if any) always land in the
# test set. Confirm it matches nrow(rawdata) for the active data variant.
set.seed(10)
tr <- sample(seq_len(94), size = 62)
# tr <- sample(1:99, size = 66)

# For each pairwise comparison, drop the class that does not take part in it
# from both the training and the test partition.
excluded_class <- c(LCMM = "HC", HCMM = "LC", HCLC = "MM")
partitions <- list(trainset = rawdata[tr, ], testset = rawdata[-tr, ])
data_list <- lapply(excluded_class, function(dropped) {
  lapply(partitions, function(x) subset(x, Class != dropped))
})


# Build every preprocessing variant of one spectral data set.
#
# Args:
#   pd1: data frame whose first two columns are the sample id and the class
#        label; all remaining columns are spectral intensities.
# Returns:
#   Named list of data frames (RAW, SNV, CS, DERIV1, DERIV2, SNV_CS, CS_SNV,
#   MSC). Each has the two id columns followed by the transformed spectra.
#
# NOTE(review): every statistic (centering/scaling, MSC reference spectrum)
# is estimated from `pd1` itself, so calling this separately on a training
# and a test set preprocesses them with different parameters.
pre_methods <- function(pd1) {

  # Rebuild a data frame by stacking its rows one at a time.
  toDF <- function(x) {
    do.call(rbind, lapply(seq_len(nrow(x)), function(i) x[i, ]))
  }

  # SNV: Standard Normal Variate (row-wise), id columns re-attached.
  snv <- function(data) {
    snv_data <- standardNormalVariate(data) %>% as.data.frame
    cbind(sc, snv_data)
  }

  # CS: column-wise center and scale, id columns re-attached.
  cs <- function(data) {
    prePro <- preProcess(data, method = c("center", "scale"))
    cbind(sc, predict(prePro, data))
  }

  # First derivative (Savitzky-Golay, order 2, window 5) of each spectrum.
  D1_pre <- function(data) {
    newspectra <- apply(data$NIR, 1, FUN = sgolayfilt, p = 2, n = 5, m = 1, ts = 1)
    dataDERIV1 <- data.frame(data[, 1:2], NIR = I(as.data.frame(t(newspectra))))
    DERIV1 <- toDF(dataDERIV1$NIR)
    names(DERIV1) <- names(data[, -c(1:2)])
    DERIV1
  }

  # Second derivative (Savitzky-Golay, order 2, window 5) of each spectrum.
  D2_pre <- function(data) {
    newspectra <- apply(data$NIR, 1, FUN = sgolayfilt, p = 2, n = 5, m = 2, ts = 1)
    dataDERIV2 <- data.frame(data[, 1:2], NIR = I(as.data.frame(t(newspectra))))
    DERIV2 <- toDF(dataDERIV2$NIR)
    names(DERIV2) <- names(data[, -c(1:2)])
    DERIV2
  }

  # SNV followed by CS. The center/scale parameters are estimated from the
  # SNV-transformed spectra; previously they were estimated from the raw
  # spectra and then applied to the SNV spectra, which neither centers nor
  # scales the SNV data.
  snv_cs <- function(snv_frame) {
    snv_spectra <- snv_frame[, -c(1:2)]
    prePro <- preProcess(snv_spectra, method = c("center", "scale"))
    cbind(sc, predict(prePro, snv_spectra))
  }

  # CS followed by SNV.
  cs_snv <- function(cs_data) {
    a <- standardNormalVariate(cs_data[, -c(1:2)]) %>% as.data.frame
    cbind(sc, a)
  }

  sc <- pd1[, c(1:2)]   # sample id and class
  d <- pd1[, -c(1:2)]   # spectra only
  snv_data <- snv(d)
  cs_data <- cs(d)
  snvcs <- snv_cs(snv_data)
  cssnv <- cs_snv(cs_data)

  # Wrap the spectra into a single `NIR` column, the layout used by the MSC
  # and derivative steps below.
  pd2 <- data.frame(sc, NIR = I(d))
  data <- pd2

  # MSC: Multiplicative Scatter Correction.
  newspectra <- msc(as.matrix(data$NIR))
  dataMSC <- data.frame(data[, 1:2], NIR = I(as.data.frame(newspectra)))
  msc_data <- as.list(toDF(dataMSC$NIR)) %>% Reduce(cbind, .) %>% as.data.frame
  colnames(msc_data) <- names(d)
  mscdata <- cbind(sc, msc_data)

  # Bring the derivative data into the common layout:
  # sample, class, var1, var2, ..., varn.
  D1_data <- cbind(sc, as.list(D1_pre(pd2)) %>% Reduce(cbind, .) %>% as.data.frame())
  colnames(D1_data) <- colnames(pd1)

  D2_data <- cbind(sc, as.list(D2_pre(pd2)) %>% Reduce(cbind, .) %>% as.data.frame())
  colnames(D2_data) <- colnames(pd1)

  list(
    RAW = pd1,
    SNV = snv_data,
    CS = cs_data,
    DERIV1 = D1_data,
    DERIV2 = D2_data,
    SNV_CS = snvcs,
    CS_SNV = cssnv,
    MSC = mscdata
  )
}

# Run every preprocessing method on the train and test partition of each
# comparison (LCMM, HCMM, HCLC).
after_pre <- lapply(data_list, function(partition_pair) {
  lapply(partition_pair, pre_methods)
})

# Split the result into one list of training sets and one of test sets.
trainlist <- lapply(after_pre, `[[`, "trainset")
testlist <- lapply(after_pre, `[[`, "testset")

# Plot the MM training spectra under each preprocessing method, together
# with their mean spectrum, and save all eight panels in one figure.
ydata <- pre_methods(rawdata[tr, ])  # preprocess the full training partition

# Long-format table per preprocessing method: all MM spectra plus one extra
# row holding their column means (labelled Class = "Average").
MM_list <- lapply(ydata, function(data) {
  mm_only <- subset(data, Class == "MM")
  average <- cbind(
    data.frame(File.Name = "new", Class = "Average"),
    as.data.frame(t(colMeans(mm_only[, -c(1:2)])))
  )
  reshape2::melt(rbind(mm_only, average), id = c("File.Name", "Class"))
})

MM_plot <- Map(function(data, premethod) {
  # `variable` holds the wavenumber column names; convert back to numbers.
  data$variable <- as.numeric(as.character(data$variable))
  data$Class <- factor(data$Class, levels = c("MM", "Average"), ordered = TRUE)
  ggplot(data, aes(x = variable, y = value, color = Class)) +
    # `linewidth` replaces the deprecated `size` for lines, matching the
    # usage elsewhere in this script.
    geom_line(linewidth = 1, lty = 8) +
    # scale_color_manual(values = c(MM = "#2C91E0",Average = "firebrick3"))+
    labs(title = paste0("Trainset ", premethod), x = "Wavenumber", y = "Absorbance") +
    theme_classic() +
    theme(
      panel.background = element_blank(),
      axis.text = element_text(color = "black", size = 10),
      axis.title = element_text(color = "black", size = 10, face = "bold"),
      plot.title = element_text(hjust = 0.5, color = "black", size = 15, face = "bold"),
      legend.text = element_text(color = "black", size = 10),
      legend.title = element_text(color = "black", size = 10, face = "bold")
    )
}, MM_list, names(MM_list))

mm_spectra_panel <- ggpubr::ggarrange(plotlist = MM_plot, ncol = 2, nrow = 4)
print(mm_spectra_panel)

# Save the arranged panel explicitly instead of relying on last_plot(), and
# make sure the relative output directory exists first.
dir.create("modelEvaluation", showWarnings = FALSE)
ggsave(filename = "modelEvaluation/MM-Spectra.tiff", plot = mm_spectra_panel,
       dpi = 600, units = "mm", width = 400, height = 400)


# Same MM training spectra, drawn with base graphics (matplot): one TIFF per
# preprocessing method.
MM_list <- lapply(ydata, function(data) subset(data, Class == "MM"))
mmplot <- list()
dir.create("modelEvaluation", showWarnings = FALSE)
# NOTE(review): "Preprocrss" in the file name and title looks like a typo for
# "Preprocess"; left unchanged so existing output names stay stable.
for (premethod in names(MM_list)) {
  data <- MM_list[[premethod]]
  # Column names are wavenumbers stored as text; pass them as numbers so the
  # x axis is unambiguously numeric.
  wavenumber <- as.numeric(colnames(data)[-c(1:2)])
  tiff(filename = paste0("modelEvaluation/MM Spectra - ",premethod," Preprocrss.tiff"),
       res = 600, units = "mm", width = 150, height = 100)
  # Always close the device, even if plotting fails, so no graphics device
  # is left open and later plots do not end up in this file.
  tryCatch(
    matplot(wavenumber,
            t(data[, -c(1:2)]),
            type = "l",
            xlab = "Wavenumber",
            ylab = "Absorbance",
            main = paste0("MM Spectra - ",premethod," Preprocrss")),
    finally = dev.off()
  )
}


# Resampling scheme shared by all binary classifiers below:
# 3-fold cross-validation repeated 5 times (adjust as needed).
fitControl <- trainControl(method = "repeatedcv", number = 3, repeats = 5)

# PLS classifier (caret method "pls").
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# Usage notes from the original author:
#   predict(fit, testdata, type = "prob") gives class probabilities;
#   confusionMatrix(predict(fit, testdata[,-c(1:2)]), as.factor(testdata$Class))
#   gives the full evaluation.
pls <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "pls",
               trControl = fitControl,
               verbose = FALSE)
  invisible(fit)
}

# Linear SVM (caret method "svmLinear").
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# Original author's notes: class probabilities could not be obtained from
# this caret fit. An alternative kept for reference is a direct e1071::svm
# fit (cross = 5, kernel = "linear", type = "C-classification", scale = F,
# probability = T, cost = 2), whose probabilities are stored as the
# "probabilities" attribute of predict(..., probability = TRUE).
# Evaluate with
#   confusionMatrix(predict(fit, testdata[,-c(1:2)]), as.factor(testdata$Class)).
svm <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "svmLinear",
               trControl = fitControl)
  invisible(fit)
}

# Naive Bayes (caret method "naive_bayes"); `usepoisson = TRUE` is forwarded
# to the underlying fitting function.
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# predict(fit, testdata, type = "prob") gives class probabilities.
nb <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "naive_bayes",
               trControl = fitControl,
               usepoisson = TRUE)
  invisible(fit)
}

# k-nearest neighbours (caret method "knn") with centering/scaling inside
# caret and 10 candidate tuning values.
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# predict(fit, testdata, type = "prob") gives class probabilities.
knn <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "knn",
               trControl = fitControl,
               preProcess = c("center", "scale"),
               tuneLength = 10)
  invisible(fit)
}

# Random forest with caret defaults (method "rf").
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# predict(fit, testdata, type = "prob") gives class probabilities.
rf_default <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "rf",
               trControl = fitControl)
  invisible(fit)
}

# Parallel random forest (caret method "parRF").
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# predict(fit, testdata, type = "prob") gives class probabilities.
rf_par <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "parRF",
               trControl = fitControl)
  invisible(fit)
}

# Random forest via ranger (caret method "ranger").
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# Original author's note: no way to get class probabilities from this fit
# had been found yet; evaluate with
#   confusionMatrix(predict(fit, testdata[,-c(1:2)]), as.factor(testdata$Class)).
rf_ranger <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "ranger",
               trControl = fitControl)
  invisible(fit)
}

# Regularized random forest (caret method "RRF").
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# predict(fit, testdata, type = "prob") gives class probabilities.
rf_regular <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "RRF",
               trControl = fitControl)
  invisible(fit)
}

# Single classification tree (caret method "rpart") with 20 candidate
# tuning values.
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# predict(fit, testdata, type = "prob") gives class probabilities.
tree <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])
  fit <- train(x = predictors,
               y = outcome,
               method = "rpart",
               trControl = fitControl,
               tuneLength = 20)
  invisible(fit)
}

# Gradient-boosted trees (caret method "xgbTree") evaluated on a single,
# fixed hyper-parameter combination.
# `data`: columns 1-2 are ids (the class label is read from `Class`), the
# rest are predictors. Returns the fitted caret `train` object invisibly.
# predict(fit, testdata, type = "prob") gives class probabilities.
xgbtree <- function(data) {
  predictors <- data[, -c(1:2)]
  outcome <- as.factor(data[["Class"]])

  # One-row tuning grid: no hyper-parameter search is performed.
  fixed_grid <- expand.grid(
    nrounds = 100,
    max_depth = 6,
    eta = 0.3,
    gamma = 0,
    colsample_bytree = 1,
    min_child_weight = 1,
    subsample = 1
  )

  # Alternative control kept for reference (no resampling):
  # caret::trainControl(method = "none", verboseIter = FALSE,
  #                     allowParallel = TRUE)  # FALSE for reproducible results
  fit <- train(x = predictors,
               y = outcome,
               trControl = fitControl,
               tuneGrid = fixed_grid,
               method = "xgbTree",
               verbose = TRUE)
  invisible(fit)
}





# Placeholder from the original author ("deal with this later"): compare the
# resampling results of individually fitted models.
# The fits named below exist only as local variables inside the wrapper
# functions above, so at this point of the script they are normally
# undefined and the unguarded call aborted the run. The comparison now runs
# only when all of them have been created in the global environment.
resample_fit_names <- c(PLS = "PLS",
                        SVM = "SVM",
                        NB = "NB2",
                        KNN = "KNN",
                        RRF = "RF_regular",
                        RF = "RF_default",
                        parRF = "RF_par",
                        rangerRF = "RF_ranger",
                        Tree = "TREE",
                        XGBTree = "XGBTREE")
resample_fit_found <- vapply(
  resample_fit_names,
  function(nm) exists(nm, envir = globalenv()),
  logical(1)
)
if (all(resample_fit_found)) {
  resamps <- resamples(lapply(resample_fit_names, get, envir = globalenv()))
} else {
  message("Skipping resamples(): fitted model objects not found: ",
          paste(resample_fit_names[!resample_fit_found], collapse = ", "))
}

#################################################################
# Registry of model-fitting wrappers, keyed by the label used in all result
# tables and figures.
modellist <- setNames(
  list(pls, svm, nb, knn, rf_regular, rf_default, rf_par, rf_ranger, tree, xgbtree),
  c("PLS", "SVM", "NB", "KNN", "RRF", "RF", "parRF", "rangerRF", "Tree", "XGBTree")
)

# Fit every model on every preprocessed training set.
# Result layout: base_train_model[[comparison]][[model]][[premethod]].
# NOTE(review): the three sections below run the same training code on the
# same `trainlist`; they presumably correspond to data variants selected by
# manually toggling the filtering lines near the top of the script (all
# columns / values <= 3 only / <= 3 plus outlier removal). Run top to bottom,
# all three fits come from identical input and the last one wins — confirm
# which variant is intended before sourcing the whole file.
base_train_model <- lapply(trainlist, function(Compare) ## choose the comparison data sets to fit here
  lapply(modellist, function(model) lapply(Compare,model)) ## choose the models to fit here
)
save(base_train_model, file = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/3Group10Algorithm8pre.RData")
load("R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/3Group10Algorithm8pre.RData")
# Variant: spectral columns restricted to values <= 3
base_train_modelunder3 <- lapply(trainlist, function(Compare) ## choose the comparison data sets to fit here
  lapply(modellist, function(model) lapply(Compare,model)) ## choose the models to fit here
)
save(base_train_modelunder3, file = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/3Group10Algorithm8preunder3.RData")
load("R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/3Group10Algorithm8preunder3.RData")
base_train_model<- base_train_modelunder3 

# Variant: values <= 3 and outlier samples removed
base_train_modelo3 <- lapply(trainlist, function(Compare) ## choose the comparison data sets to fit here
  lapply(modellist, function(model) lapply(Compare,model)) ## choose the models to fit here
)
save(base_train_modelo3, file = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/3Group10Algorithm8o3.RData")
load("R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/3Group10Algorithm8o3.RData")
# The downstream evaluation uses this last variant.
base_train_model<- base_train_modelo3

# Regroup the fitted models from
#   base_train_model[[comparison]][[model]][[premethod]]
# into
#   newdatalist[[comparison]][[premethod]][[model]]
# so that all models sharing one preprocessing method sit side by side.
newdatalist <- list()
for (cp in c("LCMM", "HCMM", "HCLC")) {
  fits_by_model <- base_train_model[[cp]]
  for (model in names(fits_by_model)) {
    fits_by_premethod <- fits_by_model[[model]]
    for (premethod in names(fits_by_premethod)) {
      newdatalist[[cp]][[premethod]][[model]] <- fits_by_premethod[[premethod]]
    }
  }
}
# lapply version: one box-and-whisker plot of the resampling metrics per
# comparison and preprocessing method.
# NOTE(review): the lattice theme is modified on a local copy only and is
# never passed to trellis.par.set(), so it does not affect the plot.
resample_list <- lapply(newdatalist, function(compare) {
  lapply(compare, function(premethod) {
    model_resamples <- resamples(premethod)
    local_theme <- trellis.par.get()
    local_theme$plot.symbol$col <- rgb(.2, .2, .2, .4)
    local_theme$plot.symbol$pch <- 16
    local_theme$plot.line$col <- rgb(1, 0, 0, .7)
    local_theme$plot.line$lwd <- 2
    bwplot(model_resamples)
  })
})
# Same box plots as above, this time titled "<comparison>-<premethod>" and
# stored as figlist[[comparison]][[premethod]].
# NOTE(review): as above, the modified lattice theme is never applied.
figlist <- Map(function(compare, cp) {
  Map(function(result, premethod) {
    model_resamples <- resamples(result)
    local_theme <- trellis.par.get()
    local_theme$plot.symbol$col <- rgb(.2, .2, .2, .4)
    local_theme$plot.symbol$pch <- 16
    local_theme$plot.line$col <- rgb(1, 0, 0, .7)
    local_theme$plot.line$lwd <- 2
    bwplot(model_resamples, main = paste0(cp, "-", premethod))
  }, compare, names(compare))
}, newdatalist, names(newdatalist))
# Arrange the eight box plots of each comparison on a 4 x 2 grid and save
# one TIFF per comparison.
LCMMplot <- figlist[["LCMM"]]
HCMMplot <- figlist[["HCMM"]]
HCLCplot <- figlist[["HCLC"]]

resample_plot_dir <- "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3"
for (comparison in c("LCMM", "HCMM", "HCLC")) {
  p <- ggpubr::ggarrange(plotlist = figlist[[comparison]], nrow = 4, ncol = 2)
  # Pass the arranged figure explicitly; previously `p` was assigned but
  # ggsave() silently fell back to ggplot2's "last plot".
  ggsave(filename = file.path(resample_plot_dir, paste0(comparison, ".tiff")),
         plot = p, dpi = 600, units = "mm", width = 400, height = 400,
         bg = "white")
}

# Evaluate every fitted model on its own training set and on the matching
# test set, collecting the confusion-matrix statistics (per-class and
# overall) into one row per comparison x preprocessing x model x partition.
tt_list <- list(train = trainlist, test = testlist)
p_list <- list()
result_rows <- list()  # rows are collected here and bound once at the end

for (t2 in c("train", "test")) {
  t_list <- tt_list[[t2]]
  for (i in c("LCMM", "HCMM", "HCLC")) {
    premethod <- newdatalist[[i]]
    relative_data <- t_list[[i]]
    for (j in names(premethod)) {
      model <- premethod[[j]]
      testdata <- relative_data[[j]]
      for (k in names(model)) {
        # Every entry is a caret `train` object, so the generic predict()
        # handles all model types. (A former special case for raw "mvr"
        # objects could never trigger and produced placeholder labels.)
        pred_classes <- as.factor(predict(model[[k]], testdata[, -c(1:2)]))

        # Give the reference the same level set and order as the
        # predictions so confusionMatrix() also works when a class is
        # absent from this partition.
        truth <- factor(testdata$Class, levels = levels(pred_classes))

        some_info <- confusionMatrix(pred_classes, truth)
        by_class <- as.data.frame(t(some_info$byClass))
        overall <- as.data.frame(t(some_info$overall))

        parameter <- data.frame(
          Compare = i,
          Premethod = j,
          Model = k,
          trainORtest = t2
        )
        # Column order is unchanged: ids, per-class statistics, overall
        # statistics.
        result_rows[[length(result_rows) + 1L]] <- cbind(parameter, by_class, overall)
      }
    }
  }
}

results <- if (length(result_rows) > 0) do.call(rbind, result_rows) else data.frame()

results %>% openxlsx::write.xlsx("R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/preresults.xlsx")
############################## Additional model evaluation ######################
# Pairwise comparison of algorithms: for each data set (HCMM, LCMM) and each
# preprocessing method, run a paired t-test on the per-resample accuracies
# of every pair of algorithms.
# NOTE(review): pairing is only meaningful if all models were resampled on
# the same folds; the shared trainControl does not fix `index` or seeds, so
# confirm that the folds are actually identical across models.
algorithm_comparison_results <- list()

for (dataset in c("HCMM", "LCMM")) {
  cat("分析数据集:", dataset, "\n")

  premethods <- names(newdatalist[[dataset]])

  for (premethod in premethods) {
    cat("  预处理方法:", premethod, "\n")

    fitted_models <- newdatalist[[dataset]][[premethod]]
    algorithms <- names(fitted_models)

    # Pairwise comparison needs at least two algorithms.
    if (length(algorithms) < 2) next

    # Per-resample accuracy of each algorithm, named by resample id when the
    # id column is available.
    algorithm_resamples <- list()
    for (algorithm in algorithms) {
      resample_tbl <- fitted_models[[algorithm]][["resample"]]
      if (is.null(resample_tbl)) next
      accuracy <- as.numeric(as.character(resample_tbl$Accuracy))
      resample_id <- resample_tbl$Resample
      if (!is.null(resample_id) && length(resample_id) == length(accuracy)) {
        names(accuracy) <- as.character(resample_id)
      }
      algorithm_resamples[[algorithm]] <- accuracy
    }

    for (pair in combn(algorithms, 2, simplify = FALSE)) {
      alg1 <- pair[1]
      alg2 <- pair[2]
      values1 <- algorithm_resamples[[alg1]]
      values2 <- algorithm_resamples[[alg2]]

      # Both algorithms need complete resample results of equal length.
      if (is.null(values1) || is.null(values2)) next
      if (length(values1) != length(values2) || anyNA(values1) || anyNA(values2)) next

      # Pair observations by resample id rather than by row position when
      # both vectors carry the same unique ids.
      ids1 <- names(values1)
      ids2 <- names(values2)
      if (!is.null(ids1) && !is.null(ids2) &&
          anyDuplicated(ids1) == 0 && setequal(ids1, ids2)) {
        values2 <- values2[ids1]
      }
      values1 <- unname(values1)
      values2 <- unname(values2)

      paired_test <- tryCatch(
        t.test(values1, values2, paired = TRUE),
        error = function(e) {
          cat("    t检验错误:", e$message, "\n")
          NULL
        }
      )
      if (is.null(paired_test)) next

      # Effect size: mean difference divided by the average of the two
      # groups' standard deviations.
      mean_diff <- mean(values1 - values2)
      pooled_sd <- sqrt((sd(values1)^2 + sd(values2)^2) / 2)
      cohens_d <- mean_diff / pooled_sd

      result_key <- paste(dataset, premethod, alg1, alg2, sep = "_")
      algorithm_comparison_results[[result_key]] <- list(
        dataset = dataset,
        premethod = premethod,
        algorithm1 = alg1,
        algorithm2 = alg2,
        t_statistic = paired_test$statistic,
        p_value = paired_test$p.value,
        mean_difference = mean_diff,
        algorithm1_mean = mean(values1),
        algorithm2_mean = mean(values2),
        cohens_d = cohens_d,
        conf_int_lower = paired_test$conf.int[1],
        conf_int_upper = paired_test$conf.int[2]
      )

      cat("    比较:", alg1, "vs", alg2, "- 完成\n")
    }
  }
}

# Post-process the pairwise comparisons: Bonferroni-adjust the p-values over
# all comparisons, add significance stars, print per data set, and export
# the full and the significant-only tables as CSV.
if (length(algorithm_comparison_results) == 0) {
  cat("没有找到任何可比较的结果\n")
} else {
  comparison_rows <- lapply(algorithm_comparison_results, as.data.frame)
  results_df <- do.call(rbind, comparison_rows)

  results_df$p_adjusted <- p.adjust(results_df$p_value, method = "bonferroni")

  # Stars: *** < 0.001, ** < 0.01, * < 0.05, otherwise "ns".
  adjusted_p <- results_df$p_adjusted
  results_df$significance <- ifelse(
    adjusted_p >= 0.05, "ns",
    ifelse(adjusted_p >= 0.01, "*",
           ifelse(adjusted_p >= 0.001, "**", "***"))
  )

  print("HCMM数据集结果:")
  print(subset(results_df, dataset == "HCMM"))

  print("LCMM数据集结果:")
  print(subset(results_df, dataset == "LCMM"))

  write.csv(results_df, "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/algorithm_pairwise_comparison_results.csv", row.names = FALSE)

  # Comparisons that remain significant after adjustment.
  significant_results <- results_df[results_df$p_adjusted < 0.05, ]
  print("显著结果:")
  print(significant_results)
  write.csv(significant_results, "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/algorithm_pairwise_comparison_results_sig.csv", row.names = FALSE)
}


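# 2. For the RAW + PLS and RAW + SVM models: select features on the training data (PLS-VIP for PLS, SVM-RFE for SVM), keep the top 200 features, and refit each model on them. Then obtain the refitted models' predictions on the test set. For the 200-feature PLS and SVM models, in both LCMM and HCMM, the predicted test-set probabilities are needed to draw ROC curves and the same probability-distribution scatter plots as before.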


############pls##################
library(pROC)
# RAW + PLS model of each comparison.
PLSmodel <- lapply(newdatalist, function(by_premethod) by_premethod[["RAW"]][["PLS"]])

# All-wavenumber test sets (unprocessed), one per comparison.
testdata <- lapply(data_list, `[[`, "testset")

# Predicted class probabilities on the test set, with the true class
# appended as column `Class`.
pls_test_preresults <- Map(
  function(pls_fit_model, test_data) {
    class_probs <- predict(pls_fit_model, test_data, type = "prob")
    mutate(class_probs, Class = test_data$Class)
  },
  PLSmodel, testdata
)


# Names of the 200 most important variables of each RAW + PLS model,
# ranked by caret's varImp() `Overall` score.
PLS_VIP_top200 <- lapply(PLSmodel, function(fit) {
  importance <- varImp(fit)[["importance"]]
  rownames(top_n(importance, 200, Overall))
})

# Train/test sets of LCMM and HCMM reduced to the id columns plus the
# selected features.
PLS_feature200_list <- lapply(c(LCMM = "LCMM", HCMM = "HCMM"), function(cp) {
  features <- PLS_VIP_top200[[cp]]
  lapply(data_list[[cp]], function(t2) {
    cbind(t2[, 1:2], dplyr::select(t2, all_of(features)))
  })
})

# Refit PLS on the reduced training sets and predict the reduced test sets.
pls200_train_model <- lapply(PLS_feature200_list, function(cp) pls(cp[["trainset"]]))
testdata <- lapply(PLS_feature200_list, `[[`, "testset")

pls200_test_preresults <- Map(
  function(pls_fit_model, test_data) {
    class_probs <- predict(pls_fit_model, test_data, type = "prob")
    mutate(class_probs, Class = test_data$Class)
  },
  pls200_train_model, testdata
)


library(patchwork)
##HCMM plot
##all wave
hcmm_p <- pls_test_preresults[["HCMM"]]
p <-  hcmm_p %>% t_test(MM ~ Class)
PLS_HCMM_point_all <- ggplot(hcmm_p,aes(x = Class,y = MM,color = Class,shape = Class))+
  geom_point(alpha = 0.8,position = position_jitter(width = 0.1, height = 0))+
  labs(x = "",y = "Probability", title = "HCMM - All features (RAW + PLS)")+
  annotate("text",x = 1.5,y = 1.05,label =paste0("p = ",p$p))+
  geom_hline(yintercept = 0.5,lty = 5,color = "grey")+
  scale_y_continuous(limits = c(0,1.05))+
  theme_classic()+
  scale_shape_manual(values = c(17,17))+
  scale_color_manual(values = c("royalblue1","brown3"))+
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
        axis.title = element_text(size = 12,face = "bold"),
        axis.text.x = element_text(size = 12,face = "bold"))+
  geom_segment(aes(x = 1.12, y = 1, xend = 1.88, yend = 1),
               linetype = "solid", color = "black",linewidth = 0.1)


PLS_HCMM_point_all <- ggplot(hcmm_p,aes(x = Class,y = MM,color = Class,shape = Class))+
  geom_point(alpha = 0.8,position = position_jitter(width = 0.1, height = 0))+
  labs(x = "",y = "Probability", title = "HCMM - All features (RAW + PLS)")+
  annotate("text",x = 1.5,y = 1.05,label =paste0("p = ",p$p))+
  geom_hline(yintercept = 0.5,lty = 5,color = "grey")+
  scale_y_continuous(limits = c(0,1.05))+
  theme_classic()+
  scale_shape_manual(values = c(17,17))+
  scale_color_manual(values = c("royalblue1","brown3"))+
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
        axis.title = element_text(size = 12,face = "bold"),
        axis.text.x = element_text(size = 12,face = "bold"))+
  geom_segment(aes(x = 1.12, y = 1, xend = 1.88, yend = 1),
               linetype = "solid", color = "black",linewidth = 0.1)




roc_result <- roc(hcmm_p$Class,hcmm_p$MM)
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
PLS_HCMM_ROC_all <- ggroc(roc_result,size = 0.8,colour = "darkblue",legacy.axes = TRUE)+
  theme_bw()+
  labs(x= "1-Specificity",y = "Sensitivity",title ="HCMM - All features (RAW + PLS)" )+
  annotate("text",x =0.75,y=0.09, label = paste("AUC :",format(round(xy_position$AUC,3),nsmall = 3)), colour = "black",size = 5)+
  geom_segment(aes(x = 0, xend = 1, y = 0, yend = 1),color = "grey",
               linetype = "dashed")+  
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
        axis.title = element_text(face = "bold",size =15),
        panel.grid = element_blank())


##top200
hcmm_p <- pls200_test_preresults[["HCMM"]]
p <-  hcmm_p %>% t_test(MM ~ Class)
PLS_HCMM_point_top200 <- ggplot(hcmm_p,aes(x = Class,y = MM,color = Class,shape = Class))+
  geom_point(alpha = 0.8,position = position_jitter(width = 0.1, height = 0))+
  labs(x = "",y = "Probability", title = "VIP-TOP200 features (RAW + PLS)")+
  annotate("text",x = 1.5,y = 1.05,label =paste0("p = ",p$p))+
  geom_hline(yintercept = 0.5,lty = 5,color = "grey")+
  scale_y_continuous(limits = c(0,1.05))+
  theme_classic()+
  scale_shape_manual(values = c(17,17))+
  scale_color_manual(values = c("royalblue1","brown3"))+
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
        axis.title = element_text(size = 12,face = "bold"),
        axis.text.x = element_text(size = 12,face = "bold"))+
  geom_segment(aes(x = 1.12, y = 1, xend = 1.88, yend = 1),
               linetype = "solid", color = "black",size = 0.1)


roc_result <- roc(hcmm_p$Class,hcmm_p$MM)
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
PLS_HCMM_ROC_top200 <- ggroc(roc_result,size = 0.8,
                             colour = "darkblue",
                             legacy.axes = TRUE)+
  theme_bw()+
  labs(x= "1-Specificity",
       y = "Sensitivity",
       title ="VIP-TOP200 features (RAW + PLS)" )+
  annotate("text",
           x =0.75,
           y=0.09, 
           label = paste("AUC :",format(round(xy_position$AUC,3),nsmall = 3)), 
           colour = "black",
           size = 5)+
  geom_segment(aes(x = 0, 
                   xend = 1, 
                   y = 0, 
                   yend = 1),
               color = "grey",
               linetype = "dashed")+  
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
        axis.title = element_text(face = "bold",size =15),panel.grid = element_blank())


cowplot::plot_grid(PLS_HCMM_point_all,NULL,PLS_HCMM_ROC_all,NULL,
                   NULL,NULL,NULL,NULL,
                   PLS_HCMM_point_top200,NULL,PLS_HCMM_ROC_top200,NULL,
                   ncol = 4,nrow = 3,axis = "bltr",align = "hv",
                   rel_widths = c(1,0.05,1,0.05),
                   rel_heights = c(1,0.05,1))
ggsave(filename = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/HCMM All+top200 RAW+PLS.tiff",units = "mm",height = 200,width = 230,dpi = 600,bg = "white")

##LCMM plot
##all wave
lcmm_p <- pls_test_preresults[["LCMM"]]
p <-  lcmm_p %>% t_test(MM ~ Class)
PLS_LCMM_point_all <- ggplot(lcmm_p,aes(x = Class,y = MM,color = Class,shape = Class))+
  geom_point(alpha = 0.8,position = position_jitter(width = 0.1, height = 0))+
  labs(x = "",y = "Probability", title = "LCMM - All features (RAW + PLS)")+
  annotate("text",x = 1.5,y = 1.05,label =paste0("p = ",p$p))+
  geom_hline(yintercept = 0.5,lty = 5,color = "grey")+
  scale_y_continuous(limits = c(0,1.05))+
  theme_classic()+
  scale_shape_manual(values = c(17,17))+
  scale_color_manual(values = c("#00A087CC","brown3"))+
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
        axis.title = element_text(size = 12,face = "bold"),
        axis.text.x = element_text(size = 12,face = "bold"))+
  geom_segment(aes(x = 1.12, y = 1, xend = 1.88, yend = 1),
               linetype = "solid", color = "black",size = 0.1)


roc_result <- roc(lcmm_p$Class,lcmm_p$MM)
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
PLS_LCMM_ROC_all <- ggroc(roc_result,size = 0.8,colour = "darkblue",legacy.axes = TRUE)+
  theme_bw()+
  labs(x= "1-Specificity",y = "Sensitivity",title ="LCMM - All features (RAW + PLS)" )+
  annotate("text",x =0.75,y=0.09, label = paste("AUC :",format(round(xy_position$AUC,3),nsmall = 3)), colour = "black",size = 5)+
  geom_segment(aes(x = 0, xend = 1, y = 0, yend = 1),color = "grey",
               linetype = "dashed")+  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
                                            plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
                                            axis.title = element_text(face = "bold",size =15),panel.grid = element_blank())


##top200
lcmm_p <- pls200_test_preresults[["LCMM"]]
p <-  lcmm_p %>% t_test(MM ~ Class)
PLS_LCMM_point_top200 <- ggplot(lcmm_p,aes(x = Class,y = MM,color = Class,shape = Class))+
  geom_point(alpha = 0.8,position = position_jitter(width = 0.1, height = 0))+
  labs(x = "",y = "Probability", title = "VIP-TOP200 features (RAW + PLS)")+
  annotate("text",x = 1.5,y = 1.05,label =paste0("p = ",p$p))+
  geom_hline(yintercept = 0.5,lty = 5,color = "grey")+
  scale_y_continuous(limits = c(0,1.05))+
  theme_classic()+
  scale_shape_manual(values = c(17,17))+
  scale_color_manual(values = c("#00A087CC","brown3"))+
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),
        plot.subtitle = element_text(hjust = 0.5,size = 15,face = "bold"),
        axis.title = element_text(size = 12,face = "bold"),
        axis.text.x = element_text(size = 12,face = "bold"))+
  geom_segment(aes(x = 1.12, y = 1, xend = 1.88, yend = 1),
               linetype = "solid", color = "black",size = 0.1)


# ROC of the MM score for LC (controls) vs MM (cases). Levels and direction are
# fixed explicitly so pROC cannot auto-select the direction and turn a
# worse-than-chance score into an AUC above 0.5.
roc_result <- roc(lcmm_p$Class, lcmm_p$MM, levels = c("LC", "MM"), direction = "<")
# First row of the "best" threshold coordinates; the AUC is stored alongside it.
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
PLS_LCMM_ROC_top200 <- ggroc(roc_result, size = 0.8, colour = "darkblue", legacy.axes = TRUE) +
  theme_bw() +
  labs(x = "1-Specificity", y = "Sensitivity", title = "VIP-TOP200 features (RAW + PLS)") +
  annotate("text", x = 0.75, y = 0.09,
           label = paste("AUC :", format(round(xy_position$AUC, 3), nsmall = 3)),
           colour = "black", size = 5) +
  # Chance diagonal, drawn once instead of once per ROC coordinate.
  annotate("segment", x = 0, xend = 1, y = 0, yend = 1,
           color = "grey", linetype = "dashed") +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(face = "bold", size = 15),
        panel.grid = element_blank())

# 2 x 2 figure (all features on top, top-200 below); the NULL cells and the
# 0.05 relative sizes act as thin spacers between the panels.
PLS_LCMM_grid <- cowplot::plot_grid(PLS_LCMM_point_all, NULL, PLS_LCMM_ROC_all, NULL,
                                    NULL, NULL, NULL, NULL,
                                    PLS_LCMM_point_top200, NULL, PLS_LCMM_ROC_top200, NULL,
                                    ncol = 4, nrow = 3, axis = "bltr", align = "hv",
                                    rel_widths = c(1, 0.05, 1, 0.05),
                                    rel_heights = c(1, 0.05, 1))
PLS_LCMM_grid
# Save the grid explicitly instead of relying on last_plot(), and use a white
# background so the spacer cells are not transparent in the TIFF (matches the
# other ggsave() calls in this file that already pass bg = "white").
ggsave(filename = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/LCMM All+top200 RAW+PLS.tiff",
       plot = PLS_LCMM_grid, units = "mm", height = 200, width = 230, dpi = 600, bg = "white")


############ SVM ##################
# Pull the test split out of every class-pair entry of data_list.
testdata <- lapply(data_list, `[[`, "testset")

# Fit a linear C-classification SVM (cost = 2) with class-probability output.
#
# traindata: data frame whose first two columns are dropped from the predictors
#            and which has a `Class` column used as the outcome.
# Returns the fitted e1071 svm model (including the 5-fold CV requested via `cross`).
# Note: this wrapper shares its name with e1071::svm, hence the explicit namespace.
svm <- function(traindata){
  e1071::svm(x = traindata[, -c(1:2)],
             y = as.factor(traindata[["Class"]]),
             cross = 5,
             kernel = "linear",
             type = "C-classification",
             scale = FALSE,        # predictors are used as supplied, without rescaling
             probability = TRUE,   # required for predict(..., probability = TRUE)
             cost = 2)
}
# One SVM per class pair, trained on that pair's training split.
SVMmodel <- lapply(data_list, function(cp) svm(cp[["trainset"]]))

# Class probabilities on the matching test split, with the true Class appended.
svm_test_preresults <- Map(
  function(svm_fit_model, test_data) {
    prediction <- predict(svm_fit_model, test_data[, -c(1:2)], probability = TRUE)
    class_prob <- as.data.frame(attributes(prediction)[["probabilities"]])
    mutate(class_prob, Class = test_data$Class)
  },
  SVMmodel, testdata
)
### all wavenumbers
## HCMM
# Test-set SVM probabilities for the HC vs MM comparison.
hcmm_p <- svm_test_preresults[["HCMM"]]
# Compare the MM probability between the two classes; the p column is printed on the plot.
p <- hcmm_p %>% t_test(MM ~ Class)
SVM_HCMM_point_all <- ggplot(hcmm_p, aes(x = Class, y = MM, color = Class, shape = Class)) +
  geom_point(alpha = 0.8, position = position_jitter(width = 0.1, height = 0)) +
  labs(x = "", y = "Probability", title = "HCMM - All features (RAW + SVM)") +
  annotate("text", x = 1.5, y = 1.05, label = paste0("p = ", p$p)) +
  geom_hline(yintercept = 0.5, lty = 5, color = "grey") +
  scale_y_continuous(limits = c(0, 1.05)) +
  theme_classic() +
  scale_shape_manual(values = c(17, 17)) +
  scale_color_manual(values = c("royalblue1", "brown3")) +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 12, face = "bold")) +
  # Bracket under the p-value. annotate() draws it exactly once; a
  # geom_segment() with constant aes() would redraw it for every data row.
  annotate("segment", x = 1.12, y = 1, xend = 1.88, yend = 1,
           linetype = "solid", color = "black", linewidth = 0.1)


# ROC of the MM probability for HC (controls) vs MM (cases). Levels and
# direction are fixed explicitly so pROC cannot auto-select the direction and
# turn a worse-than-chance score into an AUC above 0.5.
roc_result <- roc(hcmm_p$Class, hcmm_p$MM, levels = c("HC", "MM"), direction = "<")
# First row of the "best" threshold coordinates; the AUC is stored alongside it.
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
SVM_HCMM_ROC_all <- ggroc(roc_result, size = 0.8, colour = "darkblue", legacy.axes = TRUE) +
  theme_bw() +
  labs(x = "1-Specificity", y = "Sensitivity", title = "HCMM - All features (RAW + SVM)") +
  annotate("text", x = 0.75, y = 0.09,
           label = paste("AUC :", format(round(xy_position$AUC, 3), nsmall = 3)),
           colour = "black", size = 5) +
  # Chance diagonal, drawn once instead of once per ROC coordinate.
  annotate("segment", x = 0, xend = 1, y = 0, yend = 1,
           color = "grey", linetype = "dashed") +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(face = "bold", size = 15),
        panel.grid = element_blank())

## LCMM
# Test-set SVM probabilities for the LC vs MM comparison.
lcmm_p <- svm_test_preresults[["LCMM"]]
# Compare the MM probability between the two classes; the p column is printed on the plot.
p <- lcmm_p %>% t_test(MM ~ Class)
SVM_LCMM_point_all <- ggplot(lcmm_p, aes(x = Class, y = MM, color = Class, shape = Class)) +
  geom_point(alpha = 0.8, position = position_jitter(width = 0.1, height = 0)) +
  labs(x = "", y = "Probability", title = "LCMM - All features (RAW + SVM)") +
  annotate("text", x = 1.5, y = 1.05, label = paste0("p = ", p$p)) +
  geom_hline(yintercept = 0.5, lty = 5, color = "grey") +
  scale_y_continuous(limits = c(0, 1.05)) +
  theme_classic() +
  scale_shape_manual(values = c(17, 17)) +
  scale_color_manual(values = c("#00A087CC", "brown3")) +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 12, face = "bold")) +
  # Bracket under the p-value. annotate() draws it exactly once; a
  # geom_segment() with constant aes() would redraw it for every data row.
  annotate("segment", x = 1.12, y = 1, xend = 1.88, yend = 1,
           linetype = "solid", color = "black", linewidth = 0.1)


# ROC of the MM probability for LC (controls) vs MM (cases). Levels and
# direction are fixed explicitly so pROC cannot auto-select the direction and
# turn a worse-than-chance score into an AUC above 0.5.
roc_result <- roc(lcmm_p$Class, lcmm_p$MM, levels = c("LC", "MM"), direction = "<")
# First row of the "best" threshold coordinates; the AUC is stored alongside it.
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
SVM_LCMM_ROC_all <- ggroc(roc_result, size = 0.8, colour = "darkblue", legacy.axes = TRUE) +
  theme_bw() +
  labs(x = "1-Specificity", y = "Sensitivity", title = "LCMM - All features (RAW + SVM)") +
  annotate("text", x = 0.75, y = 0.09,
           label = paste("AUC :", format(round(xy_position$AUC, 3), nsmall = 3)),
           colour = "black", size = 5) +
  # Chance diagonal, drawn once instead of once per ROC coordinate.
  annotate("segment", x = 0, xend = 1, y = 0, yend = 1,
           color = "grey", linetype = "dashed") +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(face = "bold", size = 15),
        panel.grid = element_blank())




###TOP200

# Recursive feature elimination around a linear SVM (caret).
#
# data: data frame whose first two columns are dropped from the predictors and
#       which has a `Class` column used as the outcome.
# The outer RFE loop uses 10-fold CV repeated 3 times; the inner model tuning
# uses `fitControl`, which must already exist in the calling environment.
# Returns the object produced by rfe().
svm_rfe <- function(data){
  outer_control <- rfeControl(functions = caretFuncs,
                              method = "repeatedcv",
                              number = 10,
                              repeats = 3, verbose = FALSE)
  outcome <- as.factor(data[["Class"]])
  rfe(x = data[, -c(1:2)],
      y = outcome,
      rfeControl = outer_control,
      method = "svmLinear",
      trControl = fitControl)
}



# Run SVM-RFE on the training split of every class pair (overwrites SVMmodel).
SVMmodel <- lapply(data_list, function(cp) svm_rfe(cp[["trainset"]]))

# Names of the 200 highest-ranked variables (by `Overall`) per class pair.
# NOTE(review): svm_rfe() passes no `sizes` to rfe(); confirm varImp() returns
# at least 200 variables here, otherwise fewer than 200 features are kept.
SVM_VIP_top200 <- lapply(SVMmodel, function(cp) {
  importance <- varImp(cp)
  rownames(top_n(importance, 200, Overall))
})

# Reduce the train/test splits of LCMM and HCMM to the first two columns plus
# the selected features.
SVM_feature200_list <- lapply(setNames(nm = c("LCMM", "HCMM")), function(cp) {
  features <- SVM_VIP_top200[[cp]]
  lapply(data_list[[cp]], function(t2) {
    cbind(t2[, 1:2], dplyr::select(t2, all_of(features)))
  })
})

# Refit the SVM on the reduced training sets and score the reduced test sets.
svm200_train_model <- lapply(SVM_feature200_list, function(cp) svm(cp[["trainset"]]))
testdata <- lapply(SVM_feature200_list, `[[`, "testset")

svm200_test_preresults <- Map(
  function(svm_fit_model, test_data) {
    prediction <- predict(svm_fit_model, test_data[, -c(1:2)], probability = TRUE)
    class_prob <- as.data.frame(attributes(prediction)[["probabilities"]])
    mutate(class_prob, Class = test_data$Class)
  },
  svm200_train_model, testdata
)

# Test-set SVM probabilities of the top-200 model for the HC vs MM comparison.
hcmm_p <- svm200_test_preresults[["HCMM"]]
# Compare the MM probability between the two classes; the p column is printed on the plot.
p <- hcmm_p %>% t_test(MM ~ Class)
SVM_HCMM_point_top200 <- ggplot(hcmm_p, aes(x = Class, y = MM, color = Class, shape = Class)) +
  geom_point(alpha = 0.8, position = position_jitter(width = 0.1, height = 0)) +
  labs(x = "", y = "Probability", title = "RFE-TOP200 features (RAW + SVM)") +
  annotate("text", x = 1.5, y = 1.05, label = paste0("p = ", p$p)) +
  geom_hline(yintercept = 0.5, lty = 5, color = "grey") +
  scale_y_continuous(limits = c(0, 1.05)) +
  theme_classic() +
  scale_shape_manual(values = c(17, 17)) +
  scale_color_manual(values = c("royalblue1", "brown3")) +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 12, face = "bold"),
        panel.grid = element_blank()) +
  # Bracket under the p-value. annotate() draws it exactly once; a
  # geom_segment() with constant aes() would redraw it for every data row.
  annotate("segment", x = 1.12, y = 1, xend = 1.88, yend = 1,
           linetype = "solid", color = "black", linewidth = 0.1)



# ROC of the MM probability for HC (controls) vs MM (cases). Levels and
# direction are fixed explicitly so pROC cannot auto-select the direction and
# turn a worse-than-chance score into an AUC above 0.5.
roc_result <- roc(hcmm_p$Class, hcmm_p$MM, levels = c("HC", "MM"), direction = "<")
# First row of the "best" threshold coordinates; the AUC is stored alongside it.
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
SVM_HCMM_ROC_top200 <- ggroc(roc_result, size = 0.8, colour = "darkblue", legacy.axes = TRUE) +
  theme_bw() +
  labs(x = "1-Specificity", y = "Sensitivity", title = "RFE-TOP200 features (RAW + SVM)") +
  annotate("text", x = 0.75, y = 0.09,
           label = paste("AUC :", format(round(xy_position$AUC, 3), nsmall = 3)),
           colour = "black", size = 5) +
  # Chance diagonal, drawn once instead of once per ROC coordinate.
  annotate("segment", x = 0, xend = 1, y = 0, yend = 1,
           color = "grey", linetype = "dashed") +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(face = "bold", size = 15),
        panel.grid = element_blank())


# Test-set SVM probabilities of the top-200 model for the LC vs MM comparison.
lcmm_p <- svm200_test_preresults[["LCMM"]]
# Compare the MM probability between the two classes; the p column is printed on the plot.
p <- lcmm_p %>% t_test(MM ~ Class)
SVM_LCMM_point_top200 <- ggplot(lcmm_p, aes(x = Class, y = MM, color = Class, shape = Class)) +
  geom_point(alpha = 0.8, position = position_jitter(width = 0.1, height = 0)) +
  labs(x = "", y = "Probability", title = "RFE-TOP200 features (RAW + SVM)") +
  annotate("text", x = 1.5, y = 1.05, label = paste0("p = ", p$p)) +
  geom_hline(yintercept = 0.5, lty = 5, color = "grey") +
  scale_y_continuous(limits = c(0, 1.05)) +
  theme_classic() +
  scale_shape_manual(values = c(17, 17)) +
  scale_color_manual(values = c("#00A087CC", "brown3")) +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 12, face = "bold")) +
  # Bracket under the p-value. annotate() draws it exactly once; a
  # geom_segment() with constant aes() would redraw it for every data row.
  annotate("segment", x = 1.12, y = 1, xend = 1.88, yend = 1,
           linetype = "solid", color = "black", linewidth = 0.1)


# ROC of the MM probability for LC (controls) vs MM (cases). Levels and
# direction are fixed explicitly so pROC cannot auto-select the direction and
# turn a worse-than-chance score into an AUC above 0.5.
roc_result <- roc(lcmm_p$Class, lcmm_p$MM, levels = c("LC", "MM"), direction = "<")
# First row of the "best" threshold coordinates; the AUC is stored alongside it.
xy_position <- coords(roc_result, "best") %>% .[1,]
xy_position$AUC <- auc(roc_result)
SVM_LCMM_ROC_top200 <- ggroc(roc_result, size = 0.8, colour = "darkblue", legacy.axes = TRUE) +
  theme_bw() +
  labs(x = "1-Specificity", y = "Sensitivity", title = "RFE-TOP200 features (RAW + SVM)") +
  annotate("text", x = 0.75, y = 0.09,
           label = paste("AUC :", format(round(xy_position$AUC, 3), nsmall = 3)),
           colour = "black", size = 5) +
  # Chance diagonal, drawn once instead of once per ROC coordinate.
  annotate("segment", x = 0, xend = 1, y = 0, yend = 1,
           color = "grey", linetype = "dashed") +
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
        axis.title = element_text(face = "bold", size = 15),
        panel.grid = element_blank())


# 2 x 2 figure for HC vs MM (all features on top, top-200 below); the NULL
# cells and the 0.05 relative sizes act as thin spacers between the panels.
SVM_HCMM_grid <- cowplot::plot_grid(SVM_HCMM_point_all, NULL, SVM_HCMM_ROC_all, NULL,
                                    NULL, NULL, NULL, NULL,
                                    SVM_HCMM_point_top200, NULL, SVM_HCMM_ROC_top200, NULL,
                                    ncol = 4, nrow = 3, axis = "bltr", align = "hv",
                                    rel_widths = c(1, 0.05, 1, 0.05),
                                    rel_heights = c(1, 0.05, 1))
SVM_HCMM_grid
# Save the grid explicitly instead of relying on last_plot(); bg = "white"
# matches the LCMM counterpart so the spacer cells are not transparent.
ggsave(filename = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/HCMM All+top200 RAW+SVM.tiff",
       plot = SVM_HCMM_grid, units = "mm", height = 200, width = 230, dpi = 600, bg = "white")
# 2 x 2 figure for LC vs MM (all features on top, top-200 below); the NULL
# cells and the 0.05 relative sizes act as thin spacers between the panels.
SVM_LCMM_grid <- cowplot::plot_grid(SVM_LCMM_point_all, NULL, SVM_LCMM_ROC_all, NULL,
                                    NULL, NULL, NULL, NULL,
                                    SVM_LCMM_point_top200, NULL, SVM_LCMM_ROC_top200, NULL,
                                    ncol = 4, nrow = 3, axis = "bltr", align = "hv",
                                    rel_widths = c(1, 0.05, 1, 0.05),
                                    rel_heights = c(1, 0.05, 1))
SVM_LCMM_grid
# Save the grid explicitly instead of relying on last_plot().
ggsave(filename = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/LCMM All+top200 RAW+SVM.tiff",
       plot = SVM_LCMM_grid, units = "mm", height = 200, width = 230, dpi = 600, bg = "white")


######### Spectra of the top-200 wavenumbers ##########
#### PLS ####
# Selected wavenumber names per comparison and selection method.
HCMM_features200_PLS <- PLS_VIP_top200[["HCMM"]]
LCMM_features200_PLS <- PLS_VIP_top200[["LCMM"]]
HCMM_features200_SVM <- SVM_VIP_top200[["HCMM"]]
LCMM_features200_SVM <- SVM_VIP_top200[["LCMM"]]

library(ggthemr)
ggthemr('solarized')
# RAW entry of ydata; the plot titles below call it the RAW training set.
tr_raw_data <- ydata[["RAW"]]

# HC/MM samples restricted to the PLS-VIP wavenumbers, in long format.
# dplyr::select is qualified (as elsewhere in this file) so that another
# attached package cannot mask it.
top200VIPHCMM <- tr_raw_data %>%
  subset(Class %in% c("HC", "MM")) %>%
  dplyr::select(all_of(c("File Name", "Class", HCMM_features200_PLS))) %>%
  mutate(Class = factor(paste0(Class, " (TOP200 VIP)"))) %>%
  reshape2::melt(id.vars = c("File Name", "Class"))

# HC/MM samples restricted to the SVM-RFE wavenumbers, in long format.
top200SVMHCMM <- tr_raw_data %>%
  subset(Class %in% c("HC", "MM")) %>%
  dplyr::select(all_of(c("File Name", "Class", HCMM_features200_SVM))) %>%
  mutate(Class = factor(paste0(Class, " (TOP200 RFE)"))) %>%
  reshape2::melt(id.vars = c("File Name", "Class"))

# Full HC/MM spectra stacked with the two top-200 subsets.
HCMM <- tr_raw_data %>%
  subset(Class %in% c("HC", "MM")) %>%
  reshape2::melt(id.vars = c("File Name", "Class")) %>%
  rbind(top200VIPHCMM, top200SVMHCMM)
# Column names are wavenumbers; turn the melted factor back into numbers.
HCMM$variable <- as.numeric(as.character(HCMM$variable))

HCMM$Class <- factor(HCMM$Class,
                     levels = c("HC", "MM", "HC (TOP200 VIP)", "MM (TOP200 VIP)",
                                "HC (TOP200 RFE)", "MM (TOP200 RFE)"),
                     ordered = TRUE)
ggthemr('flat')
# Colour per class label; shared by all spectra plots below.
values <- c(LC = "#00A087CC", `LC (TOP200 VIP)` = "#08A28F", `LC (TOP200 RFE)` = "#91D1C2CC",
            HC = "royalblue1", `HC (TOP200 VIP)` = "#4DBBD5CC", `HC (TOP200 RFE)` = "skyblue",
            MM = "brown3", `MM (TOP200 VIP)` = "#E64B35CC", `MM (TOP200 RFE)` = "#F39B7FCC")
# HC and MM spectra with the two top-200 subsets overlaid.
# NOTE(review): no `group` aesthetic is mapped, so each Class is drawn as a
# single path through all samples; confirm this is intended rather than one
# line per `File Name`.
hm <- ggplot(HCMM, aes(x = variable, y = value, color = Class)) +
  geom_line(linewidth = 1, lty = 8) +
  labs(title = paste0("HC & MM Spectra - RAW Trainset in HCMM"), x = "Wavenumber", y = "Absorbance") +
  scale_color_manual(values = values) +
  theme_classic() +
  theme(
    panel.background = element_blank(),
    axis.text = element_text(color = 'black', size = 10),
    axis.title = element_text(color = 'black', size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5, color = 'black', size = 15, face = "bold"),
    legend.text = element_text(color = 'black', size = 10),
    legend.title = element_text(color = 'black', size = 10, face = "bold"))

# Tag every row with its base class (any label containing "HC" is HC).
HCMM$Group <- ifelse(grepl("HC", HCMM$Class), "HC", "MM")
hm
HC_MM <- split(HCMM, HCMM$Group)

# HC rows only, with the legend order fixed.
HC_hcmm <- HC_MM[["HC"]] %>%
  mutate(Class = factor(Class,
                        levels = c("HC", "HC (TOP200 VIP)", "HC (TOP200 RFE)"),
                        ordered = TRUE))

hm_HC <- ggplot(HC_hcmm, aes(x = variable, y = value, color = Class)) +
  geom_line(linewidth = 1, lty = 8) +
  scale_color_manual(values = values) +
  labs(title = paste0("HC Spectra - RAW Trainset in HCMM"), x = "Wavenumber", y = "Absorbance") +
  theme_classic() +
  theme(
    panel.background = element_blank(),
    axis.text = element_text(color = 'black', size = 10),
    axis.title = element_text(color = 'black', size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5, color = 'black', size = 15, face = "bold"),
    legend.text = element_text(color = 'black', size = 10),
    legend.title = element_text(color = 'black', size = 10, face = "bold"))
hm_HC

# MM rows of the HC vs MM data, with the legend order fixed.
MM_hcmm <- HC_MM[["MM"]] %>%
  mutate(Class = factor(Class,
                        levels = c("MM", "MM (TOP200 VIP)", "MM (TOP200 RFE)"),
                        ordered = TRUE))

hm_MM <- ggplot(MM_hcmm, aes(x = variable, y = value, color = Class)) +
  geom_line(linewidth = 1, lty = 8) +
  scale_color_manual(values = values) +
  labs(title = paste0("MM Spectra - RAW Trainset in HCMM"), x = "Wavenumber", y = "Absorbance") +
  theme_classic() +
  theme(
    panel.background = element_blank(),
    axis.text = element_text(color = 'black', size = 10),
    axis.title = element_text(color = 'black', size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5, color = 'black', size = 15, face = "bold"),
    legend.text = element_text(color = 'black', size = 10),
    legend.title = element_text(color = 'black', size = 10, face = "bold"))
hm_MM

### LCMM ####
# LC/MM samples restricted to the PLS-VIP wavenumbers, in long format.
# dplyr::select is qualified (as elsewhere in this file) so that another
# attached package cannot mask it.
top200VIPLCMM <- tr_raw_data %>%
  subset(Class %in% c("LC", "MM")) %>%
  dplyr::select(all_of(c("File Name", "Class", LCMM_features200_PLS))) %>%
  mutate(Class = factor(paste0(Class, " (TOP200 VIP)"))) %>%
  reshape2::melt(id.vars = c("File Name", "Class"))

# LC/MM samples restricted to the SVM-RFE wavenumbers, in long format.
top200SVMLCMM <- tr_raw_data %>%
  subset(Class %in% c("LC", "MM")) %>%
  dplyr::select(all_of(c("File Name", "Class", LCMM_features200_SVM))) %>%
  mutate(Class = factor(paste0(Class, " (TOP200 RFE)"))) %>%
  reshape2::melt(id.vars = c("File Name", "Class"))

# Full LC/MM spectra stacked with the two top-200 subsets.
LCMM <- tr_raw_data %>%
  subset(Class %in% c("LC", "MM")) %>%
  reshape2::melt(id.vars = c("File Name", "Class")) %>%
  rbind(top200VIPLCMM, top200SVMLCMM)
# Column names are wavenumbers; turn the melted factor back into numbers.
LCMM$variable <- as.numeric(as.character(LCMM$variable))

LCMM$Class <- factor(LCMM$Class,
                     levels = c("LC", "MM", "LC (TOP200 VIP)", "MM (TOP200 VIP)",
                                "LC (TOP200 RFE)", "MM (TOP200 RFE)"),
                     ordered = TRUE)

# LC and MM spectra with the two top-200 subsets overlaid.
# NOTE(review): no `group` aesthetic is mapped, so each Class is drawn as a
# single path through all samples; confirm this is intended rather than one
# line per `File Name`.
lm <- ggplot(LCMM, aes(x = variable, y = value, color = Class)) +
  geom_line(linewidth = 1, lty = 8) +
  scale_color_manual(values = values) +
  labs(title = paste0("LC & MM Spectra - RAW Trainset in LCMM"), x = "Wavenumber", y = "Absorbance") +
  theme_classic() +
  theme(
    panel.background = element_blank(),
    axis.text = element_text(color = 'black', size = 10),
    axis.title = element_text(color = 'black', size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5, color = 'black', size = 15, face = "bold"),
    legend.text = element_text(color = 'black', size = 10),
    legend.title = element_text(color = 'black', size = 10, face = "bold"))
lm

# Tag every row with its base class (any label containing "LC" is LC).
LCMM$Group <- ifelse(grepl("LC", LCMM$Class), "LC", "MM")
LC_MM <- split(LCMM, LCMM$Group)

# LC rows only, with the legend order fixed.
LC_lcmm <- LC_MM[["LC"]] %>%
  mutate(Class = factor(Class,
                        levels = c("LC", "LC (TOP200 VIP)", "LC (TOP200 RFE)"),
                        ordered = TRUE))

lm_LC <- ggplot(LC_lcmm, aes(x = variable, y = value, color = Class)) +
  geom_line(linewidth = 1, lty = 8) +
  scale_color_manual(values = values) +
  labs(title = paste0("LC Spectra - RAW Trainset in LCMM"), x = "Wavenumber", y = "Absorbance") +
  theme_classic() +
  theme(
    panel.background = element_blank(),
    axis.text = element_text(color = 'black', size = 10),
    axis.title = element_text(color = 'black', size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5, color = 'black', size = 15, face = "bold"),
    legend.text = element_text(color = 'black', size = 10),
    legend.title = element_text(color = 'black', size = 10, face = "bold"))
lm_LC

# MM rows of the LC vs MM data, with the legend order fixed.
MM_lcmm <- LC_MM[["MM"]] %>%
  mutate(Class = factor(Class,
                        levels = c("MM", "MM (TOP200 VIP)", "MM (TOP200 RFE)"),
                        ordered = TRUE))

lm_MM <- ggplot(MM_lcmm, aes(x = variable, y = value, color = Class)) +
  geom_line(linewidth = 1, lty = 8) +
  scale_color_manual(values = values) +
  labs(title = paste0("MM Spectra - RAW Trainset in LCMM"), x = "Wavenumber", y = "Absorbance") +
  theme_classic() +
  theme(
    panel.background = element_blank(),
    axis.text = element_text(color = 'black', size = 10),
    axis.title = element_text(color = 'black', size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5, color = 'black', size = 15, face = "bold"),
    legend.text = element_text(color = 'black', size = 10),
    legend.title = element_text(color = 'black', size = 10, face = "bold"))
lm_MM

# 2 x 3 figure: HCMM spectra on top, LCMM spectra below; the NULL cells and
# the 0.05 relative sizes act as thin spacers.
lineplot_grid <- cowplot::plot_grid(NULL, hm, NULL, hm_HC, NULL, hm_MM,
                                    NULL, NULL, NULL, NULL, NULL, NULL,
                                    NULL, lm, NULL, lm_LC, NULL, lm_MM,
                                    nrow = 3, ncol = 6,
                                    rel_widths = c(0.05, 1, 0.05, 1, 0.05, 1),
                                    rel_heights = c(1, 0.05, 1),
                                    axis = "bltr", align = "hv")
lineplot_grid
# Save the grid explicitly instead of relying on last_plot(); the white
# background keeps the spacer cells from being transparent in the TIFF.
# NOTE(review): unlike the other figures this one is written to the working
# directory; confirm that is intended.
ggsave(filename = "lineplot.tiff", plot = lineplot_grid,
       units = "mm", dpi = 600, height = 300, width = 600, bg = "white")



###########################特征筛选#####################
library(tibble)
library(scales)
# Re-read the raw spectra. read.csv() uses its default name checking here, so
# characters 2-8 of each column name are taken as the wavenumber.
raw1 <- read.csv("R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/raw.csv")
wavelengths <- as.numeric(substr(colnames(raw1)[3:ncol(raw1)], 2, 8))
colnames(raw1)[3:ncol(raw1)] <- wavelengths

# Per-class mean spectrum over all wavenumbers of the raw file.
HCmean <- subset(raw1, Class %in% c("HC"))
HCmean1 <- t(as.data.frame(colMeans(HCmean[, -c(1, 2)])))
rownames(HCmean1) <- c("HC")
MMmean <- subset(raw1, Class %in% c("MM"))
MMmean1 <- t(as.data.frame(colMeans(MMmean[, -c(1, 2)])))
rownames(MMmean1) <- c("MM")
LCmean <- subset(raw1, Class %in% c("LC"))
LCmean1 <- t(as.data.frame(colMeans(LCmean[, -c(1, 2)])))
rownames(LCmean1) <- c("LC")
M1 <- rownames_to_column(as.data.frame(rbind(HCmean1, MMmean1, LCmean1)), var = "Class")
# Wide to long: one row per class and wavenumber.
M5 <- reshape2::melt(M1, id.vars = "Class", variable.name = "Wavenumber", value.name = "Absorbance")
names(M5) <- c("Class", "Wavenumber", "Absorbance")

# Same per-class means, computed on tr_raw_data (reuses the variable names above).
HCmean <- subset(tr_raw_data, Class %in% c("HC"))
HCmean1 <- t(as.data.frame(colMeans(HCmean[, -c(1, 2)])))
rownames(HCmean1) <- c("HC")
MMmean <- subset(tr_raw_data, Class %in% c("MM"))
MMmean1 <- t(as.data.frame(colMeans(MMmean[, -c(1, 2)])))
rownames(MMmean1) <- c("MM")
LCmean <- subset(tr_raw_data, Class %in% c("LC"))
LCmean1 <- t(as.data.frame(colMeans(LCmean[, -c(1, 2)])))
rownames(LCmean1) <- c("LC")
M <- rownames_to_column(as.data.frame(rbind(HCmean1, MMmean1, LCmean1)), var = "Class")
# Wide to long: one row per class and wavenumber.
M6 <- reshape2::melt(M, id.vars = "Class", variable.name = "Wavenumber", value.name = "Absorbance")
names(M6) <- c("Class", "Wavenumber", "Absorbance")

# Keep only the MM mean spectrum. The dplyr verbs are namespace-qualified
# because `signal` is attached after `dplyr` in this script and also exports
# filter(), so the bare name is ambiguous.
M1_MM <- M5 %>% dplyr::filter(Class == "MM")  # all wavenumbers of the raw file
M1_MM$Wavenumber <- as.numeric(as.character(M1_MM$Wavenumber))
M_MM <- M6 %>% dplyr::filter(Class == "MM")   # wavenumbers present in tr_raw_data

# MM mean absorbance at the wavenumbers that are also present in tr_raw_data.
M1_MM_filtered <- M1_MM[M1_MM$Wavenumber %in% M_MM$Wavenumber, ] %>%
  dplyr::select(Wavenumber, Class) %>%              # keep only Wavenumber and Class
  dplyr::left_join(
    M1_MM %>% dplyr::select(Wavenumber, Absorbance),  # absorbance looked up from M1_MM
    by = "Wavenumber"
  ) %>%
  dplyr::select(Class, Wavenumber, Absorbance)      # restore the column order
M1_MM_filtered$Wavenumber <- as.numeric(as.character(M1_MM_filtered$Wavenumber))

# MM mean spectrum restricted to each top-200 feature set.
M1_MM_LCMMSVM <- subset(M1_MM, Wavenumber %in% LCMM_features200_SVM)
M1_MM_LCMMPLS <- subset(M1_MM, Wavenumber %in% LCMM_features200_PLS)
M1_MM_HCMMSVM <- subset(M1_MM, Wavenumber %in% HCMM_features200_SVM)
M1_MM_HCMMPLS <- subset(M1_MM, Wavenumber %in% HCMM_features200_PLS)
# Export the HC vs MM PLS feature list.
write.csv(HCMM_features200_PLS,
          file = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/HCMM_features200_PLS.csv",
          row.names = FALSE)
# Base plot: MM mean spectrum over all wavenumbers (grey dashed line), the
# retained wavenumbers as blue points, and the SVM-RFE top-200 features for
# LC vs MM as powder-blue diamonds (shape 23).
LCMMSVM <- ggplot(data = M1_MM, mapping = aes(x = Wavenumber, y = Absorbance, group = Class)) +
  geom_line(linewidth = 0.5, linetype = "dashed", color = "grey") +
  labs(title = "Top200 Features LC vs MM SVM") +
  theme_bw() +
  theme(plot.title = element_text(color = 'black', size = 15, hjust = 0.5, face = "bold"),
        panel.background = element_blank(),
        axis.line = element_blank(),
        axis.text = element_text(color = 'black', size = 15),
        axis.title = element_text(color = 'black', size = 15, face = "bold"),
        legend.text = element_text(color = 'black', size = 12),
        legend.title = element_text(color = 'black', size = 12),
        panel.grid = element_blank()) +
  # `linetype` is not a geom_point() parameter and was silently ignored.
  geom_point(data = M1_MM_filtered,
             mapping = aes(x = Wavenumber, y = Absorbance),
             color = "royalblue4",
             size = 1.2) +
  geom_point(data = M1_MM_LCMMSVM,
             mapping = aes(x = Wavenumber, y = Absorbance),
             color = "black",
             shape = 23,
             size = 2,
             fill = "powderblue",   # fill colour (shape 23 is fillable)
             stroke = 0.3)
LCMMSVM
# MM mean spectrum (grey dashed line), retained wavenumbers as blue points,
# and the PLS-VIP top-200 features for LC vs MM as powder-blue diamonds.
LCMMPLS <- ggplot(data = M1_MM, mapping = aes(x = Wavenumber, y = Absorbance, group = Class)) +
  geom_line(linewidth = 0.5, linetype = "dashed", color = "grey") +
  labs(title = "Top200 Features LC vs MM PLS") +
  theme_bw() +
  theme(plot.title = element_text(color = 'black', size = 15, hjust = 0.5, face = "bold"),
        panel.background = element_blank(),
        axis.line = element_blank(),
        axis.text = element_text(color = 'black', size = 15),
        axis.title = element_text(color = 'black', size = 15, face = "bold"),
        legend.text = element_text(color = 'black', size = 12),
        legend.title = element_text(color = 'black', size = 12),
        panel.grid = element_blank()) +
  # `linetype` is not a geom_point() parameter and was silently ignored.
  geom_point(data = M1_MM_filtered,
             mapping = aes(x = Wavenumber, y = Absorbance),
             color = "royalblue4",
             size = 1.2) +
  geom_point(data = M1_MM_LCMMPLS,
             mapping = aes(x = Wavenumber, y = Absorbance),
             color = "black",
             shape = 23,
             size = 2,
             fill = "powderblue",   # fill colour (shape 23 is fillable)
             stroke = 0.3)
LCMMPLS


# One row: spacer, SVM panel, spacer, PLS panel, spacer. rel_widths now has
# exactly one entry per column, and the unused rel_heights (single row) is dropped.
top200featureLCMM_grid <- cowplot::plot_grid(NULL, LCMMSVM, NULL, LCMMPLS, NULL,
                                             nrow = 1, ncol = 5,
                                             rel_widths = c(0.05, 1, 0.05, 1, 0.05),
                                             axis = "bltr", align = "hv")
top200featureLCMM_grid
# Save the grid explicitly instead of relying on last_plot(); the white
# background keeps the spacer cells from being transparent in the TIFF.
ggsave(filename = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/top200featureLCMM.tiff",
       plot = top200featureLCMM_grid, units = "mm", dpi = 600, height = 120, width = 300, bg = "white")
# MM mean spectrum (grey dashed line), retained wavenumbers as blue points,
# and the PLS-VIP top-200 features for HC vs MM as powder-blue diamonds.
HCMMPLS <- ggplot(data = M1_MM, mapping = aes(x = Wavenumber, y = Absorbance, group = Class)) +
  geom_line(linewidth = 0.5, linetype = "dashed", color = "grey") +
  labs(title = "Top200 Features HC vs MM PLS") +
  theme_bw() +
  theme(plot.title = element_text(color = 'black', size = 15, hjust = 0.5, face = "bold"),
        panel.background = element_blank(),
        axis.line = element_blank(),
        axis.text = element_text(color = 'black', size = 15),
        axis.title = element_text(color = 'black', size = 15, face = "bold"),
        legend.text = element_text(color = 'black', size = 12),
        legend.title = element_text(color = 'black', size = 12),
        panel.grid = element_blank()) +
  # `linetype` is not a geom_point() parameter and was silently ignored.
  geom_point(data = M1_MM_filtered, mapping = aes(x = Wavenumber, y = Absorbance),
             color = "royalblue4", size = 1.2) +
  geom_point(data = M1_MM_HCMMPLS, mapping = aes(x = Wavenumber, y = Absorbance),
             color = "black", shape = 23, size = 2,
             fill = "powderblue",   # fill colour (shape 23 is fillable)
             stroke = 0.3)
HCMMPLS
# MM mean spectrum (grey dashed line), retained wavenumbers as blue points,
# and the SVM-RFE top-200 features for HC vs MM as powder-blue diamonds.
HCMMSVM <- ggplot(data = M1_MM, mapping = aes(x = Wavenumber, y = Absorbance, group = Class)) +
  geom_line(linewidth = 0.5, linetype = "dashed", color = "grey") +
  labs(title = "Top200 Features HC vs MM SVM") +
  theme_bw() +
  theme(plot.title = element_text(color = 'black', size = 15, hjust = 0.5, face = "bold"),
        panel.background = element_blank(),
        axis.line = element_blank(),
        axis.text = element_text(color = 'black', size = 15),
        axis.title = element_text(color = 'black', size = 15, face = "bold"),
        legend.text = element_text(color = 'black', size = 12),
        legend.title = element_text(color = 'black', size = 12),
        panel.grid = element_blank()) +
  # `linetype` is not a geom_point() parameter and was silently ignored.
  geom_point(data = M1_MM_filtered, mapping = aes(x = Wavenumber, y = Absorbance),
             color = "royalblue4", size = 1.2) +
  geom_point(data = M1_MM_HCMMSVM, mapping = aes(x = Wavenumber, y = Absorbance),
             color = "black", shape = 23, size = 2,
             fill = "powderblue",   # fill colour (shape 23 is fillable)
             stroke = 0.3)
HCMMSVM
# One row: spacer, SVM panel, spacer, PLS panel, spacer. rel_widths now has
# exactly one entry per column, and the unused rel_heights (single row) is dropped.
top200featureHCMM_grid <- cowplot::plot_grid(NULL, HCMMSVM, NULL, HCMMPLS, NULL,
                                             nrow = 1, ncol = 5,
                                             rel_widths = c(0.05, 1, 0.05, 1, 0.05),
                                             axis = "bltr", align = "hv")
top200featureHCMM_grid
# Save the grid explicitly instead of relying on last_plot(); the white
# background keeps the spacer cells from being transparent in the TIFF.
ggsave(filename = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/top200featureHCMM.tiff",
       plot = top200featureHCMM_grid, units = "mm", dpi = 600, height = 120, width = 300, bg = "white")
## Combined view
# Label each LC vs MM feature subset with the method that selected it.
M1_MM_LCMMSVM$Method <- "SVM-RFE(TOP200)"
M1_MM_LCMMPLS$Method <- "PLS-VIP(TOP200)"

# Stack both subsets so one layer can map fill and shape to Method.
combined_featuresLCMM <- rbind(M1_MM_LCMMSVM, M1_MM_LCMMPLS)

# MM mean spectrum with both top-200 feature sets overlaid.
combined_plotLCMM <- ggplot(data = M1_MM, mapping = aes(x = Wavenumber, y = Absorbance, group = Class)) +
  geom_line(linewidth = 0.5, linetype = "dashed", color = "grey") +
  labs(title = "Top200 Features LC vs MM") +
  theme_bw() +
  theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
        panel.background = element_blank(),
        axis.line = element_blank(),
        axis.text = element_text(color = 'black', size = 15),
        axis.title = element_text(color = 'black', size = 15, face = "bold"),
        legend.text = element_text(color = 'black', size = 10),
        legend.title = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.785, 0.5),
        legend.key.size = unit(0.5, "cm"),
        panel.grid = element_blank()) +
  geom_point(data = M1_MM_filtered,
             mapping = aes(x = Wavenumber, y = Absorbance),
             color = "royalblue4",
             size = 1.2) +
  geom_point(data = combined_featuresLCMM,
             mapping = aes(x = Wavenumber, y = Absorbance, fill = Method, shape = Method), # map both fill and shape
             color = "black",
             size = 2,
             stroke = 0.3) +
  scale_fill_manual(values = c("SVM-RFE(TOP200)" = "powderblue", "PLS-VIP(TOP200)" = "orange")) +
  scale_shape_manual(values = c("SVM-RFE(TOP200)" = 23, "PLS-VIP(TOP200)" = 22))
combined_plotLCMM

# Label each HC vs MM feature subset with the method that selected it.
M1_MM_HCMMSVM$Method <- "SVM-RFE(TOP200)"
M1_MM_HCMMPLS$Method <- "PLS-VIP(TOP200)"

# Stack both subsets so one layer can map fill and shape to Method.
combined_featuresHCMM <- rbind(M1_MM_HCMMSVM, M1_MM_HCMMPLS)

# MM mean spectrum with both top-200 feature sets overlaid.
combined_plotHCMM <- ggplot(data = M1_MM, mapping = aes(x = Wavenumber, y = Absorbance, group = Class)) +
  geom_line(linewidth = 0.5, linetype = "dashed", color = "grey") +
  labs(title = "Top200 Features HC vs MM") +
  theme_bw() +
  theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
        panel.background = element_blank(),
        axis.line = element_blank(),
        axis.text = element_text(color = 'black', size = 15),
        axis.title = element_text(color = 'black', size = 15, face = "bold"),
        legend.text = element_text(color = 'black', size = 10),
        legend.title = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.785, 0.5),
        legend.key.size = unit(0.5, "cm"),
        panel.grid = element_blank()) +
  geom_point(data = M1_MM_filtered,
             mapping = aes(x = Wavenumber, y = Absorbance),
             color = "royalblue4",
             size = 1.2) +
  geom_point(data = combined_featuresHCMM,
             mapping = aes(x = Wavenumber, y = Absorbance, fill = Method, shape = Method), # map both fill and shape
             color = "black",
             size = 2,
             stroke = 0.3) +
  scale_fill_manual(values = c("SVM-RFE(TOP200)" = "powderblue", "PLS-VIP(TOP200)" = "orange")) +
  scale_shape_manual(values = c("SVM-RFE(TOP200)" = 23, "PLS-VIP(TOP200)" = 22))
combined_plotHCMM
# One row: spacer, LCMM panel, spacer, HCMM panel, spacer. rel_widths now has
# exactly one entry per column, and the unused rel_heights (single row) is dropped.
top200feature_grid <- cowplot::plot_grid(NULL, combined_plotLCMM, NULL, combined_plotHCMM, NULL,
                                         nrow = 1, ncol = 5,
                                         rel_widths = c(0.05, 1, 0.05, 1, 0.05),
                                         axis = "bltr", align = "hv")
top200feature_grid
# Save the grid explicitly instead of relying on last_plot().
ggsave(filename = "R:/Near-infrared spectroscopy combined with machine learning for plasma-based differential diagnosis of malignant mesothelioma（peerj）/modelEvaluationo3/top200feature.tiff",
       plot = top200feature_grid, units = "mm", dpi = 600, height = 120, width = 300, bg = "white")
