--- title: "Model_validation" author: "SL" date: "2023/10/20" output: html_document --- ```{r} library(tidyverse) library(data.table) library(openxlsx) library(survival) library(timeROC) ``` ```{r} choose_gene <- fread("TCGA/LASSO_coef.csv",data.table = F) choose_gene rownames(choose_gene) <- choose_gene$Gene coef <- choose_gene %>% dplyr::select(Coef) coef <- as.matrix(coef) coef ``` # GSE30219, GSE37745 ```{r} exp <- fread("/home/rstudio/Dataset/GEO/NSCLC/GSE30219/GSE30219_merge.txt") head(exp)[,1:5] table(exp$Stage) exp <- exp %>% filter(Stage=="I") %>% dplyr::select(Patients_ID,PFS.time,PFS,rownames(coef)) head(exp) ``` 选择hub基因的表达矩阵 ```{r} variables <- exp %>% dplyr::select(choose_gene$Gene) head(variables) ``` 需要计算预后指数的患者 ```{r} data_train_prediction <- exp %>% dplyr::select(Patients_ID) head(data_train_prediction) ``` 计算预后指数,按列添加到患者信息后面 ```{r} for (i in 1:10) { y=variables[,i]*coef[i,] data_train_prediction <- data.frame(data_train_prediction,y) } head(data_train_prediction) ``` 预后指数=各基因乘积和 ```{r} data_train_prediction$prediction <- rowSums(data_train_prediction[,2:ncol(data_train_prediction)]) data_train_prediction <- data_train_prediction %>% dplyr::select(Patients_ID,prediction) head(data_train_prediction) colnames(data_train_prediction)[2] <- "AutophagyScores" fwrite(data_train_prediction,"GEO/GSE30219/AutophagyScores.csv") head(data_train_prediction) ``` 合并临床数据 ```{r} clinical <- fread("/home/rstudio/Dataset/GEO/NSCLC/GSE30219/GSE30219_merge.txt",data.table = F) head(clinical)[,1:5] names(clinical)[1:20] clinical <- clinical %>% select(Patients_ID,PFS.time,PFS) data_merge <- data_train_prediction %>% left_join(clinical) head(data_merge) ``` ```{r} cutoff <- 365 time_roc_res <- timeROC( T = data_merge$PFS.time, delta = data_merge$PFS, marker = data_merge$AutophagyScores, cause = 1, weighting="marginal", times = c(1 * 365, 3 * 365, 5*365), ROC = TRUE, iid = TRUE ) #计算AUC值及其置信区间 time_roc_res$AUC confint(time_roc_res, level = 0.95)$CI_AUC #绘制time-dependent ROC曲线 plot(time_roc_res, time=1 * 365, col = "red", title = FALSE) plot(time_roc_res, time=3 * 365, add=TRUE, col="blue") plot(time_roc_res, time=5 * 365, add=TRUE, col="green") legend("bottomright",c("1 Years" ,"3 Years", "5 Years"), col=c("red", "blue", "green"), lty=1, lwd=2) #美化图 time_ROC_df <- data.frame( TP_1year = time_roc_res$TP[, 1], FP_1year = time_roc_res$FP[, 1], TP_3year = time_roc_res$TP[, 2], FP_3year = time_roc_res$FP[, 2], TP_5year = time_roc_res$TP[, 3], FP_5year = time_roc_res$FP[, 3] ) p <- ggplot(data = time_ROC_df) + geom_line(aes(x = FP_1year, y = TP_1year), linewidth = 1, color = "#BC3C29FF") + geom_line(aes(x = FP_3year, y = TP_3year), linewidth = 1, color = "#0072B5FF") + geom_line(aes(x = FP_5year, y = TP_5year), linewidth = 1, color = "#E18727FF") + geom_abline(slope = 1, intercept = 0, color = "grey", size = 1, linetype = 2) + theme_bw() + annotate("text", x = 0.75, y = 0.25, size = 4.5, label = paste0("AUC at 1 years = ", sprintf("%.3f", time_roc_res$AUC[[1]])), color = "#BC3C29FF" ) + annotate("text", x = 0.75, y = 0.15, size = 4.5, label = paste0("AUC at 3 years = ", sprintf("%.3f", time_roc_res$AUC[[2]])), color = "#0072B5FF" ) + annotate("text", x = 0.75, y = 0.05, size = 4.5, label = paste0("AUC at 5 years = ", sprintf("%.3f", time_roc_res$AUC[[3]])), color = "#E18727FF" ) + labs(x = "False positive rate", y = "True positive rate") + theme( axis.text = element_text(face = "bold", size = 11, color = "black"), axis.title.x = element_text(face = "bold", size = 14, color = "black", margin = margin(c(15, 0, 0, 0))), axis.title.y = element_text(face = "bold", size = 14, color = "black", margin = margin(c(0, 15, 0, 0))) )+ ggtitle("") plot(p) ggsave("GEO/GSE30219/ROC.pdf",p,width=6,height = 6) ```