---
title: "Data_acquisition"
author: "SL"
date: "2023/10/18"
output: html_document
---

```{r}
# NOTE(review): rm(list = ls()) only removes objects from the global
# environment; it does not detach packages or reset options, so it is not a
# substitute for starting from a fresh R session.
rm(list = ls())
gc()
getwd()
```

```{r}
library(data.table)
library(openxlsx)
library(tidyverse)
library(VennDiagram)
library(RColorBrewer)
library(clusterProfiler)
```

# 定义自噬相关基因

```{r}
# HADB gene list: keep one row per gene symbol.
HADB <- read.xlsx("Autophagy_genes/HADB.xlsx")
head(HADB)
HADB <- HADB %>%
  distinct(Symbol, .keep_all = TRUE)

# HAMDB gene list: keep rows whose Species_Evidence is "Human", reduced to a
# de-duplicated Symbol column.
HAMDB <- fread("Autophagy_genes/HAMDB.csv")
names(HAMDB)
HAMDB <- HAMDB %>%
  filter(Species_Evidence == "Human") %>%
  dplyr::select(Symbol) %>%
  distinct(Symbol, .keep_all = TRUE)
head(HAMDB)
```

取交叉

```{r}
# Genes present in both lists. HAMDB only carries Symbol at this point, so the
# join key is spelled out instead of relying on the implicit natural join.
Autophagy <- HAMDB %>%
  inner_join(HADB, by = "Symbol")
Autophagy
fwrite(Autophagy, "Autophagy_genes/Autophagy.txt", quote = TRUE, sep = "\t")
```

韦恩图

```{r}
A <- HADB$Symbol
B <- HAMDB$Symbol

# Define colours: preview the qualitative ColorBrewer palettes and print the
# 3-colour Set1 palette; the first two of its colours are used below.
display.brewer.all(type = "qual")
brewer.pal(3, "Set1")
color <- c("#E41A1C", "#377EB8")

# filename = NULL makes venn.diagram() return the grid objects instead of
# writing an image file, so no image-type argument is passed here; the PDF is
# produced by pdf() + grid.draw() below.
# NOTE(review): hyper.test is requested without total.population --
# presumably no hypergeometric test is actually performed; confirm against
# the VennDiagram documentation.
venn.plot <- venn.diagram(
  list(HADB = A, HAMDB = B),
  filename = NULL,
  lty = 1,
  lwd = 3,               # border line width
  fontface = "bold",     # label font face
  col = "black",         # circle outline colour
  fill = color,          # one fill colour per set
  alpha = 0.60,          # fill transparency
  cat.col = "black",     # set-name colour (a vector via c() also works)
  cat.cex = 1,           # set-name text size
  cat.fontface = "bold",
  margin = 0.1,
  cex = 2.0,             # count label text size
  label.col = "black",
  hyper.test = TRUE
)

pdf(file = "Autophagy_genes/Autophagy_genes.pdf")
grid.draw(venn.plot)
dev.off()
```

# 获取I期LUAD数据

TCGA

```{r}
# Expression matrix: read the table and use its `sample` column as row names.
TCGA_exp <- fread(
  "/home/rstudio/Dataset/TCGA/LUAD/TCGA.LUAD.sampleMap_HiSeqV2.gz",
  data.table = FALSE
)
head(TCGA_exp)
rownames(TCGA_exp) <- TCGA_exp$sample

# Keep the columns whose name ends in "01" (presumably primary-tumour
# samples -- confirm), then shorten the column names to their first 12
# characters so they can be matched against TCGA_clinical$Patients_ID.
TCGA_exp <- TCGA_exp %>%
  dplyr::select(ends_with("01"))
colnames(TCGA_exp) <- str_sub(colnames(TCGA_exp), 1, 12)
head(TCGA_exp)
```

选取生存期完整的I期患者临床数据

```{r}
TCGA_clinical <- fread(
  "/home/rstudio/Dataset/TCGA/LUAD/clinical.csv",
  data.table = FALSE
)
head(TCGA_clinical)
table(TCGA_clinical$Stage)

# Keep stage I patients with a positive PFI.time.
TCGA_clinical <- TCGA_clinical %>%
  filter(Stage == "I", PFI.time > 0)
fwrite(TCGA_clinical, "TCGA/TCGA_clinical.txt", quote = TRUE, sep = "\t")
head(TCGA_clinical)
```

选取相应的表达矩阵

```{r}
# Fail with the offending IDs rather than the generic
# "undefined columns selected" when a clinical patient has no expression
# column.
missing_ids <- setdiff(TCGA_clinical$Patients_ID, colnames(TCGA_exp))
if (length(missing_ids) > 0) {
  stop(
    "No expression column for ", length(missing_ids), " patient(s): ",
    paste(head(missing_ids), collapse = ", "),
    call. = FALSE
  )
}

# drop = FALSE keeps a data frame even when a single patient is selected.
TCGA_exp <- TCGA_exp[, TCGA_clinical$Patients_ID, drop = FALSE]
head(TCGA_exp)

# Transpose to one row per patient and expose the IDs as a regular column.
# NOTE(review): data.frame() defaults to check.names = TRUE, so gene symbols
# that are not syntactic R names (e.g. containing "-") are rewritten by
# make.names(). Left unchanged because downstream code may rely on the
# rewritten names.
TCGA_exp <- as.data.frame(t(TCGA_exp))
TCGA_exp <- data.frame(Patients_ID = rownames(TCGA_exp), TCGA_exp)
fwrite(TCGA_exp, "TCGA/TCGA_exp.txt", quote = TRUE, sep = "\t")
```

GSE