# Random-forest classification of Tumor vs. Normal samples from hub-gene
# expression values, followed by a variable-importance plot.
#
# Workflow:
#   1. Read the expression table and inspect it.
#   2. Fit a random forest with the default number of trees.
#   3. Find the tree count with the lowest overall OOB error.
#   4. Refit with that tree count and plot variable importance.

library(MASS)
library(readr)
library(randomForest)

# Predictor genes, listed once so both model fits use exactly the same
# set in exactly the same order.
hub_genes <- c(
  "KIF2C", "CKS1B", "NCAPH", "TOP2A", "CKS2", "CDC6", "BUB1B", "PTTG1",
  "TK1", "MELK", "SPAG5", "FEN1", "CDK1", "FOXM1", "EZH2", "CENPF", "TPX2",
  "CDC20", "KNTC1", "PLK1", "CDKN3", "ZWINT", "KIF14", "GINS1", "CCNB2",
  "KIF11", "UBE2C", "KIFC1", "ESPL1", "TTK", "CCNB1", "CD93", "CLDN5", "TEK",
  "VWF", "ENG", "ICAM2", "CD34", "ANGPT1", "TIE1", "CSF3", "GJA4", "ACE",
  "NOTCH4", "SELE", "PECAM1", "KDR", "SELP", "CDH5", "THBD"
)

hubGENG_class <- read_csv("随机森林专用表.csv")
data <- hubGENG_class

# Fail early, with a readable message, if the table lacks a needed column.
missing_cols <- setdiff(c("class", hub_genes), names(data))
if (length(missing_cols) > 0) {
  stop(
    "Input table is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Inspect the data. View() needs an interactive session, so skip it when the
# script is run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(data)
}

# Variable names
print(names(data))

# Basic summary statistics for every variable
print(summary(data))

# Convert the class label to a factor with an explicit level order.
# NOTE(review): ordered = TRUE is kept from the original script; the labels
# look nominal, so an unordered factor may be more appropriate -- confirm.
data$class <- factor(data$class, levels = c("Tumor", "Normal"), ordered = TRUE)

# Labels outside the two declared levels become NA; report that here rather
# than letting the model fit fail later.
if (anyNA(data$class)) {
  stop(
    "Column 'class' contains values other than \"Tumor\"/\"Normal\" ",
    "(or missing values).",
    call. = FALSE
  )
}

# Variable types
str(data)

# class ~ KIF2C + CKS1B + ... + THBD
rf_formula <- reformulate(hub_genes, response = "class")

# Initial fit with the default number of trees.
set.seed(1)
rf.model <- randomForest(rf_formula, data = data)
print(rf.model)

# Error rate against number of trees:
#   green line = Normal
#   red line   = Tumor
#   black line = overall
plot(rf.model)

# Number of trees at which the first err.rate column (the overall error)
# is lowest. The original run of this script reported 40 here.
best_ntree <- which.min(rf.model$err.rate[, 1])
print(best_ntree)

# Refit with the error-minimising number of trees.
set.seed(1)
rf.model2 <- randomForest(rf_formula, data = data, ntree = best_ntree)
print(rf.model2)

# Variable importance scores (the key result)
varImpPlot(rf.model2)

# Save the figure