set.seed(71) #set.seed(69) #set.seed(70) library(randomForest) modules_5<-WGCNA_matrix[,] #5000 #modules_5<-as.data.frame(t(RNAseq1)) #modules_5<-WGCNA_matrix[,as.character(modules_m$gene[modules_m$modules==6|modules_m$modules==7|modules_m$modules==19|modules_m$modules==10|modules_m$modules==12|modules_m$modules==15|modules_m$modules==21|modules_m$modules==22])] #几个module的基因 #modules_5<-modules_7[,1:50] colnames(modules_5)<-paste("gene",c(1:ncol(modules_5)),sep="") group = rep(NA,108) modules_5<-as.data.frame(cbind(modules_5,group)) modules_5$group[1:20]<-"c" modules_5$group[21:70]<-"MMA" modules_5$group[71:108]<-"SA" modules_5$group<-as.factor(modules_5$group) rf1 <- randomForest(group ~ ., data = modules_5,importance=TRUE,proximity=TRUE) # whole genes estimated error rates set.seed(71) # rf1 <- randomForest(group ~ .,ntree=1000, data = modules_5,importance=TRUE,proximity=TRUE, # method = "repeatedcv",number = 10, repeats = 10, # mtry=1) print(rf1) #print(importance(rf1,type = 2)) sig<-as.matrix(importance(rf1,type = 2)) sig2<-as.matrix(sig[order(sig,decreasing = T),]) #modules_6<-WGCNA_matrix[,] #5000个总基因 #modules_6<-WGCNA_matrix[,as.character(modules_m$gene[modules_m$modules==6|modules_m$modules==7|modules_m$modules==19|modules_m$modules==10|modules_m$modules==12|modules_m$modules==15|modules_m$modules==21|modules_m$modules==22])] #几个module的基因 record <- array(NA, dim=c(50,2)) g=1 modules_6<-modules_5 #for (num in c(4000,3000,2000,1000,900,800,700,600,500,400,300,200,100,90,80,70,60,55,50,45,40,35,30,25,20,15,10)) { #5000个总基因 for (num in c(4000,3000,2000,1000,900,800,700,600,500,400,300,200,100,90,80,70,60,55,50)) { #5000个总基因 #for (num in seq(700,50,-50)) { #num<-2500 try= rep(NA,num) try[1:num]<-sub("gene","",rownames(sig2)[1:num]) modules_5<-modules_6[,as.numeric(try)] modules_6<-modules_5 colnames(modules_5)<-paste("gene",c(1:num),sep="") group = rep(NA,108) modules_5<-as.data.frame(cbind(modules_5,group)) modules_5$group[1:20]<-"c" modules_5$group[21:70]<-"MMA" modules_5$group[71:108]<-"SA" modules_5$group<-as.factor(modules_5$group) rf <- randomForest(group ~ ., data = modules_5,importance=TRUE,proximity=TRUE) # rf <- randomForest(group ~ ., data = modules_5,importance=TRUE,proximity=TRUE, # method = "repeatedcv",number = 10, repeats = 10) # rf <- randomForest(group ~ ., ntree=1000,data = modules_5,importance=TRUE,proximity=TRUE, # method = "repeatedcv",number = 10, repeats = 10) print(rf) record[g,1]<-num record[g,2]<-rf$err.rate[500,1] g=g+1 #print(importance(rf,type = 2)) sig<-as.matrix(importance(rf,type = 2)) sig2<-as.matrix(sig[order(sig,decreasing = T),]) } ########## overlapgene overlapgene=read.table("37gene.txt",stringsAsFactors = F,header=F,sep = "\n") overlap_matrix<-WGCNA_matrix[,match(overlapgene[,1],colnames(WGCNA_matrix))] #as.matrix(colnames(overlap_matrix)) set.seed(71) modules_5<-overlap_matrix #colnames(modules_5)<-paste("gene",c(1:ncol(modules_5)),sep="") colnames(modules_5)[1]<-"HLA_DPB1" group = rep(NA,108) modules_5<-as.data.frame(cbind(modules_5,group)) modules_5$group[1:20]<-"c" modules_5$group[21:70]<-"MMA" modules_5$group[71:108]<-"SA" modules_5$group<-as.factor(modules_5$group) #tenfold CV rf <- randomForest(group ~ ., ntree=1000,data = modules_5,importance=TRUE,proximity=TRUE, method = "repeatedcv",number = 10, repeats = 10, mtry=1)