library(glmnet) #筛选特征变量 eset1<-read.table("data.txt",sep="\t",header=T) eset1$Label = as.factor(eset1$Label) x <- data.matrix(eset1[, 1:56]) y <- eset1[, 57] Lasso <- glmnet(x, y, alpha=1,family = 'binomial') set.seed(123) Lasso.cv<-cv.glmnet(x,y,alpha = 1,family = "binomial",type.measure = "auc" ) plot(Lasso.cv) get_coe <- function(the_fit,the_lamb){ Coefficients <- coef(the_fit, s = the_lamb) Active.Index <- which(Coefficients != 0) Active.Coefficients <- Coefficients[Active.Index] re <- data.frame(rownames(Coefficients)[Active.Index],Active.Coefficients) re <- data.table('var_names'=rownames(Coefficients)[Active.Index],'coef'=Active.Coefficients) re$expcoef <- exp(re$coef) return(re[order(expcoef)]) } get_coe(Lasso.cv,Lasso.cv$lambda.min) coef(Lasso.cv, s = "lambda.1se") eset55<-select(eset1,c("BFR", "WHR", "TG", "HbA1c", "LDL_C", "TC", "PSQI", "VFI", "Age", "SBP", "FBG", "ALT", "Creatinine", "ECGNOTE1", "ECGNOTE2", "Smoking1", "Smoking2", "Drinking1", "Drinking2","Label")) #将数据集划分为训练集和测试集 set.seed(123)#设置随机种子 ind <- sample(2, nrow(eset55), replace=TRUE, prob=c(0.7, 0.3)) train_data_new <-eset55[ind==1,]#训练数据 test_data_new <- eset55[ind==2,]#测试数据 #利用LASSO构建模型 x <- as.matrix(train_data_new[, 1:19]) y <- train_data_new[, 20] Lasso <- glmnet(x, y, alpha=1,family = 'binomial') Lasso_cv <-cv.glmnet(x,y,alpha = 1,family = "binomial",type.measure = "class" ) pre.train.Lasso = predict(Lasso_cv, newx=as.matrix(train_data_new[,1:19]), type = "response",s = Lasso_cv$lambda.min) reg_roc_Lasso <- roc(train_data_new$Label ~ pre.train.Lasso) pre.test.Lasso = predict(Lasso_cv, newx=as.matrix(test_data_new[,1:19]), type = "response",s = Lasso_cv$lambda.min) reg_roc_Lasso_test <- roc(test_data_new$Label ~ pre.test.Lasso) #利用svm建立支持向量机分类模型 library(e1071)#加载e1071包 #利用tune.svm()函数,来寻找svm()函数的最优参数。 tuned <- tune.svm(Label ~., data = train_data_new, gamma = 10^(-6:-1), cost = 10^(-2:2)) set.seed(123) svm.model<-svm(Label~., train_data_new,type="C",kernel = 'radial',gamma=tuned$best.parameters$gamma,cost=tuned$best.parameters$cost,probability = TRUE) summary(svm.model) #训练集 pre.train.SVM <- predict(svm.model,newdata = train_data_new) reg_roc_SVM <- roc(train_data_new$Label ~ as.numeric(as.character(pre.train.SVM))) #测试集 pre.test.SVM <- predict(svm.model,newdata = test_data_new) reg_roc_SVM_test <- roc(test_data_new$Label ~ as.numeric(as.character(pre.test.SVM))) pre.vali.SVM <- predict(svm.model,newdata = vali_data_new) reg_roc_SVM_vali <- roc(vali_data_new$Label ~ as.numeric(as.character(pre.vali.SVM))) temp_matrix <- data.frame(y = train_data_new$Label, predictor = reg_roc_SVM$predictor, stringsAsFactors = FALSE); temp_matrix[temp_matrix$predictor < 0.5, "predictor"] <- 0; temp_matrix[temp_matrix$predictor >= 0.5, "predictor"] <- 1; TP <- as.double(nrow(temp_matrix[temp_matrix$y == 1 & temp_matrix$predictor == 1, ])); FP <- as.double(nrow(temp_matrix[temp_matrix$y == 0 & temp_matrix$predictor == 1, ])); TN <- as.double(nrow(temp_matrix[temp_matrix$y == 0 & temp_matrix$predictor == 0, ])); FN <- as.double(nrow(temp_matrix[temp_matrix$y == 1 & temp_matrix$predictor == 0, ])); MCC <- (TP * TN - FP * FN) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)); #利用randomForest建立随机森林分类模型 library(randomForest) n<-length(names(train_data_new)) set.seed(9) min=100 num=0 for (i in 1:(n-1)){ mtry_fit<- randomForest(Label~., data=train_data_new, mtry=i) err<-mean(mtry_fit$err.rate) print(err) if(err= 0.5, "predictor"] <- 1; TP <- as.double(nrow(temp_matrix[temp_matrix$y == 1 & temp_matrix$predictor == 1, ])); FP <- as.double(nrow(temp_matrix[temp_matrix$y == 0 & temp_matrix$predictor == 1, ])); TN <- as.double(nrow(temp_matrix[temp_matrix$y == 0 & temp_matrix$predictor == 0, ])); FN <- as.double(nrow(temp_matrix[temp_matrix$y == 1 & temp_matrix$predictor == 0, ])); MCC <- (TP * TN - FP * FN) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)); pre.test.RF <- predict(RF.model,newdata = test_data_new,type="prob") reg_roc_RF_test <- roc(test_data_new$Label ~ pre.test.RF) library(PredictABEL) train_data_new$Label<-as.numeric(as.character(train_data_new$Label)) reclassification(data=train_data_new,cOutcome=20,predrisk1 = pre.train.Lasso, predrisk2 = pre.train.SVM,cutoff = c(0,0.5,1)) reclassification(data=train_data_new,cOutcome=20,predrisk1 = pre.train.Lasso, predrisk2 = pre.train.RF,cutoff = c(0,0.5,1)) reclassification(data=train_data_new,cOutcome=20,predrisk1 = pre.train.SVM, predrisk2 = pre.train.RF,cutoff = c(0,0.5,1)) test_data_new$Label<-as.numeric(as.character(test_data_new$Label)) reclassification(data=test_data_new,cOutcome=20,predrisk1 = pre.test.Lasso, predrisk2 = pre.test.SVM,cutoff = c(0,0.5,1)) reclassification(data=test_data_new,cOutcome=20,predrisk1 = pre.test.Lasso, predrisk2 = pre.test.RF,cutoff = c(0,0.5,1)) reclassification(data=test_data_new,cOutcome=20,predrisk1 = pre.test.SVM, predrisk2 = pre.test.RF,cutoff = c(0,0.5,1)