#
# Library install
#
library(e1071)

#
# Calculate AUC of SVM model
#
# Args:
#   trained_model: model fitted by svm(..., probability = TRUE).
#   variables:     data frame with the predictor columns to score.
#   outcomes:      observed outcome for each row of `variables`; the value
#                  "Yes" is treated as the positive class.
# Returns:
#   The AUC as a single number, or NA_real_ (with a warning) when `outcomes`
#   does not contain both classes, because AUC is undefined in that case.
#
auc_svm <- function(trained_model, variables, outcomes) {
  labels <- as.integer(outcomes == "Yes")
  if (length(unique(labels[!is.na(labels)])) < 2L) {
    warning("AUC is undefined: outcomes do not contain both classes",
            call. = FALSE)
    return(NA_real_)
  }
  predicted <- predict(trained_model, variables, probability = TRUE)
  scores <- attr(predicted, "probabilities")[, "Yes"]
  # prediction()/performance() are the ROCR functions; they are qualified so
  # this helper does not rely on ROCR having been attached elsewhere.
  ROCR::performance(ROCR::prediction(scores, labels), "auc")@y.values[[1]]
}

#
# Validation AUC of an SVM for every fold of a k-fold cross-validation
#
# Args:
#   predictors:     character vector of predictor column names.
#   data:           data frame holding the predictors and `response`.
#   sample_order:   row indices of `data`; consecutive chunks of
#                   `validate_count` indices are used as validation folds
#                   (presumably a random permutation -- defined elsewhere).
#   validate_count: number of rows held out in each fold.
#   folds:          number of folds.
#   response:       name of the outcome column.
#   ...:            further arguments passed on to svm(), e.g. gamma, cost.
# Returns:
#   Numeric vector with one AUC per fold.
#
cv_aucs <- function(predictors, data, sample_order, validate_count, folds,
                    response = "diarrhea", ...) {
  # Build the formula from the same vector that selects the validation
  # columns, so the two can never drift apart.
  model_formula <- reformulate(predictors, response = response)
  fold_aucs <- numeric(folds)
  for (fold in seq_len(folds)) {
    # Split the data into a training part and a validation part
    held_out <- sample_order[validate_count * (fold - 1) +
                               seq_len(validate_count)]
    train_fold <- data[-held_out, ]
    validate_fold <- data[held_out, ]
    # Develop the model on the training part
    model <- svm(model_formula, data = train_fold, probability = TRUE, ...)
    # Calculate AUC on the validation part
    fold_aucs[fold] <- auc_svm(
      trained_model = model,
      variables = validate_fold[, predictors, drop = FALSE],
      outcomes = validate_fold[[response]]
    )
  }
  fold_aucs
}

#
# Perform the feature selection by 5-fold cross-validation
#
# TODO: Remove variable one by one
candidate_predictors <- c("age", "sex", "rankin_scale", "tube_feeding",
                          "ppi", "h2_blocker", "abx")
set.seed(1)
aucs <- cv_aucs(candidate_predictors, train_data, sample_order,
                validate_count, folds_in_cv)
mean(aucs, na.rm = TRUE)

#
# Set the hyperparameter gamma and cost by 5-fold cross-validation
#
# H2RA was removed
selected_predictors <- c("age", "sex", "rankin_scale", "tube_feeding",
                         "ppi", "abx")
gamma_grid <- 10^seq(-5, 5, 0.1)  # Search gamma from 10^-5 to 10^5
cost_grid <- 10^seq(-2, 2, 0.1)   # Search cost from 0.01 to 100

# One entry per (gamma, cost) pair, gamma varying slowest -- the same order
# the nested loops used. Preallocated instead of rbind()-ing inside the loop,
# and without the placeholder c(0, 0, 0) row.
grid_gamma <- rep(gamma_grid, each = length(cost_grid))
grid_cost <- rep(cost_grid, times = length(gamma_grid))
grid_auc <- numeric(length(grid_gamma))
for (i in seq_along(grid_gamma)) {
  # Reset the RNG before every run, as the original script did
  set.seed(1)
  fold_aucs <- cv_aucs(selected_predictors, train_data, sample_order,
                       validate_count, folds_in_cv,
                       gamma = grid_gamma[i], cost = grid_cost[i])
  grid_auc[i] <- mean(fold_aucs, na.rm = TRUE)
}
param_results <- cbind(gamma = grid_gamma, cost = grid_cost, auc = grid_auc)
head(param_results[order(param_results[, 3], decreasing = TRUE), ])