# Random forest feature importance ----
#
# Trains a random forest classifier on features/labels read from Excel,
# ranks the features by permutation importance (mean decrease in accuracy),
# plots the ranking, and writes it to an Excel file.

# Required packages
# install.packages(c("randomForest", "readxl", "writexl", "ggplot2", "dplyr"))
library(randomForest)
library(readxl)
library(writexl)
library(ggplot2)
library(dplyr)

# Paths ----
# All inputs and outputs live in one directory; define it once.
data_dir <- "C:/Users/12145"
features_path <- file.path(data_dir, "X_train.xlsx")
labels_path <- file.path(data_dir, "y_train.xlsx")
output_path <- file.path(data_dir, "feature_importances.xlsx")

# Read data ----
features_df <- read_excel(features_path)
labels_df <- read_excel(labels_path)

# Prepare data ----
# Fail early with a clear message instead of a length-mismatch error from
# inside randomForest(): unlist() on a multi-column label sheet would
# concatenate the columns into one long vector.
if (ncol(labels_df) != 1L) {
  stop(
    "Expected exactly one label column in ", labels_path,
    ", found ", ncol(labels_df), ".",
    call. = FALSE
  )
}
if (nrow(labels_df) != nrow(features_df)) {
  stop(
    "Row count mismatch: ", nrow(features_df), " feature rows vs ",
    nrow(labels_df), " label rows.",
    call. = FALSE
  )
}

X_train <- as.data.frame(features_df)
# A factor response makes randomForest() fit a classifier.
y_train <- as.factor(unlist(labels_df))

# Train random forest model ----
# Random forests are stochastic (bootstrap samples, random feature subsets);
# fix the seed so the importances are reproducible across runs.
set.seed(42)
# importance = TRUE is required for the permutation importance used below.
model <- randomForest(x = X_train, y = y_train, importance = TRUE)

# Get feature importance ----
# type = 1 selects mean decrease in accuracy.
feature_importances <- importance(model, type = 1)
importance_df <- data.frame(
  Feature = rownames(feature_importances),
  Importance = feature_importances[, 1],
  row.names = NULL
)

# Sort by importance (ascending, so the top features end up last).
importance_df <- importance_df %>%
  arrange(Importance)

# Plot feature importance ----
importance_plot <- ggplot(
  importance_df,
  aes(x = Importance, y = reorder(Feature, Importance))
) +
  geom_col(fill = "#4C9F70") +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 6),
    axis.text.x = element_text(size = 8),
    plot.title = element_text(size = 14, face = "bold")
  ) +
  labs(
    title = "Feature Importance from Random Forest (Sorted)",
    x = "Feature Importance",
    y = "Features"
  )
# Print explicitly: a bare ggplot expression is not rendered when the script
# is run through source().
print(importance_plot)

# Save to Excel ----
write_xlsx(importance_df, output_path)
cat("Feature importance file saved to:", output_path, "\n")