# Brain-MRI tumour classification script.
#
# Steps, in order:
#   1. Count the images per class under `root_dir`.
#   2. Copy each class into train / validation / test folders (80/10/10).
#   3. Plot the split sizes and a random 4x4 preview of the raw images.
#   4. Train a dense head on top of a frozen, ImageNet-initialised Xception.
#   5. Plot the training curves, evaluate on the test set and show a grid of
#      test images with their true and predicted class.

library(tidyverse)
list.files(path = "../input")
library(tensorflow)
library(keras)
library(cowplot)
library(ggplot2)
library(gridExtra)
library(imager)
library(caret)

set.seed(42)

# ---- Configuration ----

root_dir <- "/kaggle/input/brain-mri-images-for-brain-tumor-detection/brain_tumor_dataset"

train_path <- "train_set"
valid_path <- "valid_set"
test_path <- "test_set"

train_frac <- 0.8
valid_frac <- 0.10

image_size <- c(299, 299)
batch_size <- 32
num_epochs <- 50

# ---- Class inventory ----

class_names <- list.dirs(root_dir, full.names = FALSE, recursive = FALSE)
class_names

for (class_name in class_names) {
  class_path <- file.path(root_dir, class_name)
  num_files <- length(list.files(class_path, full.names = TRUE))
  cat("Class", class_name, ":", num_files, "\n")
}

# ---- Train / validation / test split ----

# Each class is shuffled and split independently, so every split keeps
# roughly the original class proportions.
# NOTE(review): file.copy() does not overwrite, and the target folders are
# never emptied; re-running with a different seed or dataset would mix old
# and new files in the split folders.
for (class_name in class_names) {
  files <- list.files(file.path(root_dir, class_name), full.names = TRUE)
  n_files <- length(files)
  n_train <- floor(train_frac * n_files)
  n_train_valid <- floor((train_frac + valid_frac) * n_files)

  indices <- sample(seq_along(files))

  # seq_len() yields an empty slice for an empty partition, whereas `a:b`
  # would count backwards and put the same file into several splits.
  split_indices <- list(
    indices[seq_len(n_train)],
    indices[n_train + seq_len(n_train_valid - n_train)],
    indices[n_train_valid + seq_len(n_files - n_train_valid)]
  )
  split_dirs <- file.path(c(train_path, valid_path, test_path), class_name)

  for (k in seq_along(split_dirs)) {
    dir.create(split_dirs[k], recursive = TRUE, showWarnings = FALSE)
    file.copy(files[split_indices[[k]]], split_dirs[k])
  }
}

train_files <- list.files(train_path, recursive = TRUE, full.names = TRUE)
valid_files <- list.files(valid_path, recursive = TRUE, full.names = TRUE)
test_files <- list.files(test_path, recursive = TRUE, full.names = TRUE)

num_train_samples <- length(train_files)
num_valid_samples <- length(valid_files)
num_test_samples <- length(test_files)
num_total_samples <- sum(
  c(num_train_samples, num_valid_samples, num_test_samples)
)

cat('Num Total Samples:', num_total_samples, '\n')
cat('Num Train Samples:', num_train_samples, '\n')
cat('Num Valid Samples:', num_valid_samples, '\n')
cat('Num Test Samples:', num_test_samples, '\n')

# ---- Split-size bar chart ----

set_names <- c("Train Set", "Valid Set", "Test Set")
set_counts <- c(num_train_samples, num_valid_samples, num_test_samples)
set_colors <- c(
  rgb(0.3, 0.1, 0.4, 0.6),
  rgb(0.3, 0.5, 0.4, 0.6),
  rgb(0.3, 0.9, 0.4, 0.6)
)

my_bar <- barplot(
  set_counts,
  names.arg = set_names,
  col = set_colors,
  border = "black",
  horiz = FALSE,
  cex.axis = 0.8,
  cex.names = 0.8,
  axis.lty = 1,
  ylim = c(0, num_total_samples),
  main = "Distribution of Sets",
  xlab = "Set name",
  ylab = "Num. of samples in set"
)

# Print each count just above its bar.
text(
  x = my_bar,
  y = set_counts,
  label = set_counts,
  pos = 3,
  cex = 0.8,
  col = "black"
)

legend(
  "topright",
  legend = set_names,
  col = set_colors,
  bty = "n",
  pch = 20,
  pt.cex = 2,
  cex = 0.8,
  horiz = FALSE,
  inset = c(0.05, 0.05)
)

# ---- Random preview of the raw dataset ----

all_images <- list.files(root_dir, full.names = TRUE, recursive = TRUE)
sample_paths <- sample(all_images, 16)
images <- lapply(sample_paths, load.image)
# The class label is the name of the folder that contains the image.
labels <- basename(dirname(sample_paths))

old_par <- par(mfrow = c(4, 4), mar = c(2, 2, 2, 2))
for (i in seq_along(images)) {
  plot(images[[i]], main = paste("Tumor:", labels[i]), axes = FALSE)
}
par(old_par)

# ---- Data generators ----

# Augmentation is applied to the training images only; validation and test
# images are just rescaled to [0, 1].
train_datagen <- image_data_generator(
  rescale = 1/255,
  rotation_range = 20,
  width_shift_range = 0.1,
  height_shift_range = 0.1,
  shear_range = 0.1,
  zoom_range = 0.1,
  horizontal_flip = TRUE,
  fill_mode = "nearest"
)
valid_datagen <- image_data_generator(rescale = 1/255)
test_datagen <- image_data_generator(rescale = 1/255)

train_generator <- flow_images_from_directory(
  train_path,
  train_datagen,
  target_size = image_size,
  batch_size = batch_size,
  class_mode = "categorical"
)

valid_generator <- flow_images_from_directory(
  valid_path,
  valid_datagen,
  target_size = image_size,
  batch_size = batch_size,
  class_mode = "categorical"
)

# shuffle = FALSE is required here: the predictions are later compared with
# `test_generator$classes`, which is in directory order. With the default
# shuffling the two would not line up and every metric would be meaningless.
test_generator <- flow_images_from_directory(
  test_path,
  test_datagen,
  target_size = image_size,
  batch_size = batch_size,
  class_mode = "categorical",
  shuffle = FALSE
)

# ---- Model ----

base_model <- application_xception(
  weights = "imagenet",
  include_top = FALSE,
  input_shape = c(image_size, 3)
)

model <- keras_model_sequential() %>%
  base_model %>%
  layer_global_average_pooling_2d() %>%
  layer_batch_normalization() %>%
  layer_dense(
    units = 256,
    activation = "relu",
    kernel_regularizer = regularizer_l2(0.01)
  ) %>%
  layer_batch_normalization() %>%
  layer_dropout(0.5) %>%
  layer_dense(units = length(class_names), activation = "softmax")

# Freeze the backbone before compiling so that only the new head is trained.
freeze_weights(base_model)

model %>% compile(
  optimizer = optimizer_adam(),
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)

history <- model %>% fit(
  train_generator,
  epochs = num_epochs,
  validation_data = valid_generator
)

# ---- Training curves ----

# Derive the epoch axis from the recorded history instead of repeating the
# epoch count, so the data frames stay valid if `num_epochs` changes.
epochs_run <- seq_along(history$metrics$loss)

history_df <- data.frame(
  epoch = epochs_run,
  accuracy = history$metrics$accuracy,
  val_accuracy = history$metrics$val_accuracy,
  loss = history$metrics$loss,
  val_loss = history$metrics$val_loss
)
tail(history_df)

accuracy_df <- data.frame(
  epoch = epochs_run,
  train_acc = history$metrics$accuracy,
  val_acc = history$metrics$val_accuracy
)

ggplot(accuracy_df, aes(x = epoch)) +
  geom_line(aes(y = train_acc, color = "Train Accuracy")) +
  geom_line(aes(y = val_acc, color = "Validation Accuracy")) +
  labs(title = "Accuracy Curve", x = "Epoch", y = "Accuracy") +
  scale_color_manual(
    "",
    breaks = c("Train Accuracy", "Validation Accuracy"),
    values = c("blue", "red")
  ) +
  theme_minimal()

loss_df <- data.frame(
  epoch = epochs_run,
  train_loss = history$metrics$loss,
  val_loss = history$metrics$val_loss
)

ggplot(loss_df, aes(x = epoch)) +
  geom_line(aes(y = train_loss, color = "Train Loss")) +
  geom_line(aes(y = val_loss, color = "Validation Loss")) +
  labs(title = "Loss Curve", x = "Epoch", y = "Loss") +
  scale_color_manual(
    "",
    breaks = c("Train Loss", "Validation Loss"),
    values = c("blue", "red")
  ) +
  theme_minimal()

# ---- Test-set evaluation ----

# Start from the first batch so predictions follow the generator's file order.
test_generator$reset()
pred_probs <- model %>% predict(test_generator)
# which.max() is 1-based; the generator's class indices are 0-based.
pred_labels <- apply(pred_probs, 1, which.max) - 1
true_labels <- test_generator$classes

# Use the same explicit levels on both sides so confusionMatrix() still works
# when the model never predicts one of the classes.
class_levels <- seq_along(class_names) - 1
conf_matrix <- confusionMatrix(
  factor(pred_labels, levels = class_levels),
  factor(true_labels, levels = class_levels)
)
print(conf_matrix)

# NOTE(review): no `positive` class is passed, so the per-class statistics
# are presumably reported for the first level (class index 0) - confirm this
# is the intended "positive" class. The `byClass[...]` lookups below also
# assume a two-class problem.
accuracy <- conf_matrix$overall["Accuracy"]
precision <- conf_matrix$byClass["Pos Pred Value"]
recall <- conf_matrix$byClass["Sensitivity"]
f1_score <- 2 * (precision * recall) / (precision + recall)

cat("Accuracy:", accuracy, "\n")
cat("Precision:", precision, "\n")
cat("Recall:", recall, "\n")
cat("F1 Score:", f1_score, "\n")

# ---- Prediction grid ----

# Take the paths from the generator itself so that image i, true_labels[i]
# and pred_labels[i] all refer to the same file.
shown_files <- file.path(test_path, test_generator$filenames)
n_shown <- min(25, length(shown_files))
images <- lapply(shown_files[seq_len(n_shown)], load.image)

old_par <- par(mfrow = c(5, 5), mar = c(3, 3, 3, 3))
for (i in seq_len(n_shown)) {
  plot(images[[i]], axes = FALSE)
  # Blue title for a correct prediction, red for a wrong one.
  color <- if (true_labels[i] == pred_labels[i]) "blue" else "red"
  title(
    main = paste(
      "True:", class_names[true_labels[i] + 1],
      "\nPredicted:", class_names[pred_labels[i] + 1]
    ),
    col.main = color
  )
}
par(old_par)