# Load the R package library(readxl) library(dplyr) library(gtsummary) library(gt) library(regplot) library(pROC) library(ggplot2) library(rms) library(rmda) # read data data <- read_excel("path.xlsx") # The data of training set, test set and verification set are extracted respectively. train_data <- subset(data, Group == 0) test_data <- subset(data, Group == 1) validation_data <- subset(data, Group == 2) # Construct a logistic regression model train_fit<-glm(Y~X1+X2, data=train_data, family=binomial()) test_fit<-glm(Y~X1+X2, data=test_data, family=binomial()) validation_fit<-glm(Y~X1+X2, data=validation_data, family=binomial()) train_fit test_fit validation_fit # plot nomogram nomogram <- regplot(train_fit,observation=train_data[15,],leftlabel=T,cexvars=1, center=T,subticks=T,droplines=T,title='nomogram', points=T,odds=F,showP=T,dencol='grey',boxcol='grey', rank='sd',interval='confidence',clickable=F) # Calculate the model prediction probability train_data$prob <- predict(train_fit, type = "response") test_data$prob <- predict(test_fit, type = "response") validation_data$prob <- predict(validation_fit, type = "response") # plot the ROC curve of the training set and save it as a PDF file train_roc_curve <- roc(train_data$Y, train_data$prob) train_auc_value <- round(train_roc_curve$auc, 3) train_ci_values <- ci(train_roc_curve) pdf_path <- "path.pdf" pdf(pdf_path, width = 8, height = 8) plot(train_roc_curve, legacy.axes = TRUE, col = "black", print.auc = FALSE, grid = TRUE, auc.polygon = FALSE, auc.polygon.border = TRUE, main = 'Training set ROC') legend(x = "bottomright", legend = paste0('AUC = ', train_auc_value, ' (95% CI ', round(train_ci_values[1], 2), '-', round(train_ci_values[3], 2), ')'), lty = 1) dev.off() # plot the ROC curve of the testing set and save it as a PDF file test_roc_curve <- roc(test_data$Y, test_data$prob) test_auc_value <- round(test_roc_curve$auc, 3) test_ci_values <- ci(test_roc_curve) pdf_path <- "path.pdf" pdf(pdf_path, width = 8, height = 8) plot(test_roc_curve, legacy.axes = TRUE, col = "black", print.auc = FALSE, grid = TRUE, auc.polygon = FALSE, auc.polygon.border = TRUE, main = 'Testing set ROC') legend(x = "bottomright", legend = paste0('AUC = ', test_auc_value, ' (95% CI ', round(test_ci_values[1], 2), '-', round(test_ci_values[3], 2), ')'), lty = 1) dev.off() # plot the ROC curve of the validation set and save it as a PDF file validation_roc_curve <- roc(validation_data$Y, validation_data$prob) validation_auc_value <- round(validation_roc_curve$auc, 3) validation_ci_values <- ci(validation_roc_curve) pdf_path <- "path.pdf" pdf(pdf_path, width = 8, height = 8) plot(validation_roc_curve, legacy.axes = TRUE, col = "black", print.auc = FALSE, grid = TRUE, auc.polygon = FALSE, auc.polygon.border = TRUE, main = 'Validation set ROC') legend(x = "bottomright", legend = paste0('AUC = ', validation_auc_value, ' (95% CI ', round(validation_ci_values[1], 2), '-', round(validation_ci_values[3], 2), ')'), lty = 1) dev.off() # plot the calibration curve of the training set calibrate <- "yes" if (calibrate == "yes") { calibrate_flage = TRUE picjzqx = c() msg <- tryCatch({ if (nrow(train_data) > 0) { fitdev <- lrm(as.formula(train_fit), data = train_data, x = TRUE, y = TRUE, maxit = 1000) coxm <- calibrate(fitdev, method = 'boot', B = 1000) savepath <- "path" mytime <- Sys.time() rand <- sample(1:10000, 1) picFormat <- "pdf" if (!dir.exists(savepath)) { dir.create(savepath, recursive = TRUE) } pdf(file = paste0(savepath, "/train_calibration_curve_", format(mytime, "%Y%m%d_%H%M%S_"), rand, ".", picFormat)) plot(coxm, xlab = "Predicted Probability", ylab = "Observed Probability", main = "Training set") dev.off() } else { stop("Filtered data is empty.") } }, error = function(e) { print(paste("Error occurred:", e$message)) }) } # Draw the calibration curve of the testing set calibrate <- "yes" if (calibrate == "yes") { calibrate_flage = TRUE picjzqx = c() msg <- tryCatch({ if (nrow(test_data) > 0) { fitdev <- lrm(as.formula(test_fit), data = test_data, x = TRUE, y = TRUE, maxit = 1000) coxm <- calibrate(fitdev, method = 'boot', B = 1000) savepath <- "path" mytime <- Sys.time() rand <- sample(1:10000, 1) picFormat <- "pdf" if (!dir.exists(savepath)) { dir.create(savepath, recursive = TRUE) } pdf(file = paste0(savepath, "/test_calibration_curve_", format(mytime, "%Y%m%d_%H%M%S_"), rand, ".", picFormat)) plot(coxm, xlab = "Predicted Probability", ylab = "Observed Probability", main = "Testing set") dev.off() } else { stop("Filtered data is empty.") } }, error = function(e) { print(paste("Error occurred:", e$message)) }) } # Draw the calibration curve of the validation set calibrate <- "yes" if (calibrate == "yes") { calibrate_flage = TRUE picjzqx = c() msg <- tryCatch({ if (nrow(validation_data) > 0) { fitdev <- lrm(as.formula(validation_fit), data = validation_data, x = TRUE, y = TRUE, maxit = 1000) coxm <- calibrate(fitdev, method = 'boot', B = 1000) savepath <- "path" mytime <- Sys.time() rand <- sample(1:10000, 1) picFormat <- "pdf" if (!dir.exists(savepath)) { dir.create(savepath, recursive = TRUE) } pdf(file = paste0(savepath, "/validation_calibration_curve_", format(mytime, "%Y%m%d_%H%M%S_"), rand, ".", picFormat)) plot(coxm, xlab = "Predicted Probability", ylab = "Observed Probability", main = "Validation set") dev.off() } else { stop("Filtered data is empty.") } }, error = function(e) { print(paste("Error occurred:", e$message)) }) } # Establish a training set decision curve model train_DCA <- decision_curve(as.formula(train_fit), data = train_data, family = binomial(link = 'logit'), thresholds = seq(0, 1, by = 0.01), confidence.intervals = 0.95, study.design = 'cohort') mytime <- format(Sys.time(), "%Y%m%d%H%M%S") rand <- sample(1000:9999, 1) picFormat <- "pdf" savepath <- "path" dpi <- 300 dcaname <- paste0("train_DCA", mytime, "_", rand, ".", picFormat) pdf(file=paste0(savepath, dcaname), width=8, height=6) plot_decision_curve(train_DCA, curve.names = "train_model", cost.benefit.axis = FALSE, col = "red", confidence.intervals = FALSE, standardize = FALSE) dev.off() # Establish a testing set decision curve model test_DCA <- decision_curve(as.formula(test_fit), data = test_data, family = binomial(link = 'logit'), thresholds = seq(0, 1, by = 0.01), confidence.intervals = 0.95, study.design = 'cohort') mytime <- format(Sys.time(), "%Y%m%d%H%M%S") rand <- sample(1000:9999, 1) picFormat <- "pdf" savepath <- "path" dpi <- 300 dcaname <- paste0("test_DCA", mytime, "_", rand, ".", picFormat) pdf(file=paste0(savepath, dcaname), width=8, height=6) plot_decision_curve(test_DCA, curve.names = "test_model", cost.benefit.axis = FALSE, col = "red", confidence.intervals = FALSE, standardize = FALSE) dev.off() # Establish a validation set decision curve model validation_DCA <- decision_curve(as.formula(validation_fit), data = validation_data, family = binomial(link = 'logit'), thresholds = seq(0, 1, by = 0.01), confidence.intervals = 0.95, study.design = 'cohort') mytime <- format(Sys.time(), "%Y%m%d%H%M%S") rand <- sample(1000:9999, 1) picFormat <- "pdf" savepath <- "path" dpi <- 300 dcaname <- paste0("validation_DCA", mytime, "_", rand, ".", picFormat) pdf(file=paste0(savepath, dcaname), width=8, height=6) plot_decision_curve(validation_DCA, curve.names = "validation_model", cost.benefit.axis = FALSE, col = "red", confidence.intervals = FALSE, standardize = FALSE) dev.off()