# Load the R package
library(readxl)
library(dplyr)
library(gtsummary)
library(gt)
library(regplot)
library(pROC)
library(ggplot2)
library(rms)
library(rmda)

# read data
data <- read_excel("path.xlsx")

# The data of training set, test set and verification set are extracted respectively.
train_data <- subset(data, Group == 0)
test_data <- subset(data, Group == 1)
validation_data <- subset(data, Group == 2)

# Construct a logistic regression model
train_fit<-glm(Y~X1+X2, data=train_data, family=binomial())
test_fit<-glm(Y~X1+X2, data=test_data, family=binomial())
validation_fit<-glm(Y~X1+X2, data=validation_data, family=binomial())

train_fit
test_fit
validation_fit

# plot nomogram
nomogram <- regplot(train_fit,observation=train_data[15,],leftlabel=T,cexvars=1,
                    center=T,subticks=T,droplines=T,title='nomogram',
                    points=T,odds=F,showP=T,dencol='grey',boxcol='grey',
                    rank='sd',interval='confidence',clickable=F)

# Calculate the model prediction probability
train_data$prob <- predict(train_fit, type = "response")
test_data$prob <- predict(test_fit, type = "response")
validation_data$prob <- predict(validation_fit, type = "response")

# plot the ROC curve of the training set and save it as a PDF file
train_roc_curve <- roc(train_data$Y, train_data$prob)
train_auc_value <- round(train_roc_curve$auc, 3)
train_ci_values <- ci(train_roc_curve)
pdf_path <- "path.pdf"
pdf(pdf_path, width = 8, height = 8)
plot(train_roc_curve, legacy.axes = TRUE, col = "black", print.auc = FALSE, grid = TRUE, auc.polygon = FALSE, auc.polygon.border = TRUE, main = 'Training set ROC')
legend(x = "bottomright", legend = paste0('AUC = ', train_auc_value, ' (95% CI ', round(train_ci_values[1], 2), '-', round(train_ci_values[3], 2), ')'), lty = 1)
dev.off()

# plot the ROC curve of the testing set and save it as a PDF file
test_roc_curve <- roc(test_data$Y, test_data$prob)
test_auc_value <- round(test_roc_curve$auc, 3)
test_ci_values <- ci(test_roc_curve)
pdf_path <- "path.pdf"
pdf(pdf_path, width = 8, height = 8)
plot(test_roc_curve, legacy.axes = TRUE, col = "black", print.auc = FALSE, grid = TRUE, auc.polygon = FALSE, auc.polygon.border = TRUE, main = 'Testing set ROC')
legend(x = "bottomright", legend = paste0('AUC = ', test_auc_value, ' (95% CI ', round(test_ci_values[1], 2), '-', round(test_ci_values[3], 2), ')'), lty = 1)
dev.off()

# plot the ROC curve of the validation set and save it as a PDF file
validation_roc_curve <- roc(validation_data$Y, validation_data$prob)
validation_auc_value <- round(validation_roc_curve$auc, 3)
validation_ci_values <- ci(validation_roc_curve)
pdf_path <- "path.pdf"
pdf(pdf_path, width = 8, height = 8)
plot(validation_roc_curve, legacy.axes = TRUE, col = "black", print.auc = FALSE, grid = TRUE, auc.polygon = FALSE, auc.polygon.border = TRUE, main = 'Validation set ROC')
legend(x = "bottomright", legend = paste0('AUC = ', validation_auc_value, ' (95% CI ', round(validation_ci_values[1], 2), '-', round(validation_ci_values[3], 2), ')'), lty = 1)
dev.off()

# plot the calibration curve of the training set
calibrate <- "yes"
if (calibrate == "yes") {
  calibrate_flage = TRUE
  picjzqx = c()
  msg <- tryCatch({
    if (nrow(train_data) > 0) {
      fitdev <- lrm(as.formula(train_fit), data = train_data, x = TRUE, y = TRUE, maxit = 1000)
      coxm <- calibrate(fitdev, method = 'boot', B = 1000)
      savepath <- "path"
      mytime <- Sys.time() 
      rand <- sample(1:10000, 1) 
      picFormat <- "pdf" 
      if (!dir.exists(savepath)) {
        dir.create(savepath, recursive = TRUE)
      }
      pdf(file = paste0(savepath, "/train_calibration_curve_", format(mytime, "%Y%m%d_%H%M%S_"), rand, ".", picFormat))
      plot(coxm, xlab = "Predicted Probability", ylab = "Observed Probability", main = "Training set")
      dev.off()
      
    } else {
      stop("Filtered data is empty.")
    }
  }, error = function(e) {
    print(paste("Error occurred:", e$message))
  })
}

# Draw the calibration curve of the testing set
calibrate <- "yes"  
if (calibrate == "yes") {
  calibrate_flage = TRUE
  picjzqx = c()
  msg <- tryCatch({
    if (nrow(test_data) > 0) {
      fitdev <- lrm(as.formula(test_fit), data = test_data, x = TRUE, y = TRUE, maxit = 1000)
      coxm <- calibrate(fitdev, method = 'boot', B = 1000)
      savepath <- "path"
      mytime <- Sys.time() 
      rand <- sample(1:10000, 1) 
      picFormat <- "pdf" 
      if (!dir.exists(savepath)) {
        dir.create(savepath, recursive = TRUE)
      }
      pdf(file = paste0(savepath, "/test_calibration_curve_", format(mytime, "%Y%m%d_%H%M%S_"), rand, ".", picFormat))
      plot(coxm, xlab = "Predicted Probability", ylab = "Observed Probability", main = "Testing set")
      dev.off()
      
    } else {
      stop("Filtered data is empty.")
    }
  }, error = function(e) {
    print(paste("Error occurred:", e$message))
  })
}

# Draw the calibration curve of the validation set
calibrate <- "yes"  
if (calibrate == "yes") {
  calibrate_flage = TRUE
  picjzqx = c()
  msg <- tryCatch({
    if (nrow(validation_data) > 0) {
      fitdev <- lrm(as.formula(validation_fit), data = validation_data, x = TRUE, y = TRUE, maxit = 1000)
      coxm <- calibrate(fitdev, method = 'boot', B = 1000)
      savepath <- "path"
      mytime <- Sys.time() 
      rand <- sample(1:10000, 1) 
      picFormat <- "pdf" 
      if (!dir.exists(savepath)) {
        dir.create(savepath, recursive = TRUE)
      }
      pdf(file = paste0(savepath, "/validation_calibration_curve_", format(mytime, "%Y%m%d_%H%M%S_"), rand, ".", picFormat))
      plot(coxm, xlab = "Predicted Probability", ylab = "Observed Probability", main = "Validation set")
      dev.off()
      
    } else {
      stop("Filtered data is empty.")
    }
  }, error = function(e) {
    print(paste("Error occurred:", e$message))
  })
}

# Establish a training set decision curve model
train_DCA <- decision_curve(as.formula(train_fit), data = train_data, family = binomial(link = 'logit'),
                        thresholds = seq(0, 1, by = 0.01),
                        confidence.intervals = 0.95, study.design = 'cohort')
mytime <- format(Sys.time(), "%Y%m%d%H%M%S")  
rand <- sample(1000:9999, 1)  
picFormat <- "pdf"  
savepath <- "path"  
dpi <- 300  
dcaname <- paste0("train_DCA", mytime, "_", rand, ".", picFormat)
pdf(file=paste0(savepath, dcaname), width=8, height=6)  
plot_decision_curve(train_DCA, curve.names = "train_model",  
                    cost.benefit.axis = FALSE, col = "red",  
                    confidence.intervals = FALSE, standardize = FALSE)
dev.off()


# Establish a testing set decision curve model
test_DCA <- decision_curve(as.formula(test_fit), data = test_data, family = binomial(link = 'logit'),
                            thresholds = seq(0, 1, by = 0.01),
                            confidence.intervals = 0.95, study.design = 'cohort')
mytime <- format(Sys.time(), "%Y%m%d%H%M%S")  
rand <- sample(1000:9999, 1)  
picFormat <- "pdf"  
savepath <- "path"  
dpi <- 300  
dcaname <- paste0("test_DCA", mytime, "_", rand, ".", picFormat)
pdf(file=paste0(savepath, dcaname), width=8, height=6)  
plot_decision_curve(test_DCA, curve.names = "test_model",  
                    cost.benefit.axis = FALSE, col = "red",  
                    confidence.intervals = FALSE, standardize = FALSE)
dev.off()

# Establish a validation set decision curve model
validation_DCA <- decision_curve(as.formula(validation_fit), data = validation_data, family = binomial(link = 'logit'),
                           thresholds = seq(0, 1, by = 0.01),
                           confidence.intervals = 0.95, study.design = 'cohort')
mytime <- format(Sys.time(), "%Y%m%d%H%M%S")  
rand <- sample(1000:9999, 1)  
picFormat <- "pdf"  
savepath <- "path"  
dpi <- 300  
dcaname <- paste0("validation_DCA", mytime, "_", rand, ".", picFormat)
pdf(file=paste0(savepath, dcaname), width=8, height=6)  
plot_decision_curve(validation_DCA, curve.names = "validation_model",  
                    cost.benefit.axis = FALSE, col = "red",  
                    confidence.intervals = FALSE, standardize = FALSE)
dev.off()