## 0. Install the required packages (run once; skip if already installed)
install.packages('car')
install.packages('rms')
install.packages('pROC')
# The decision-curve functions come from the DecisionCurve package (distributed on
# GitHub as rmda, which is what library() loads below). Use whichever of the four
# install routes works in your environment.
#1 From CRAN
install.packages('DecisionCurve')
#2 Via BiocManager
BiocManager::install("DecisionCurve", ask = F, update = F)
#3 From GitHub with devtools
library(devtools)
install_github("mdbrown/DecisionCurve")
#4 From a local source tarball
data_dir <- choose.dir(default = "D:\\data_dir",
                       caption = "Set the working directory to the location of the 'DecisionCurve_1.3.tar.gz' package.")
install.packages(paste0(data_dir, "\\DecisionCurve_1.3.tar.gz"), repos = NULL, type = "source")

##0.1 Clear the workspace and load the libraries
rm(list = ls())
library(car)
library(rms)
library(pROC)
library(rmda)

##0.2 Directory that holds the data files
data_dir <- choose.dir(default = "D:\\data_dir",
                       caption = "Set the working directory to the location of your data files.")

##0.3 Directory where the plots will be saved
output_dir <- choose.dir(default = "D:\\output_dir",
                         caption = "Set the output directory for saving the plots.")

##0.4 Read the training dataset (tab-delimited text file)
training_dataset_path <- choose.files(default = data_dir,
                                      caption = "Please choose the text file that contains the training dataset.",
                                      multi = TRUE, filters = Filters, index = nrow(Filters))
training_dataset <- read.csv(training_dataset_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
print(paste0("The training set has ", dim(training_dataset)[1], " samples and ",
             dim(training_dataset)[2], " variables."))

##0.5 Read the validation dataset (tab-delimited text file)
validation_dataset_path <- choose.files(default = data_dir,
                                        caption = "Please choose the text file that contains the validation dataset.",
                                        multi = TRUE, filters = Filters, index = nrow(Filters))
validation_dataset <- read.csv(validation_dataset_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
print(paste0("The validation set contains ", dim(validation_dataset)[1], " samples and ",
             dim(validation_dataset)[2], " variables."))
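## Optional sanity check on the loaded data (a minimal sketch; it assumes the outcome
## column is named "Group", sits in the first column, and is coded 0/1, which is what
## the comparison function and the models below expect).
str(training_dataset)                                       # column names and types
table(training_dataset$Group, useNA = "ifany")              # outcome distribution, flag missing values
stopifnot(identical(colnames(training_dataset), colnames(validation_dataset)))  # same layout in both sets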
################ 1. Comparison of baseline variables ########################
# Example arguments (the function is called with explicit arguments below)
com_type="btw_datasets"
var_name="Gender"
# comparison_nomogram(): compares one variable either between the two datasets
# (com_type = "btw_datasets") or between the outcome groups of the training set
# (com_type = "btw_grp"), and returns the appropriate p-value.
comparison_nomogram<-function(com_type,training_dataset,validation_dataset,var_name,var_type){
  # Select the two data vectors to compare.
  if(com_type=="btw_datasets"){
    data_1=training_dataset[,var_name]
    data_2=validation_dataset[,var_name]
  }else if(com_type=="btw_grp"){
    data_1=training_dataset[(training_dataset[,1]==0),var_name]
    data_2=training_dataset[(training_dataset[,1]==1),var_name]
  }
  # Continuous variables: t-test if both groups are normal with equal variances,
  # otherwise the Wilcoxon rank-sum test.
  if(var_type=="continue_type"){
    judge_p<-function(p1,p2,p3,tp,zhp){
      mark<-0
      if(p1<0.05){
        mark<-1
      }
      if(p2<0.05){
        mark<-1
      }
      if(p3<0.05){
        mark<-1
      }
      if(mark==0){
        return(tp)
      }else{
        return(zhp)
      }
    }
    grp_1_ztp<-shapiro.test(data_1)$p.value
    grp_2_ztp<-shapiro.test(data_2)$p.value
    y_leveneT=c(data_1, data_2)
    group_leveneT=as.factor(c(rep(1,length(data_1)), rep(2,length(data_2))))
    fcp<-leveneTest(y_leveneT,group_leveneT)[["Pr(>F)"]][1]   # homogeneity-of-variance p-value
    t_testp<-t.test(data_1,data_2,paired=F)$p.value
    df = data.frame(y_leveneT,group_leveneT)
    zhp<-wilcox.test(y_leveneT~group_leveneT, df)$p.value
    finalp<-judge_p(p1=grp_1_ztp, p2=grp_2_ztp, p3=fcp, tp=t_testp, zhp=zhp)
  }else if(var_type=="bi_type"){
    # Helper functions for categorical variables
    col_matrix_producer<-function(grp_0_dingxing_data,grp_1_dingxing_data){
      # Build a counts matrix (levels x groups) from the two categorical vectors.
      vars<-unique(c(names(table(grp_0_dingxing_data)),names(table(grp_1_dingxing_data))))
      col_matrix<-matrix(data = 0, nrow = length(vars), ncol = 2, byrow = FALSE,dimnames = NULL)
      colnames(col_matrix)<-c("grp_0","grp_1")
      rownames(col_matrix)<-vars
      for(index_var in 1:length(vars)){
        if(!is.na(table(grp_0_dingxing_data)[vars[index_var]][[1]])){
          col_matrix[vars[index_var],"grp_0"]<-table(grp_0_dingxing_data)[vars[index_var]][[1]]
        }
      }
      for(index_var in 1:length(vars)){
        if(!is.na(table(grp_1_dingxing_data)[vars[index_var]][[1]])){
          col_matrix[vars[index_var],"grp_1"]<-table(grp_1_dingxing_data)[vars[index_var]][[1]]
        }
      }
      return(col_matrix)
    }
    col_matrix_ratio_producer<-function(col_matrix, round_num){
      # Format each cell as "count (percentage%)", column-wise.
      out_table<-col_matrix
      for(index_col in 1:ncol(col_matrix)){
        sum_col<-sum(col_matrix[,index_col])
        for(index_row in 1:nrow(col_matrix)){
          ratio<-round(100*col_matrix[index_row,index_col]/sum_col, round_num)
          out_table[index_row,index_col]<-paste0(col_matrix[index_row,index_col], " (",ratio,"%)")
        }
      }
      return(out_table)
    }
    ## Drop missing values (coded NA or the literal string "NA") from both vectors.
    b0 = which(is.na(data_1) | data_1=="NA")
    if(length(b0)){
      data_1<-data_1[-b0]
    }
    b1 = which(is.na(data_2) | data_2=="NA")
    if(length(b1)){
      data_2<-data_2[-b1]
    }
    # Contingency table of the two groups
    col_matrix<-col_matrix_producer(data_1,data_2)
    # Chi-squared and Fisher exact p-values
    chisqcp<-chisq.test(col_matrix)$p.value
    fisherp<- tryCatch(fisher.test(col_matrix)$p.value, error=function(e){return("A")} )
    if(fisherp=="A"){
      # The exact test failed (sentinel "A"), so fall back to a simulated p-value.
      fisherp<-fisher.test(col_matrix,simulate.p.value=TRUE)$p.value
    }
    #fisherp<-fisher.test(col_matrix)$p.value
    col_matrix_observed<-chisq.test(col_matrix)$observed
    col_matrix_expected<-chisq.test(col_matrix)$expected
    round_num<-1
    col_matrix_ratio<-col_matrix_ratio_producer(col_matrix, round_num)
    ## Use Fisher's exact test when any expected count is <5 or the total sample is <40,
    ## otherwise the chi-squared test.
    sum_N<-sum(col_matrix)
    if(any(col_matrix_expected<5) | sum_N<40){
      finalp<-fisherp
    }else{
      finalp<-chisqcp
    }
  }
  return(paste0("The p-value for variable ", var_name, " is: ", finalp))
}
#### 1. Compare "Age" between the training and validation datasets
com_type="btw_datasets"
var_name = "Age"
var_type = "continue_type"
comparison_nomogram(com_type=com_type, training_dataset=training_dataset,
                    validation_dataset=validation_dataset, var_name=var_name, var_type=var_type)
#### 2. Compare "Age" between the outcome groups of the training set
com_type="btw_grp"
var_name = "Age"
var_type = "continue_type"
comparison_nomogram(com_type=com_type, training_dataset=training_dataset,
                    validation_dataset=validation_dataset, var_name=var_name, var_type=var_type)

################ 3. Logistic regression ########################
##3.1 Univariate logistic regression
f_lrm <-lrm(Group ~ Age, data=training_dataset, x=TRUE, y=TRUE, maxit=1000)
# Coefficients and p-values
print(f_lrm)

##3.2 Multivariate logistic regression
f_lrm <-lrm(Group ~ MCT_P+BUN+Ca, data=training_dataset, x=TRUE, y=TRUE, maxit=1000)
print(f_lrm)

################ 4. Nomogram ##########################
ddist <- datadist(training_dataset)
options(datadist='ddist')
# Draw the nomogram and save it as a PDF
pdf(file=paste(output_dir, "\\nomogram.pdf", sep = ""), width=10, height=8)
nom <- nomogram(f_lrm,
                fun = function(x) 1/(1+exp(-x)),            # convert the linear predictor to a probability
                fun.at = c(0.01,0.1,0.3,0.5,0.7,0.9,0.99),  # probability tick marks
                funlabel = "Pre of HTG_SAP",                # label of the probability axis
                lp = F,                                     # hide the linear-predictor axis
                conf.int = F,                               # no confidence intervals
                abbrev = F)                                 # do not abbreviate factor levels
plot(nom)
dev.off()

################ 5. Model validation ##########################
##5.1 ROC curve for the training set
pred_f_training<-predict(f_lrm, training_dataset)
# Build the ROC object
modelroc <- roc(training_dataset$Group, pred_f_training)
# Plot the ROC curve
pdf(file=paste(output_dir, "\\ROC_training.pdf", sep = ""), width=10, height=10)
plot(modelroc, print.auc=TRUE, auc.polygon=TRUE, grid=c(0.1, 0.2), print.thres=TRUE)
dev.off()

##5.2 ROC curve for the validation set
pred_f_validation<-predict(f_lrm, validation_dataset)
modelroc <- roc(validation_dataset$Group, pred_f_validation)
pdf(file=paste(output_dir, "\\ROC_testing.pdf", sep = ""), width=10, height=10)
plot(modelroc, print.auc=TRUE, auc.polygon=TRUE, grid=c(0.1, 0.2), print.thres=TRUE)
dev.off()

##5.3 Calibration curve for the training set
cal <- calibrate(f_lrm)
pdf(file=paste(output_dir, "\\calibrate_training.pdf", sep = ""), width=10, height=10)
plot(cal)
dev.off()

##5.4 Calibration curve for the validation set
fit.vad<-lrm(validation_dataset$Group~pred_f_validation, data=validation_dataset, x=T, y=T)
pdf(file=paste(output_dir, "\\calibrate_testing.pdf", sep = ""), width=10, height=10)
cal <- calibrate(fit.vad)
plot(cal)
dev.off()
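## Optional extras (a sketch, not part of the original workflow): bootstrap internal
## validation from rms and a 95% CI for the AUC from pROC. The number of bootstrap
## replicates (B = 200) is an arbitrary illustrative choice; `modelroc` at this point
## still holds the validation-set ROC built in step 5.2.
print(validate(f_lrm, method = "boot", B = 200))        # optimism-corrected Dxy, R2, slope, ...
cal_boot <- calibrate(f_lrm, method = "boot", B = 200)  # bootstrap-corrected calibration of the training fit
plot(cal_boot)
print(ci.auc(modelroc))                                 # 95% CI for the validation-set AUC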
""),width=10,height=10) DCA_training<- decision_curve(Group ~MCT_P+BUN+Ca,data = training_dataset #,policy = "opt-in" ,study.design = 'cohort') plot_decision_curve(DCA_training,curve.names= c('Nomogram model')) dev.off() #DCA of validation pdf(file=paste(output_dir, "\\DCA_testing.pdf", sep = ""),width=10,height=10) DCA_training<- decision_curve(Group ~MCT_P+BUN+Ca,data = validation_dataset #,policy = "opt-in" ,study.design = 'cohort') plot_decision_curve(DCA_training,curve.names= c('Nomogram model')) dev.off()