## 0. Install the required packages (run once; skip if already installed)
install.packages('car')
install.packages('rms')
install.packages('pROC')
# The decision-curve functions come from the DecisionCurve package (distributed on
# GitHub as rmda, which is what library() loads below). Use whichever of the four
# install routes works in your environment.
#1 From CRAN
install.packages('DecisionCurve')
#2 Via BiocManager
BiocManager::install("DecisionCurve", ask = F, update = F)
#3 From GitHub with devtools
library(devtools)
install_github("mdbrown/DecisionCurve")
#4 From a local source tarball
data_dir <- choose.dir(default = "D:\\data_dir",
                       caption = "Set the working directory to the location of the 'DecisionCurve_1.3.tar.gz' package.")
install.packages(paste0(data_dir, "\\DecisionCurve_1.3.tar.gz"), repos = NULL, type = "source")

##0.1 Clear the workspace and load the libraries
rm(list = ls())
library(car)
library(rms)
library(pROC)
library(rmda)

##0.2 Directory that holds the data files
data_dir <- choose.dir(default = "D:\\data_dir",
                       caption = "Set the working directory to the location of your data files.")

##0.3 Directory where the plots will be saved
output_dir <- choose.dir(default = "D:\\output_dir",
                         caption = "Set the output directory for saving the plots.")

##0.4 Read the training dataset (tab-delimited text file)
training_dataset_path <- choose.files(default = data_dir,
                                      caption = "Please choose the text file that contains the training dataset.",
                                      multi = TRUE, filters = Filters, index = nrow(Filters))
training_dataset <- read.csv(training_dataset_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
print(paste0("The training set has ", dim(training_dataset)[1], " samples and ",
             dim(training_dataset)[2], " variables."))

##0.5 Read the validation dataset (tab-delimited text file)
validation_dataset_path <- choose.files(default = data_dir,
                                        caption = "Please choose the text file that contains the validation dataset.",
                                        multi = TRUE, filters = Filters, index = nrow(Filters))
validation_dataset <- read.csv(validation_dataset_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
print(paste0("The validation set contains ", dim(validation_dataset)[1], " samples and ",
             dim(validation_dataset)[2], " variables."))
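## Optional sanity check on the loaded data (a minimal sketch; it assumes the outcome
## column is named "Group", sits in the first column, and is coded 0/1, which is what
## the comparison function and the models below expect).
str(training_dataset)                                       # column names and types
table(training_dataset$Group, useNA = "ifany")              # outcome distribution, flag missing values
stopifnot(identical(colnames(training_dataset), colnames(validation_dataset)))  # same layout in both sets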
################ 1. Comparison of baseline variables ########################
# Example arguments (the function is called with explicit arguments below)
com_type="btw_datasets"
var_name="Gender"
# comparison_nomogram(): compares one variable either between the two datasets
# (com_type = "btw_datasets") or between the outcome groups of the training set
# (com_type = "btw_grp"), and returns the appropriate p-value.
comparison_nomogram<-function(com_type,training_dataset,validation_dataset,var_name,var_type){
  # Select the two data vectors to compare.
  if(com_type=="btw_datasets"){
    data_1=training_dataset[,var_name]
    data_2=validation_dataset[,var_name]
  }else if(com_type=="btw_grp"){
    data_1=training_dataset[(training_dataset[,1]==0),var_name]
    data_2=training_dataset[(training_dataset[,1]==1),var_name]
  }
  # Continuous variables: t-test if both groups are normal with equal variances,
  # otherwise the Wilcoxon rank-sum test.
  if(var_type=="continue_type"){
    judge_p<-function(p1,p2,p3,tp,zhp){
      mark<-0
      if(p1<0.05){
        mark<-1
      }
      if(p2<0.05){
        mark<-1
      }
      if(p3<0.05){
        mark<-1
      }
      if(mark==0){
        return(tp)
      }else{
        return(zhp)
      }
    }
    grp_1_ztp<-shapiro.test(data_1)$p.value
    grp_2_ztp<-shapiro.test(data_2)$p.value
    y_leveneT=c(data_1, data_2)
    group_leveneT=as.factor(c(rep(1,length(data_1)), rep(2,length(data_2))))
    fcp<-leveneTest(y_leveneT,group_leveneT)[["Pr(>F)"]][1]   # homogeneity-of-variance p-value
    t_testp<-t.test(data_1,data_2,paired=F)$p.value
    df = data.frame(y_leveneT,group_leveneT)
    zhp<-wilcox.test(y_leveneT~group_leveneT, df)$p.value
    finalp<-judge_p(p1=grp_1_ztp, p2=grp_2_ztp, p3=fcp, tp=t_testp, zhp=zhp)
  }else if(var_type=="bi_type"){
    # Helper functions for categorical variables
    col_matrix_producer<-function(grp_0_dingxing_data,grp_1_dingxing_data){
      # Build a counts matrix (levels x groups) from the two categorical vectors.
      vars<-unique(c(names(table(grp_0_dingxing_data)),names(table(grp_1_dingxing_data))))
      col_matrix<-matrix(data = 0, nrow = length(vars), ncol = 2, byrow = FALSE,dimnames = NULL)
      colnames(col_matrix)<-c("grp_0","grp_1")
      rownames(col_matrix)<-vars
      for(index_var in 1:length(vars)){
        if(!is.na(table(grp_0_dingxing_data)[vars[index_var]][[1]])){
          col_matrix[vars[index_var],"grp_0"]<-table(grp_0_dingxing_data)[vars[index_var]][[1]]
        }
      }
      for(index_var in 1:length(vars)){
        if(!is.na(table(grp_1_dingxing_data)[vars[index_var]][[1]])){
          col_matrix[vars[index_var],"grp_1"]<-table(grp_1_dingxing_data)[vars[index_var]][[1]]
        }
      }
      return(col_matrix)
    }
    col_matrix_ratio_producer<-function(col_matrix, round_num){
      # Format each cell as "count (percentage%)", column-wise.
      out_table<-col_matrix
      for(index_col in 1:ncol(col_matrix)){
        sum_col<-sum(col_matrix[,index_col])
        for(index_row in 1:nrow(col_matrix)){
          ratio<-round(100*col_matrix[index_row,index_col]/sum_col, round_num)
          out_table[index_row,index_col]<-paste0(col_matrix[index_row,index_col], " (",ratio,"%)")
        }
      }
      return(out_table)
    }
    ## Drop missing values (coded NA or the literal string "NA") from both vectors.
    b0 = which(is.na(data_1) | data_1=="NA")
    if(length(b0)){
      data_1<-data_1[-b0]
    }
    b1 = which(is.na(data_2) | data_2=="NA")
    if(length(b1)){
      data_2<-data_2[-b1]
    }
    # Contingency table of the two groups
    col_matrix<-col_matrix_producer(data_1,data_2)
    # Chi-squared and Fisher exact p-values
    chisqcp<-chisq.test(col_matrix)$p.value
    fisherp<- tryCatch(fisher.test(col_matrix)$p.value, error=function(e){return("A")} )
    if(fisherp=="A"){
      # The exact test failed (sentinel "A"), so fall back to a simulated p-value.
      fisherp<-fisher.test(col_matrix,simulate.p.value=TRUE)$p.value
    }
    #fisherp<-fisher.test(col_matrix)$p.value
    col_matrix_observed<-chisq.test(col_matrix)$observed
    col_matrix_expected<-chisq.test(col_matrix)$expected
    round_num<-1
    col_matrix_ratio<-col_matrix_ratio_producer(col_matrix, round_num)
    ## Use Fisher's exact test when any expected count is <5 or the total sample is <40,
    ## otherwise the chi-squared test.
    sum_N<-sum(col_matrix)
    if(any(col_matrix_expected<5) | sum_N<40){
      finalp<-fisherp
    }else{
      finalp<-chisqcp
    }
  }
  return(paste0("The p-value for variable ", var_name, " is: ", finalp))
}
#### 1. Compare "Age" between the training and validation datasets
com_type="btw_datasets"
var_name = "Age"
var_type = "continue_type"
comparison_nomogram(com_type=com_type, training_dataset=training_dataset,
                    validation_dataset=validation_dataset, var_name=var_name, var_type=var_type)
#### 2. Compare "Age" between the outcome groups of the training set
com_type="btw_grp"
var_name = "Age"
var_type = "continue_type"
comparison_nomogram(com_type=com_type, training_dataset=training_dataset,
                    validation_dataset=validation_dataset, var_name=var_name, var_type=var_type)

################ 3. Logistic regression ########################
##3.1 Univariate logistic regression
f_lrm <-lrm(Group ~ Age, data=training_dataset, x=TRUE, y=TRUE, maxit=1000)
# Coefficients and p-values
print(f_lrm)

##3.2 Multivariate logistic regression
f_lrm <-lrm(Group ~ MCT_P+BUN+Ca, data=training_dataset, x=TRUE, y=TRUE, maxit=1000)
print(f_lrm)

################ 4. Nomogram ##########################
ddist <- datadist(training_dataset)
options(datadist='ddist')
# Draw the nomogram and save it as a PDF
pdf(file=paste(output_dir, "\\nomogram.pdf", sep = ""), width=10, height=8)
nom <- nomogram(f_lrm,
                fun = function(x) 1/(1+exp(-x)),            # convert the linear predictor to a probability
                fun.at = c(0.01,0.1,0.3,0.5,0.7,0.9,0.99),  # probability tick marks
                funlabel = "Pre of HTG_SAP",                # label of the probability axis
                lp = F,                                     # hide the linear-predictor axis
                conf.int = F,                               # no confidence intervals
                abbrev = F)                                 # do not abbreviate factor levels
plot(nom)
dev.off()

################ 5. Model validation ##########################
##5.1 ROC curve for the training set
pred_f_training<-predict(f_lrm, training_dataset)
# Build the ROC object
modelroc <- roc(training_dataset$Group, pred_f_training)
# Plot the ROC curve
pdf(file=paste(output_dir, "\\ROC_training.pdf", sep = ""), width=10, height=10)
plot(modelroc, print.auc=TRUE, auc.polygon=TRUE, grid=c(0.1, 0.2), print.thres=TRUE)
dev.off()

##5.2 ROC curve for the validation set
pred_f_validation<-predict(f_lrm, validation_dataset)
modelroc <- roc(validation_dataset$Group, pred_f_validation)
pdf(file=paste(output_dir, "\\ROC_testing.pdf", sep = ""), width=10, height=10)
plot(modelroc, print.auc=TRUE, auc.polygon=TRUE, grid=c(0.1, 0.2), print.thres=TRUE)
dev.off()

##5.3 Calibration curve for the training set
cal <- calibrate(f_lrm)
pdf(file=paste(output_dir, "\\calibrate_training.pdf", sep = ""), width=10, height=10)
plot(cal)
dev.off()

##5.4 Calibration curve for the validation set
fit.vad<-lrm(validation_dataset$Group~pred_f_validation, data=validation_dataset, x=T, y=T)
pdf(file=paste(output_dir, "\\calibrate_testing.pdf", sep = ""), width=10, height=10)
cal <- calibrate(fit.vad)
plot(cal)
dev.off()
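## Optional extras (a sketch, not part of the original workflow): bootstrap internal
## validation from rms and a 95% CI for the AUC from pROC. The number of bootstrap
## replicates (B = 200) is an arbitrary illustrative choice; `modelroc` at this point
## still holds the validation-set ROC built in step 5.2.
print(validate(f_lrm, method = "boot", B = 200))        # optimism-corrected Dxy, R2, slope, ...
cal_boot <- calibrate(f_lrm, method = "boot", B = 200)  # bootstrap-corrected calibration of the training fit
plot(cal_boot)
print(ci.auc(modelroc))                                 # 95% CI for the validation-set AUC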
""),width=10,height=10) DCA_training<- decision_curve(Group ~MCT_P+BUN+Ca,data = training_dataset #,policy = "opt-in" ,study.design = 'cohort') plot_decision_curve(DCA_training,curve.names= c('Nomogram model')) dev.off() #DCA of validation pdf(file=paste(output_dir, "\\DCA_testing.pdf", sep = ""),width=10,height=10) DCA_training<- decision_curve(Group ~MCT_P+BUN+Ca,data = validation_dataset #,policy = "opt-in" ,study.design = 'cohort') plot_decision_curve(DCA_training,curve.names= c('Nomogram model')) dev.off()