# -*- coding: utf-8 -*-
"""
RLDC algorithm.
Evaluate classification performance (AUC, G-mean, F1) with five-fold stratified
cross-validation and repeat the whole procedure 10 times.
"""
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_auc_score, recall_score, precision_score, confusion_matrix
import numpy as np
import pandas as pd
from scipy.spatial import distance


# Functions for calculating the classification performance evaluation indicators ******************
def g_mean_score(y_true, y_pred):
    """Geometric mean of the per-class recalls (sensitivity and specificity in the binary case)."""
    cm = confusion_matrix(y_true, y_pred)
    sensitivities = np.diag(cm) / np.sum(cm, axis=1)
    return np.sqrt(np.prod(sensitivities))


# Indicator calculation
def calculate_metrics(y, y_predict, Jresult):
    auc = roc_auc_score(y, y_predict)
    g_mean = g_mean_score(y, y_predict)
    recall = recall_score(y, y_predict)
    precision = precision_score(y, y_predict)
    f1 = (2 * recall * precision) / (recall + precision)
    Jresult.extend([auc, g_mean, f1])


# Gaussian Naive Bayes (GNB) classification performance evaluation
def fenlei(x_train, y_train, x_test, y_test):
    Jresult = []
    gnb = GaussianNB()
    gnb.fit(x_train, y_train)
    calculate_metrics(y_test, gnb.predict(x_test), Jresult)
    return Jresult


# Main function ************************************************************************************
if __name__ == '__main__':
    # DataFrame to store the result of every cross-validation fold
    column_names = ['GNB_AUC', 'GNB_Gmean', 'GNB_F1']
    experiment_results = pd.DataFrame(columns=column_names)

    # Experimental parameters: the dataset and the number of division intercepts t ****************
    datasetY = pd.read_csv('iris.csv')
    t = 9

    # Split the dataset into features and labels
    y = datasetY['class']
    X = datasetY.drop(columns='class')

    # Distance matrix between all points, and the maximum pairwise distance b
    A = distance.cdist(X, X, 'euclidean')
    b = np.max(A[A > 0])

    # Five-fold stratified cross-validation object
    # (with a fixed random_state the same five folds are reused in every repetition)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Repeat the experiment 10 times
    for run in range(10):
        # Perform five-fold cross-validation
        for train_index, test_index in kfold.split(X, y):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            # DataFrames holding the minority proportions under the different intercepts
            x_train1 = pd.DataFrame(index=X_train.index)
            x_test1 = pd.DataFrame(index=X_test.index)

            # Calculate the proportion of minority-class points under different intercepts.
            # Label 0 is treated as the minority class; positions are relative to train_index.
            minority_indices = np.where(y_train == 0)[0]
            for factor in np.arange(1, t):
                current_b = factor * b / t
                # Transform the training set.  A is indexed positionally, so this relies on the
                # default RangeIndex produced by read_csv (row label == row position).
                for idx in X_train.index:
                    within_distance_indices = np.where(A[idx, train_index] <= current_b)[0]
                    within_minority_count = np.sum(np.isin(within_distance_indices, minority_indices))
                    total_within_count = len(within_distance_indices) - 1  # exclude the point itself
                    x_train1.loc[idx, f'{factor}b'] = (within_minority_count / total_within_count
                                                       if total_within_count > 0 else 0.0)
                # Transform the testing set
                for idx in X_test.index:
                    within_distance_indices = np.where(A[idx, train_index] <= current_b)[0]
                    within_minority_count = np.sum(np.isin(within_distance_indices, minority_indices))
                    total_within_count = len(within_distance_indices)
                    x_test1.loc[idx, f'{factor}b'] = (within_minority_count / total_within_count
                                                      if total_within_count > 0 else 0.0)

            # Classification performance evaluation on the transformed features
            Danswer = fenlei(x_train1, y_train, x_test1, y_test)
            Dw = pd.DataFrame(np.array(Danswer).reshape(1, 3), columns=experiment_results.columns)
            experiment_results = pd.concat([experiment_results, Dw], ignore_index=True)

    # Print the average of the final results over all runs and folds
    fw1 = experiment_results.mean(axis=0)
    print(fw1)
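

# --------------------------------------------------------------------------------------
# Optional illustration (not part of the evaluation above): a vectorized sketch of the
# same minority-proportion transformation.  The name `minority_proportion_features` and
# its signature are choices made for this example only; the loop-based code in the main
# block is the reference version.  Like the test-set branch above, the denominator here
# counts every neighbour within the radius (no self-exclusion).
# --------------------------------------------------------------------------------------
def minority_proportion_features(D_query_train, y_train, b, t):
    """For each query point, return the fraction of minority-class (label 0) training
    points lying within each radius factor * b / t, for factor = 1 .. t-1.

    D_query_train : (n_query, n_train) array of distances from query to training points.
    Returns an (n_query, t - 1) array, one column per intercept factor.
    """
    minority_mask = np.asarray(y_train) == 0                  # minority class is label 0, as above
    features = np.zeros((D_query_train.shape[0], t - 1))
    for k, factor in enumerate(np.arange(1, t)):
        within = D_query_train <= factor * b / t              # (n_query, n_train) boolean mask
        total = within.sum(axis=1)                            # neighbours inside the radius
        minority = (within & minority_mask).sum(axis=1)       # minority neighbours inside the radius
        features[:, k] = np.divide(minority, total, out=np.zeros(len(total)), where=total > 0)
    return features

# Hypothetical usage with the variables defined in the main block:
#   D = distance.cdist(X_test, X_train, 'euclidean')
#   x_test_features = minority_proportion_features(D, y_train, b, t)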