#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# Import the required libraries
import re
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from numpy.random import rand
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             roc_auc_score, roc_curve, auc, precision_recall_curve,
                             classification_report, confusion_matrix)
from sklearn.utils import shuffle
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.neighbors import KNeighborsClassifier
from genetic_selection import GeneticSelectionCV

plt.rcParams['font.sans-serif'] = ['SimHei']   # SimHei font so Chinese characters render in plots
plt.rcParams['axes.unicode_minus'] = False     # keep the minus sign readable with a non-ASCII font


# In[ ]:


# Print the standard test-set metrics for a fitted binary classifier
def evaluate_model(ytest, y_pred, ypred_proba = None):
    if ypred_proba is not None:
        print('ROC-AUC score of the model:   {}'.format(roc_auc_score(ytest, ypred_proba[:, 1])))
    print('Accuracy of the model: {}\n'.format(accuracy_score(ytest, y_pred)))
    print('Precision of the model: {}\n'.format(precision_score(ytest, y_pred)))
    print('Recall of the model: {}\n'.format(recall_score(ytest, y_pred)))
    print('F1-Score of the model: {}\n'.format(f1_score(ytest, y_pred)))
    print('Classification report: \n{}\n'.format(classification_report(ytest, y_pred)))
    print('Confusion matrix: \n{}\n'.format(confusion_matrix(ytest, y_pred)))


# In[ ]:


# Load the dataset (uncomment the line for the dataset you want to run)
#Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\ALL1(急性淋巴白血病).csv')   # ALL (acute lymphoblastic leukemia)
#Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\DLBCL.csv')
#Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\Leukemia.csv')
#Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\SRBCT.csv')
#Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\Prostate.csv')
Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\CNS.csv')
#Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\Gastric2.csv')
#Train = pd.read_csv(r'C:\\Users\\MECHREVO\\Desktop\\Ovarian.csv')
Train


# In[ ]:



def is_simple_numpy_number(dtype):
    # True for plain integer or floating dtypes
    if np.issubdtype(dtype, np.integer):
        return True
    if np.issubdtype(dtype, np.floating):
        return True
    return False

# Collect the names of any non-numeric columns
list_1 = []
for dtype, col in zip(Train.dtypes, Train.dtypes.index):
    if not is_simple_numpy_number(dtype):
        list_1.append(col)

list_1
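

# In[ ]:


# list_1 is only inspected above. A hedged sketch (an assumption, not in the
# original notebook): if any non-numeric columns did show up, they would need
# to be dropped or encoded before scaling, for example:
#
#     Train = Train.drop(columns=list_1)                  # drop them, or
#     Train[list_1] = Train[list_1].apply(
#         lambda s: s.astype('category').cat.codes)       # integer-encode them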


# In[ ]:


import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import sklearn.model_selection as ms
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

X = Train.drop(['Y'], axis = 1)
y = Train.Y


# Min-max scaling to normalise the feature values into [0, 1]
from sklearn.preprocessing import MinMaxScaler
ma = MinMaxScaler()
X = ma.fit_transform(X)

# restore the DataFrame with the original column names
X = pd.DataFrame(X)
X.columns = Train.drop(['Y'], axis = 1).columns


xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size = 0.20, random_state = 10)
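

# In[ ]:


# The imblearn samplers imported above are never used in this notebook. A
# hedged sketch (an assumption, not part of the original pipeline): if the
# class distribution were skewed, the training split could be rebalanced
# before feature selection, e.g.
#
#     ros = RandomOverSampler(random_state=10)
#     xtrain, ytrain = ros.fit_resample(xtrain, ytrain)
#     print(Counter(ytrain))   # inspect the balanced class counts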


# In[ ]:





# In[ ]:


# Variance filter: remove features whose variance falls below a threshold
from sklearn.feature_selection import VarianceThreshold
def select1(xtrain,var):
    start_time = datetime.now()
    
    select = VarianceThreshold(threshold=(var))
    X_train_selected=select.fit_transform(xtrain)
    
    end_time = datetime.now()
    print('variance filter train Duration: {}'.format(end_time - start_time))
    
#    mask = select.get_support()
#    print('selected VarianceThreshold:',X1.columns[mask].values)
#     plt.figure(figsize=(20,2))
    # visualize the mask. black is True, white is False
#    plt.matshow(mask.reshape(1, -1), cmap='gray_r')
#    plt.xlabel("Sample index")
#    plt.yticks(())
#    plt.show()
    return select

# select1(xtrain)


# In[ ]:


#Extremely randomized tree
###################ERT##############
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

def select6(xtrain, ytrain):

    clf = ExtraTreesClassifier(n_estimators=50)
    start_time = datetime.now()

    clf = clf.fit(xtrain, ytrain)
    
    
    # Impurity-based importance of each feature
    print(clf.feature_importances_)

    select = SelectFromModel(clf, prefit=True)
    X_train_selected = select.transform(xtrain)
    
    end_time = datetime.now()
    print('Extremely randomized tree train Duration: {}'.format(end_time - start_time))
    

#     select.get_support()
#     mask = select.get_support()
#     print('selected SelectFromModel tree:',X1.columns[mask].values)
#     # visualize the mask. black is True, white is False
#     plt.matshow(mask.reshape(1, -1), cmap='gray_r')
#     plt.xlabel("Sample index")
#     plt.yticks(())
#     plt.show()
    return select

# select6(xtrain, ytrain)


# In[ ]:


# HHO: Harris Hawks Optimization (wrapper feature selection)
# KNN error rate on the validation fold
import random

def error_rate(xtrain, ytrain, x, opts):
    
    # parameters
    k     = opts['k']
    fold  = opts['fold']
    xt    = fold['xt']
    yt    = fold['yt']
    xv    = fold['xv']
    yv    = fold['yv']
    
    # Number of instances
    num_train = np.size(xt, 0)
    num_valid = np.size(xv, 0)
    # Define selected features
    xtrain  = xt[:, x == 1]
    ytrain  = yt.reshape(num_train)  # Solve bug
    xvalid  = xv[:, x == 1]
    yvalid  = yv.reshape(num_valid)  # Solve bug   
    # Training: KNN on the selected feature subset
    mdl     = KNeighborsClassifier(n_neighbors = k)
    mdl.fit(xtrain, ytrain)
    # Prediction
    ypred   = mdl.predict(xvalid)
    acc     = np.sum(yvalid == ypred) / num_valid
    error   = 1 - acc
    return error

# Fitness: weighted combination of error rate and feature-subset size
def Fun(xtrain, ytrain, x, opts):
    # Parameters
    # alpha weighs the error rate against the fraction of selected features;
    # values from 0.1 to 0.9 were also tried, 0.99 is the setting used here
    alpha    = 0.99
    
    beta     = 1 - alpha
    # Original feature size
    max_feat = len(x)
    # Number of selected features
    num_feat = np.sum(x == 1)
    # Solve if no feature selected
    if num_feat == 0:
        cost  = 1
    else:
        # Get error rate
        error = error_rate(xtrain, ytrain, x, opts)
        # Objective function
        cost  = alpha * error + beta * (num_feat / max_feat)  # fitness function
    return cost

def init_position(lb, ub, N, dim):
    X = np.zeros([N, dim], dtype='float')
    for i in range(N):
        for d in range(dim):
            X[i,d] = lb[0,d] + (ub[0,d] - lb[0,d]) * rand()        
    return X

def binary_conversion(X, thres, N, dim):
    Xbin = np.zeros([N, dim], dtype='int')
    for i in range(N):
        for d in range(dim):
            if X[i,d] > thres:    ###thres=0.5
                Xbin[i,d] = 1
            else:
                Xbin[i,d] = 0    
    return Xbin


def boundary(x, lb, ub):
    if x < lb:
        x = lb
    if x > ub:
        x = ub
    
    return x
def levy_distribution(beta, dim):
    
    # Sigma 
    nume  = math.gamma(1 + beta) * np.sin(np.pi * beta / 2)
    deno  = math.gamma((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
    sigma = (nume / deno) ** (1 / beta)
    # Parameter u & v 
    u     = np.random.randn(dim) * sigma
    v     = np.random.randn(dim)
    # Step 
    step  = u / abs(v) ** (1 / beta)
    LF    = 0.01 * step    

    return LF


def jfs(xtrain, ytrain, opts):
    
        
    # Parameters
    ub    = 1
    lb    = 0
    thres = 0.5
    beta  = 1.5    # levy component
    
    N        = opts['N']
    max_iter = opts['T']
    if 'beta' in opts:
        beta = opts['beta']
        
    # Dimension
    dim = np.size(xtrain, 1)
    if np.size(lb) == 1:
        ub = ub * np.ones([1, dim], dtype='float')
        lb = lb * np.ones([1, dim], dtype='float')
        
    # Initialize position 
    X     = init_position(lb, ub, N, dim)
    
    # Pre
    fit   = np.zeros([N, 1], dtype='float')
    Xrb   = np.zeros([1, dim], dtype='float')
    fitR  = float('inf')
            
    curve = np.zeros([1, max_iter], dtype='float') 
    t     = 0
    
    while t < max_iter:
        # Binary conversion
        Xbin = binary_conversion(X, thres, N, dim)
        
        # Fitness
        for i in range(N):
            fit[i,0] = Fun(xtrain, ytrain, Xbin[i,:], opts)
            if fit[i,0] < fitR:
                Xrb[0,:] = X[i,:]
                fitR     = fit[i,0]
                
        # Store result
        curve[0,t] = fitR.copy()
        print("Iteration:", t + 1)
        print("Best (HHO):", curve[0,t])
        t += 1

        # Mean position of hawk (2)
        X_mu      = np.zeros([1, dim], dtype='float')
        X_mu[0,:] = np.mean(X, axis=0)
        
        for i in range(N):
            # Random number in [-1,1]
            E0 = -1 + 2 * rand()
            # Escaping energy of rabbit (3)
            E  = 2 * E0 * (1 - (t / max_iter)) 
            # Exploration phase
            if abs(E) >= 1:
                # Define q in [0,1]
                q = rand()
                if q >= 0.5:
                    # Random select a hawk k
                    k  = np.random.randint(low = 0, high = N)
                    r1 = rand()
                    r2 = rand()
                    for d in range(dim):
                        # Position update (1)
                        X[i,d] = X[k,d] - r1 * abs(X[k,d] - 2 * r2 * X[i,d])
                        # Boundary
                        X[i,d] = boundary(X[i,d], lb[0,d], ub[0,d])

                elif q < 0.5:    
                    r3 = rand() 
                    r4 = rand()
                    for d in range(dim):
                        # Update Hawk (1)
                        X[i,d] = (Xrb[0,d] - X_mu[0,d]) - r3 * (lb[0,d] + r4 * (ub[0,d] - lb[0,d]))
                        # Boundary
                        X[i,d] = boundary(X[i,d], lb[0,d], ub[0,d])
                        
            # Exploitation phase 
            elif abs(E) < 1:
                # Jump strength 
                J = 2 * (1 - rand()) 
                r = rand()
                # {1} Soft besiege
                if r >= 0.5 and abs(E) >= 0.5:
                    for d in range(dim):
                        # Delta X (5)
                        DX     = Xrb[0,d] - X[i,d]
                        # Position update (4)
                        X[i,d] = DX - E * abs(J * Xrb[0,d] - X[i,d])
                        # Boundary
                        X[i,d] = boundary(X[i,d], lb[0,d], ub[0,d])
                        
                # {2} hard besiege
                elif r >= 0.5 and abs(E) < 0.5:
                    for d in range(dim):
                        # Delta X (5)
                        DX     = Xrb[0,d] - X[i,d]
                        # Position update (6)
                        X[i,d] = Xrb[0,d] - E * abs(DX)    
                        # Boundary
                        X[i,d] = boundary(X[i,d], lb[0,d], ub[0,d])
                        
                # {3} Soft besiege with progressive rapid dives
                elif r < 0.5 and abs(E) >= 0.5:
                    # Levy distribution (9)
                    LF = levy_distribution(beta, dim) 
                    Y  = np.zeros([1, dim], dtype='float')
                    Z  = np.zeros([1, dim], dtype='float')
                    
                    for d in range(dim):
                        # Compute Y (7)
                        Y[0,d] = Xrb[0,d] - E * abs(J * Xrb[0,d] - X[i,d])
                        # Boundary
                        Y[0,d] = boundary(Y[0,d], lb[0,d], ub[0,d])
                        
                    for d in range(dim):
                        # Compute Z (8)
                        Z[0,d] = Y[0,d] + rand() * LF[d]
                        # Boundary
                        Z[0,d] = boundary(Z[0,d], lb[0,d], ub[0,d])          
                    
                    # Binary conversion
                    Ybin = binary_conversion(Y, thres, 1, dim)
                    Zbin = binary_conversion(Z, thres, 1, dim)
                    # fitness
                    fitY = Fun(xtrain, ytrain, Ybin[0,:], opts)
                    fitZ = Fun(xtrain, ytrain, Zbin[0,:], opts)
                    # Greedy selection (10)
                    if fitY < fit[i,0]:
                        fit[i,0]  = fitY 
                        X[i,:]    = Y[0,:]
                    if fitZ < fit[i,0]:
                        fit[i,0]  = fitZ
                        X[i,:]    = Z[0,:]                        

                # {4} Hard besiege with progressive rapid dives
                elif r < 0.5 and abs(E) < 0.5:
                    # Levy distribution (9)
                    LF = levy_distribution(beta, dim) 
                    Y  = np.zeros([1, dim], dtype='float')
                    Z  = np.zeros([1, dim], dtype='float')
                    
                    for d in range(dim):
                        # Compute Y (12)
                        Y[0,d] = Xrb[0,d] - E * abs(J * Xrb[0,d] - X_mu[0,d])
                        # Boundary
                        Y[0,d] = boundary(Y[0,d], lb[0,d], ub[0,d])
                    
                    for d in range(dim):
                        # Compute Z (13)
                        Z[0,d] = Y[0,d] + rand() * LF[d]
                        # Boundary
                        Z[0,d] = boundary(Z[0,d], lb[0,d], ub[0,d])    

                    # Binary conversion
                    Ybin = binary_conversion(Y, thres, 1, dim)
                    Zbin = binary_conversion(Z, thres, 1, dim)
                    # fitness
                    fitY = Fun(xtrain, ytrain, Ybin[0,:], opts)
                    fitZ = Fun(xtrain, ytrain, Zbin[0,:], opts)
                    # Greedy selection (10)
                    if fitY < fit[i,0]:
                        fit[i,0]  = fitY
                        X[i,:]    = Y[0,:]
                    if fitZ < fit[i,0]:
                        fit[i,0]  = fitZ
                        X[i,:]    = Z[0,:]  
    # Best feature subset
    Gbin       = binary_conversion(Xrb, thres, 1, dim) 
    Gbin       = Gbin.reshape(dim)
    pos        = np.asarray(range(0, dim))    
    sel_index  = pos[Gbin == 1]
    num_feat   = len(sel_index)
    # Create dictionary
    hho_data = {'sf': sel_index, 'c': curve, 'nf': num_feat}
    
    return hho_data

# Run HHO feature selection and return the selected column indices
def halis(xtrain,ytrain):
    feat  = np.asarray(xtrain)   # feature vector
    label = np.asarray(ytrain)   # label vector

    # split data into train & validation (80 -- 20)
    xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.2, random_state = 10)
    fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}

    # parameters
    k    = 5     # k-value in KNN (k = 10 was also tried)
    N    = 30    # number of hawks
    T    = 100   # maximum number of iterations
    # w, c1, c2 are PSO parameters; the HHO jfs ignores them
    w    = 0.9
    c1   = 2
    c2   = 2
    opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'w':w, 'c1':c1, 'c2':c2}
    # perform feature selection
    fmdl = jfs(feat, label, opts)
    sf   = fmdl['sf']
    return sf
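

# In[ ]:


# trans_halis is called in the VEH pipeline below but never defined in this
# notebook. A minimal sketch, assuming it mirrors trans_pso/trans_csa/trans_de:
# project the DataFrame onto the columns the optimizer selected.
def trans_halis(Train, sf):
    return Train[Train.columns[sf].values]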


# In[ ]:


# Three external classifier models: decision tree (DT), SVM (SV), logistic regression (LR)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn import svm

DT = DecisionTreeClassifier(random_state=0,max_depth=8)

SV = svm.SVC(C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, 
             probability=True, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=100,
             decision_function_shape='ovr', break_ties=False, random_state=None)


LR = LogisticRegression(penalty='l2',dual=False, tol=0.0001, C=1.0, fit_intercept=True, 
                        intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs',
                        max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
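

# In[ ]:


# trans_data and train_pred_clf are called by every pipeline below but are
# never defined in this notebook. Minimal sketches, inferred from the call
# sites (an assumption, not the authors' original code): trans_data applies a
# fitted sklearn selector while keeping the DataFrame column names, and
# train_pred_clf fits a classifier and prints its test-set metrics.
def trans_data(select, data):
    # boolean mask of the columns the fitted selector kept
    mask = select.get_support()
    return data.loc[:, data.columns[mask]]

def train_pred_clf(clf, xtrain, xtest, ytrain, ytest):
    clf.fit(xtrain, ytrain)
    y_pred = clf.predict(xtest)
    # probability estimates are needed for the ROC-AUC score, when available
    ypred_proba = clf.predict_proba(xtest) if hasattr(clf, 'predict_proba') else None
    evaluate_model(ytest, y_pred, ypred_proba)
    return clf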


# In[ ]:





# In[ ]:


############ VF + ERT (two-stage feature selection) ##################

start_time = datetime.now()

# VE: variance filter (stage 1) + extremely randomized trees (stage 2)
def VE(clf,xtrain,xtest,ytrain,ytest):
    # Stage 1: variance filter
    print(xtrain.shape)
    select1_model=select1(xtrain,0.05)
    xtrain=trans_data(select1_model,xtrain)
    xtest=trans_data(select1_model,xtest)

    # Stage 2: extremely randomized trees
    select6_model=select6(xtrain,ytrain)
    xtrain=trans_data(select6_model,xtrain)
    xtest=trans_data(select6_model,xtest)

    print(xtrain.columns)
    print(xtrain.shape)
    
    lr=train_pred_clf(clf,xtrain,xtest,ytrain,ytest)
    
    
  ############DT#########################################


    scores = cross_val_score(DT, xtrain, ytrain, cv=10)
    print('DT--scores',scores)
    print("DT--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(DT,xtrain,ytrain,scoring=scoring,cv=10, return_train_score=True)
    sorted(scores.keys())
    print('DT--result:',scores)
############SVM##########################################
    scores = cross_val_score(SV, xtrain, ytrain, cv=10)
    print('SV--scores',scores)
    print("SV--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(SV,xtrain,ytrain,scoring=scoring,cv=10, return_train_score=True)
    sorted(scores.keys())
    print('SV--result:',scores)


############LR##########################################
    scores = cross_val_score(LR, xtrain, ytrain, cv=10)
    print('LR--scores',scores)
    print("LR--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(LR,xtrain,ytrain,scoring=scoring,cv=10, return_train_score=True)
    sorted(scores.keys())
    print('LR--result:',scores)

    return xtest,ytest,lr

a1,b1,c1=VE(DT,xtrain,xtest,ytrain,ytest)
a2,b2,c2=VE(SV,xtrain,xtest,ytrain,ytest)
a3,b3,c3=VE(LR,xtrain,xtest,ytrain,ytest)


###########running time############################
end_time = datetime.now()
print('running time: {}'.format(end_time - start_time))


# In[ ]:





# In[ ]:


############# VEH: variance filter + ERT + HHO (three-stage selection) #######
########################################################################
start_time = datetime.now()

def VEH(clf,xtrain,xtest,ytrain,ytest):
    # Stage 1: variance filter
    print(xtrain.shape)
    select1_model=select1(xtrain,0.05)
    xtrain=trans_data(select1_model,xtrain)
    xtest=trans_data(select1_model,xtest)

    # Stage 2: extremely randomized trees
    select6_model=select6(xtrain,ytrain)
    xtrain=trans_data(select6_model,xtrain)
    xtest=trans_data(select6_model,xtest)

    # Stage 3: HHO wrapper selection
    start_time = datetime.now()
    halis_model=halis(xtrain,ytrain)
    xtrain=trans_halis(xtrain,halis_model)
    xtest=trans_halis(xtest,halis_model)
    end_time = datetime.now()
    print('HHO train Duration: {}'.format(end_time - start_time))
    
    print(xtrain.columns)
    print(xtrain.shape)
    lr=train_pred_clf(clf,xtrain,xtest,ytrain,ytest)
    
       
 ############DT#########################################


    scores = cross_val_score(DT, xtrain, ytrain, cv=10)  
    print('DT--scores',scores) 
    print("DT--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro'] 

    scores = cross_validate(DT,xtrain,ytrain,scoring=scoring,cv=10, return_train_score=True)
    sorted(scores.keys())
    print('DT--result:',scores)

############SVM##########################################
    scores = cross_val_score(SV, xtrain, ytrain, cv=10)
    print('SV--scores',scores)
    print("SV--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(SV,xtrain,ytrain,scoring=scoring,cv=10, return_train_score=True)
    sorted(scores.keys())
    print('SV--result:',scores)

############LR##########################################
    scores = cross_val_score(LR, xtrain, ytrain, cv=10)
    print('LR--scores',scores)
    print("LR--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(LR,xtrain,ytrain,scoring=scoring,cv=10, return_train_score=True)
    sorted(scores.keys())
    print('LR--result:',scores)
    return xtest,ytest,lr
    
a1,b1,c1=VEH(DT,xtrain,xtest,ytrain,ytest)
a2,b2,c2=VEH(SV,xtrain,xtest,ytrain,ytest)
a3,b3,c3=VEH(LR,xtrain,xtest,ytrain,ytest)

###########running time############################
end_time = datetime.now()
print('running time: {}'.format(end_time - start_time))


# In[ ]:





# In[ ]:


########## GA: genetic algorithm feature selection ####################
from sklearn import datasets, linear_model
from genetic_selection import GeneticSelectionCV


def select_gs(X,y):
    estimator = linear_model.LogisticRegression(solver="liblinear", multi_class="ovr")

    start_time = datetime.now()
    
    selector = GeneticSelectionCV(
        estimator,
        cv=3,
        verbose=1,
        scoring="accuracy",
        max_features=10,

        n_population=30,   
        crossover_proba=0.5,

        mutation_proba=0.2,


        n_generations=100,   
        crossover_independent_proba=0.5,
        mutation_independent_proba=0.05,
        tournament_size=3,
        n_gen_no_change=10,
        caching=True,
        n_jobs=-1,
    )
    select = selector.fit(X, y)
    
    end_time = datetime.now()
    print('GA train Duration: {}'.format(end_time - start_time))
    
    print(select.support_)
    return select
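

# In[ ]:


# select_gs is defined above but never invoked in this notebook. A minimal
# usage sketch (an assumption, mirroring the other pipelines): fit the GA
# selector on the training split, then keep the supported columns.
#
#     gs_model  = select_gs(xtrain, ytrain)
#     xtrain_ga = xtrain.loc[:, gs_model.support_]
#     xtest_ga  = xtest.loc[:, gs_model.support_]
#     train_pred_clf(LR, xtrain_ga, xtest_ga, ytrain, ytest)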


# In[ ]:





# In[ ]:


# PSO: Particle Swarm Optimization (wrapper feature selection)
# NOTE: this cell redefines error_rate, Fun and jfs, so whichever optimizer
# cell was run most recently is the one the wrapper functions below will use.
import random
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
# KNN error rate on the validation fold
def error_rate(xtrain, ytrain, x, opts):
    # parameters
    k     = opts['k']
    fold  = opts['fold']
    xt    = fold['xt']
    yt    = fold['yt']
    xv    = fold['xv']
    yv    = fold['yv']
    
    # Number of instances
    num_train = np.size(xt, 0)
    num_valid = np.size(xv, 0)
    # Define selected features
    xtrain  = xt[:, x == 1]
    ytrain  = yt.reshape(num_train)  # Solve bug
    xvalid  = xv[:, x == 1]
    yvalid  = yv.reshape(num_valid)  # Solve bug   
    # Training
    mdl     = KNeighborsClassifier(n_neighbors = k)
    mdl.fit(xtrain, ytrain)
    # Prediction
    ypred   = mdl.predict(xvalid)
    acc     = np.sum(yvalid == ypred) / num_valid
    error   = 1 - acc 
    return error
# Error rate & Feature size
def Fun(xtrain, ytrain, x, opts):
    # Parameters
    alpha    = 0.99
    beta     = 1 - alpha
    # Original feature size
    max_feat = len(x)
    # Number of selected features
    num_feat = np.sum(x == 1)
    # Solve if no feature selected
    if num_feat == 0:
        cost  = 1
    else:
        # Get error rate
        error = error_rate(xtrain, ytrain, x, opts)
        # Objective function
        cost  = alpha * error + beta * (num_feat / max_feat)   
    return cost
########################################################################
import numpy as np
from numpy.random import rand
#from FS.functionHO import Fun
def init_position(lb, ub, N, dim):
    X = np.zeros([N, dim], dtype='float')
    for i in range(N):
        for d in range(dim):
            X[i,d] = lb[0,d] + (ub[0,d] - lb[0,d]) * rand()            
    return X
def init_velocity(lb, ub, N, dim):
    V    = np.zeros([N, dim], dtype='float')
    Vmax = np.zeros([1, dim], dtype='float')
    Vmin = np.zeros([1, dim], dtype='float')
    # Maximum & minimum velocity
    for d in range(dim):
        Vmax[0,d] = (ub[0,d] - lb[0,d]) / 2
        Vmin[0,d] = -Vmax[0,d]        
    for i in range(N):
        for d in range(dim):
            V[i,d] = Vmin[0,d] + (Vmax[0,d] - Vmin[0,d]) * rand()        
    return V, Vmax, Vmin
def binary_conversion(X, thres, N, dim):
    Xbin = np.zeros([N, dim], dtype='int')
    for i in range(N):
        for d in range(dim):
            if X[i,d] > thres:
                Xbin[i,d] = 1
            else:
                Xbin[i,d] = 0    
    return Xbin
def boundary(x, lb, ub):
    if x < lb:
        x = lb
    if x > ub:
        x = ub    
    return x
def jfs(xtrain, ytrain, opts):
    # Parameters
    ub    = 1
    lb    = 0
    thres = 0.5
    w     = 0.9    # inertia weight
    c1    = 2      # acceleration factor
    c2    = 2      # acceleration factor  
    N        = opts['N']
    max_iter = opts['T']
    if 'w' in opts:
        w    = opts['w']
    if 'c1' in opts:
        c1   = opts['c1']
    if 'c2' in opts:
        c2   = opts['c2']    
    # Dimension
    dim = np.size(xtrain, 1)
    if np.size(lb) == 1:
        ub = ub * np.ones([1, dim], dtype='float')
        lb = lb * np.ones([1, dim], dtype='float')       
    # Initialize position & velocity
    X             = init_position(lb, ub, N, dim)
    V, Vmax, Vmin = init_velocity(lb, ub, N, dim)    
    # Pre
    fit   = np.zeros([N, 1], dtype='float')
    Xgb   = np.zeros([1, dim], dtype='float')
    fitG  = float('inf')
    Xpb   = np.zeros([N, dim], dtype='float')
    fitP  = float('inf') * np.ones([N, 1], dtype='float')
    curve = np.zeros([1, max_iter], dtype='float') 
    t     = 0    
    while t < max_iter:
        # Binary conversion
        Xbin = binary_conversion(X, thres, N, dim)        
        # Fitness
        for i in range(N):
            fit[i,0] = Fun(xtrain, ytrain, Xbin[i,:], opts)
            if fit[i,0] < fitP[i,0]:
                Xpb[i,:]  = X[i,:]
                fitP[i,0] = fit[i,0]
            if fitP[i,0] < fitG:
                Xgb[0,:]  = Xpb[i,:]
                fitG      = fitP[i,0]        
        # Store result
        curve[0,t] = fitG.copy()
        print("Iteration:", t + 1)
        print("Best (PSO):", curve[0,t])
        t += 1        
        for i in range(N):
            for d in range(dim):
                # Update velocity
                r1     = rand()
                r2     = rand()
                V[i,d] = w * V[i,d] + c1 * r1 * (Xpb[i,d] - X[i,d]) + c2 * r2 * (Xgb[0,d] - X[i,d]) 
                # Boundary
                V[i,d] = boundary(V[i,d], Vmin[0,d], Vmax[0,d])
                # Update position
                X[i,d] = X[i,d] + V[i,d]
                # Boundary
                X[i,d] = boundary(X[i,d], lb[0,d], ub[0,d])                
    # Best feature subset
    Gbin       = binary_conversion(Xgb, thres, 1, dim) 
    Gbin       = Gbin.reshape(dim)
    pos        = np.asarray(range(0, dim))    
    sel_index  = pos[Gbin == 1]
    num_feat   = len(sel_index)
    # Create dictionary
    pso_data = {'sf': sel_index, 'c': curve, 'nf': num_feat}    
    return pso_data    
###############################################################################
def pso(xtrain,ytrain):
    feat=np.asarray(xtrain)   # feature vector
    label=np.asarray(ytrain)   #label vector
    xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.2, random_state = 10)
    fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}
    k    = 5     # k-value in KNN
    N    = 30    # number of particles
    T    = 100   # maximum number of iterations
    w    = 0.9
    c1   = 2
    c2   = 2
    opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'w':w, 'c1':c1, 'c2':c2}
    # perform feature selection
    fmdl = jfs(feat, label, opts)
    sf   = fmdl['sf']
    return sf



def trans_pso(Train,sf):
    return Train[Train.columns[sf].values]
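

# In[ ]:


# pso() is defined above but never invoked in this notebook. A minimal usage
# sketch (an assumption): select on the training split, then project both
# splits onto the chosen columns. csa() and de() below follow the same
# pattern with trans_csa and trans_de.
#
#     sf_pso   = pso(xtrain, ytrain)
#     xtrain_p = trans_pso(xtrain, sf_pso)
#     xtest_p  = trans_pso(xtest, sf_pso)
#     train_pred_clf(LR, xtrain_p, xtest_p, ytrain, ytest)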
    


# In[ ]:





# In[ ]:


##################### CSA: Cuckoo Search Algorithm #############
# NOTE: this cell again redefines error_rate, Fun and jfs (see the PSO cell).
import random
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
# KNN error rate on the validation fold
def error_rate(xtrain, ytrain, x, opts):
    # parameters
    k     = opts['k']
    fold  = opts['fold']
    xt    = fold['xt']
    yt    = fold['yt']
    xv    = fold['xv']
    yv    = fold['yv']
    
    # Number of instances
    num_train = np.size(xt, 0)
    num_valid = np.size(xv, 0)
    # Define selected features
    xtrain  = xt[:, x == 1]
    ytrain  = yt.reshape(num_train)  # Solve bug
    xvalid  = xv[:, x == 1]
    yvalid  = yv.reshape(num_valid)  # Solve bug   
    # Training
    mdl     = KNeighborsClassifier(n_neighbors = k)
    mdl.fit(xtrain, ytrain)
    # Prediction
    ypred   = mdl.predict(xvalid)
    acc     = np.sum(yvalid == ypred) / num_valid
    error   = 1 - acc 
    return error
# Error rate & Feature size
def Fun(xtrain, ytrain, x, opts):
    # Parameters
    alpha    = 0.99
    beta     = 1 - alpha
    # Original feature size
    max_feat = len(x)
    # Number of selected features
    num_feat = np.sum(x == 1)
    # Solve if no feature selected
    if num_feat == 0:
        cost  = 1
    else:
        # Get error rate
        error = error_rate(xtrain, ytrain, x, opts)
        # Objective function
        cost  = alpha * error + beta * (num_feat / max_feat)   
    return cost

###########################################################
import numpy as np
from numpy.random import rand
#from FS.functionHO import Fun
import math


def init_position(lb, ub, N, dim):
    X = np.zeros([N, dim], dtype='float')
    for i in range(N):
        for d in range(dim):
            X[i,d] = lb[0,d] + (ub[0,d] - lb[0,d]) * rand()        
    
    return X


def binary_conversion(X, thres, N, dim):
    Xbin = np.zeros([N, dim], dtype='int')
    for i in range(N):
        for d in range(dim):
            if X[i,d] > thres:
                Xbin[i,d] = 1
            else:
                Xbin[i,d] = 0
    
    return Xbin


def boundary(x, lb, ub):
    if x < lb:
        x = lb
    if x > ub:
        x = ub
    
    return x


# Levy Flight
def levy_distribution(beta, dim):
    # Sigma     
    nume  = math.gamma(1 + beta) * np.sin(np.pi * beta / 2)
    deno  = math.gamma((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
    sigma = (nume / deno) ** (1 / beta) 
    # Parameter u & v 
    u     = np.random.randn(dim) * sigma
    v     = np.random.randn(dim)
    # Step 
    step  = u / abs(v) ** (1 / beta)
    LF    = 0.01 * step
    
    return LF


def jfs(xtrain, ytrain, opts):
    # Parameters
    ub     = 1
    lb     = 0
    thres  = 0.5
    Pa     = 0.25     # discovery rate
    alpha  = 1        # constant
    beta   = 1.5      # levy component
    
    N          = opts['N']
    max_iter   = opts['T']
    if 'Pa' in opts:
        Pa   = opts['Pa'] 
    if 'alpha' in opts:
        alpha   = opts['alpha'] 
    if 'beta' in opts:
        beta  = opts['beta'] 
        
    # Dimension
    dim = np.size(xtrain, 1)
    if np.size(lb) == 1:
        ub = ub * np.ones([1, dim], dtype='float')
        lb = lb * np.ones([1, dim], dtype='float')
        
    # Initialize position 
    X     = init_position(lb, ub, N, dim)
    
    # Binary conversion
    Xbin  = binary_conversion(X, thres, N, dim)
    
    # Fitness at first iteration
    fit   = np.zeros([N, 1], dtype='float')
    Xgb   = np.zeros([1, dim], dtype='float')
    fitG  = float('inf')
    
    for i in range(N):
        fit[i,0] = Fun(xtrain, ytrain, Xbin[i,:], opts)
        if fit[i,0] < fitG:
            Xgb[0,:] = X[i,:]
            fitG     = fit[i,0]
    
    # Pre
    curve = np.zeros([1, max_iter], dtype='float') 
    t     = 0
    
    curve[0,t] = fitG.copy()
    print("Generation:", t + 1)
    print("Best (CS):", curve[0,t])
    t += 1
        
    while t < max_iter:  
        Xnew  = np.zeros([N, dim], dtype='float') 
        
        # {1} Random walk/Levy flight phase
        for i in range(N):
            # Levy distribution
            L = levy_distribution(beta,dim)
            for d in range(dim):
                # Levy flight (1)
                Xnew[i,d] = X[i,d] + alpha * L[d] * (X[i,d] - Xgb[0,d]) 
                # Boundary
                Xnew[i,d] = boundary(Xnew[i,d], lb[0,d], ub[0,d])
      
        # Binary conversion
        Xbin = binary_conversion(Xnew, thres, N, dim)
        
        # Greedy selection
        for i in range(N):
            Fnew = Fun(xtrain, ytrain, Xbin[i,:], opts)
            if Fnew <= fit[i,0]:
                X[i,:]   = Xnew[i,:]
                fit[i,0] = Fnew             
                
            if fit[i,0] < fitG:
                Xgb[0,:] = X[i,:]
                fitG     = fit[i,0]
        
        # {2} Discovery and abandon worse nests phase
        J  = np.random.permutation(N)
        K  = np.random.permutation(N)
        Xj = np.zeros([N, dim], dtype='float')
        Xk = np.zeros([N, dim], dtype='float')
        for i in range(N):
            Xj[i,:] = X[J[i],:]
            Xk[i,:] = X[K[i],:]
        
        Xnew  = np.zeros([N, dim], dtype='float') 
        
        for i in range(N): 
            Xnew[i,:] = X[i,:]
            r         = rand()
            for d in range(dim):
                # A fraction of worse nest is discovered with a probability
                if rand() < Pa:
                    Xnew[i,d] = X[i,d] + r * (Xj[i,d] - Xk[i,d])
                
                # Boundary
                Xnew[i,d] = boundary(Xnew[i,d], lb[0,d], ub[0,d])
        
        # Binary conversion
        Xbin = binary_conversion(Xnew, thres, N, dim)
        
        # Greedy selection
        for i in range(N):
            Fnew = Fun(xtrain, ytrain, Xbin[i,:], opts)
            if Fnew <= fit[i,0]:
                X[i,:]   = Xnew[i,:]
                fit[i,0] = Fnew             
                
            if fit[i,0] < fitG:
                Xgb[0,:] = X[i,:]
                fitG     = fit[i,0]
                
        # Store result
        curve[0,t] = fitG.copy()
        print("Generation:", t + 1)
        print("Best (CS):", curve[0,t])
        t += 1            

            
    # Best feature subset
    Gbin       = binary_conversion(Xgb, thres, 1, dim) 
    Gbin       = Gbin.reshape(dim)
    pos        = np.asarray(range(0, dim))    
    sel_index  = pos[Gbin == 1]
    num_feat   = len(sel_index)
    # Create dictionary
    csa_data = {'sf': sel_index, 'c': curve, 'nf': num_feat}
    
    return csa_data  
##################################
def csa(xtrain,ytrain):
    feat=np.asarray(xtrain)   # feature vector
    label=np.asarray(ytrain)   #label vector
    xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.2, random_state = 10)
    fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}
    k    = 5     # k-value in KNN
    N    = 30    # number of nests
    T    = 100   # maximum number of iterations
    # w, c1, c2 are PSO leftovers; the CSA jfs ignores them
    w    = 0.9
    c1   = 2
    c2   = 2
    opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'w':w, 'c1':c1, 'c2':c2}
    # perform feature selection
    fmdl = jfs(feat, label, opts)
    sf   = fmdl['sf']
    return sf



def trans_csa(Train,sf):
    return Train[Train.columns[sf].values]


# In[ ]:





# In[ ]:


##################### DE: Differential Evolution ###############################
# NOTE: this cell redefines init_position, binary_conversion, boundary and
# jfs; Fun and error_rate are reused from the CSA cell above.
import numpy as np
from numpy.random import rand
#from FS.functionHO import Fun


def init_position(lb, ub, N, dim):
    X = np.zeros([N, dim], dtype='float')
    for i in range(N):
        for d in range(dim):
            X[i,d] = lb[0,d] + (ub[0,d] - lb[0,d]) * rand()        
    
    return X


def binary_conversion(X, thres, N, dim):
    Xbin = np.zeros([N, dim], dtype='int')
    for i in range(N):
        for d in range(dim):
            if X[i,d] > thres:
                Xbin[i,d] = 1
            else:
                Xbin[i,d] = 0
    
    return Xbin


def boundary(x, lb, ub):
    if x < lb:
        x = lb
    if x > ub:
        x = ub
    
    return x
    

def jfs(xtrain, ytrain, opts):
    # Parameters
    ub    = 1
    lb    = 0
    thres = 0.5
    CR    = 0.9     # crossover rate
    F     = 0.5     # factor
    
    N        = opts['N']
    max_iter = opts['T']
    if 'CR' in opts:
        CR   = opts['CR'] 
    if 'F' in opts:
        F    = opts['F']     
    
    # Dimension
    dim = np.size(xtrain, 1)
    if np.size(lb) == 1:
        ub = ub * np.ones([1, dim], dtype='float')
        lb = lb * np.ones([1, dim], dtype='float')
        
    # Initialize position 
    X     = init_position(lb, ub, N, dim)
    
    # Binary conversion
    Xbin  = binary_conversion(X, thres, N, dim)
    
    # Fitness at first iteration
    fit   = np.zeros([N, 1], dtype='float')
    Xgb   = np.zeros([1, dim], dtype='float')
    fitG  = float('inf')
    
    for i in range(N):
        fit[i,0] = Fun(xtrain, ytrain, Xbin[i,:], opts)
        if fit[i,0] < fitG:
            Xgb[0,:] = X[i,:]
            fitG     = fit[i,0]
    
    # Pre
    curve = np.zeros([1, max_iter], dtype='float') 
    t     = 0
    
    curve[0,t] = fitG.copy()
    print("Generation:", t + 1)
    print("Best (DE):", curve[0,t])
    t += 1

    while t < max_iter:  
        V = np.zeros([N, dim], dtype='float')
        U = np.zeros([N, dim], dtype='float')
        
        for i in range(N):
            # Choose r1, r2, r3 randomly, but not equal to i 
            RN = np.random.permutation(N)
            for j in range(N):
                if RN[j] == i:
                    RN = np.delete(RN, j)
                    break
                
            r1 = RN[0]
            r2 = RN[1]
            r3 = RN[2]
            # mutation (2)
            for d in range(dim):
                V[i,d] = X[r1,d] + F * (X[r2,d] - X[r3,d])
                # Boundary
                V[i,d] = boundary(V[i,d], lb[0,d], ub[0,d])
            
            # Random one dimension from 1 to dim
            index = np.random.randint(low = 0, high = dim)
            # crossover (3-4)
            for d in range(dim):
                if (rand() <= CR)  or  (d == index):
                    U[i,d] = V[i,d]
                else:
                    U[i,d] = X[i,d]
        
        # Binary conversion
        Ubin = binary_conversion(U, thres, N, dim)
        
        # Selection
        for i in range(N):
            fitU = Fun(xtrain, ytrain, Ubin[i,:], opts)
            if fitU <= fit[i,0]:
                X[i,:]   = U[i,:]
                fit[i,0] = fitU
                
            if fit[i,0] < fitG:
                Xgb[0,:] = X[i,:]
                fitG     = fit[i,0]
            
                
        # Store result
        curve[0,t] = fitG.copy()
        print("Generation:", t + 1)
        print("Best (DE):", curve[0,t])
        t += 1            

            
    # Best feature subset
    Gbin       = binary_conversion(Xgb, thres, 1, dim) 
    Gbin       = Gbin.reshape(dim)
    pos        = np.asarray(range(0, dim))    
    sel_index  = pos[Gbin == 1]
    num_feat   = len(sel_index)
    # Create dictionary
    de_data = {'sf': sel_index, 'c': curve, 'nf': num_feat}
    
    return de_data  
################################################
def de(xtrain,ytrain):
    feat=np.asarray(xtrain)   # feature vector
    label=np.asarray(ytrain)   #label vector
    xtrain, xtest, ytrain, ytest = train_test_split(feat, label, test_size=0.2, random_state = 10)
    fold = {'xt':xtrain, 'yt':ytrain, 'xv':xtest, 'yv':ytest}
    k    = 5     # k-value in KNN
    N    = 30    # population size
    T    = 100   # maximum number of iterations
    # w, c1, c2 are PSO leftovers; the DE jfs ignores them
    w    = 0.9
    c1   = 2
    c2   = 2
    opts = {'k':k, 'fold':fold, 'N':N, 'T':T, 'w':w, 'c1':c1, 'c2':c2}
    # perform feature selection
    fmdl = jfs(feat, label, opts)
    sf   = fmdl['sf']
    return sf



def trans_de(Train,sf):
    return Train[Train.columns[sf].values]
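

# In[ ]:


# A hedged sketch (not in the original notebook): any of the wrapper
# optimizers above can stand in for HHO as stage 3 of the VEH pipeline,
# because each returns selected column indices and each trans_* helper
# projects a DataFrame onto them. For example, with DE after stages 1-2:
#
#     sf_de   = de(xtrain, ytrain)
#     xtrain3 = trans_de(xtrain, sf_de)
#     xtest3  = trans_de(xtest, sf_de)
#     train_pred_clf(LR, xtrain3, xtest3, ytrain, ytest)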
            


# In[ ]:





# In[ ]:


####### t-test filter ##############
from scipy import stats
def select7(xtrain,ytrain):
    p = xtrain.shape[1]
    stats_t = np.zeros(p)
    for j in range(p):
        x = xtrain.iloc[:,j]
        x0 = x[ytrain==0]
        x1 = x[ytrain==1]
        stat_t = stats.ttest_ind(x0,x1)
        stats_t[j] = stat_t.pvalue
    # keep the features whose two-sample t-test p-value is below 0.0005
    select = xtrain.iloc[:,stats_t<0.0005]
    return select


# In[ ]:





# In[ ]:


####### Wilcoxon rank-sum filter ##############
from scipy import stats
def select8(xtrain,ytrain):
    p = xtrain.shape[1]
    stats_w = np.zeros(p)
    for j in range(p):
        x = xtrain.iloc[:,j]
        x0 = x[ytrain==0]
        x1 = x[ytrain==1]
        stat_w = stats.ranksums(x0,x1)
        stats_w[j] = stat_w.pvalue
    # keep the features whose rank-sum p-value is below 0.0001
    select = xtrain.iloc[:,stats_w<0.0001]
    return select


# In[ ]:





# In[ ]:


from sklearn.model_selection import cross_val_score, cross_validate
from sklearn import svm

start_time = datetime.now()


# t-test filter pipeline (stage 1 only), evaluated with DT/SVM/LR
def T(clf, xtrain, xtest, ytrain, ytest):

    print(xtrain.shape)
    select7_data = select7(xtrain, ytrain)
    xtrain = select7_data
    xtest = xtest[select7_data.columns]

    print(xtrain.columns)
    print(xtrain.shape)

    lr = train_pred_clf(clf, xtrain, xtest, ytrain, ytest)


    ############DT##########################################
    scores = cross_val_score(DT, xtrain, ytrain, cv=10)
    print('DT--scores', scores)
    print("DT--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(DT, xtrain, ytrain, scoring=scoring, cv=10, return_train_score=True)
    sorted(scores.keys())
    print('DT--result:', scores)

    ############SVM##########################################
    scores = cross_val_score(SV, xtrain, ytrain, cv=10)
    print('SV--scores', scores)
    print("SV--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(SV, xtrain, ytrain, scoring=scoring, cv=10, return_train_score=True)
    sorted(scores.keys())
    print('SV--result:', scores)
    ############LR##########################################
    scores = cross_val_score(LR, xtrain, ytrain, cv=10)
    print('LR--scores', scores)
    print("LR--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(LR, xtrain, ytrain, scoring=scoring, cv=10, return_train_score=True)
    sorted(scores.keys())
    print('LR--result:', scores)

    return xtest, ytest, lr


a1, b1, c1 = T(DT, xtrain, xtest, ytrain, ytest)
a2, b2, c2 = T(SV, xtrain, xtest, ytrain, ytest)
a3, b3, c3 = T(LR, xtrain, xtest, ytrain, ytest)

end_time = datetime.now()
print('running time: {}'.format(end_time - start_time))


# In[ ]:





# In[ ]:


from sklearn.model_selection import cross_val_score, cross_validate
from sklearn import svm
import time

start_time = datetime.now()


# Wilcoxon filter pipeline (stage 1 only), evaluated with DT/SVM/LR
def W(clf, xtrain, xtest, ytrain, ytest):

    print(xtrain.shape)
    select8_data = select8(xtrain, ytrain)
    xtrain = select8_data
    xtest = xtest[select8_data.columns]

    print(xtrain.columns)
    print(xtrain.shape)

    lr = train_pred_clf(clf, xtrain, xtest, ytrain, ytest)

    ############DT##########################################
    scores = cross_val_score(DT, xtrain, ytrain, cv=10)
    print('DT--scores', scores)
    print("DT--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(DT, xtrain, ytrain, scoring=scoring, cv=10, return_train_score=True)
    sorted(scores.keys())
    print('DT--result:', scores)

    ############SVM##########################################
    scores = cross_val_score(SV, xtrain, ytrain, cv=10)
    print('SV--scores', scores)
    print("SV--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(SV, xtrain, ytrain, scoring=scoring, cv=10, return_train_score=True)
    sorted(scores.keys())
    print('SV--result:', scores)
    ############LR##########################################
    scores = cross_val_score(LR, xtrain, ytrain, cv=10)
    print('LR--scores', scores)
    print("LR--Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2))
    scoring = ['precision_macro', 'recall_macro']

    scores = cross_validate(LR, xtrain, ytrain, scoring=scoring, cv=10, return_train_score=True)
    sorted(scores.keys())
    print('LR--result:', scores)

    return xtest, ytest, lr


a1, b1, c1 = W(DT, xtrain, xtest, ytrain, ytest)
a2, b2, c2 = W(SV, xtrain, xtest, ytrain, ytest)
a3, b3, c3 = W(LR, xtrain, xtest, ytrain, ytest)

end_time = datetime.now()
print('running time: {}'.format(end_time - start_time))


# In[ ]:




