# Import


In [1]:
import datetime as dt
import pandas as pd

## CPU
import numpy as np
parallelType = np

'''
## GPU
import cupy as cp
parallelType = cp
'''
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, Normalizer, MaxAbsScaler, RobustScaler, MinMaxScaler, QuantileTransformer
from scipy.io import arff


In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# ABC-ANN

## ABC

In [3]:
import tensorflow as tf
import keras as keras
from keras import initializers
class ABC_ANN_Binary:
  """Artificial Bee Colony (ABC) search over the weights of a one-hidden-layer binary classifier.

  Every row of ``Foods`` is one candidate network flattened into ``D`` values:
  ``FVS * HLS`` hidden weights (a contiguous run of ``FVS`` values per hidden unit),
  then ``HLS`` output weights, then ``HLS`` hidden biases, then one output bias.
  The objective being minimised is the mean absolute error between the sigmoid
  network output and ``target``.
  """

  def __init__(self, inputX, target, hiddenLayerSize, P, limit, lb, ub, MR, parallelType, activation):
    """Build and evaluate the initial population.

    Args:
      inputX: 2-D feature matrix; its second dimension is the feature count.
      target: labels; must broadcast against an (n_samples, n_candidates)
        prediction matrix (e.g. shape (n_samples, 1)).
      hiddenLayerSize: number of hidden units.
      P: population size (number of food sources).
      limit: trial count at which a stagnant source is replaced by a scout.
      lb: lower bound applied to mutated parameters.
      ub: upper bound applied to mutated parameters.
      MR: modification rate; each parameter is mutated when a uniform draw is below it.
      parallelType: array module used for all computation (numpy or cupy).
      activation: hidden activation, 'sigmoid' or 'tanh'.

    Raises:
      ValueError: if ``activation`` is not supported (raised by calculateF).
    """
    self.comp = parallelType  # array backend: numpy or cupy
    self.FVS = inputX.shape[1]
    self.X = inputX
    self.y = target
    self.P = P
    self.limit = limit
    self.HLS = hiddenLayerSize
    # hidden weights + hidden biases, output weights, output bias
    self.D = (self.FVS + 1) * self.HLS + self.HLS + 1
    self.lb = lb
    self.ub = ub
    self.MR = MR
    self.evaluationNumber = 0
    self.activation = activation
    self.tmpID = [-1] * self.P  # onlooker slot -> index of the source it mutated

    initializer = tf.keras.initializers.GlorotUniform()
    values = initializer(shape=(self.P, self.D)).numpy()
    # NOTE(review): Glorot samples are symmetric around zero, so this affine map
    # can produce starting values below lb - confirm that is intended.
    self.Foods = self.lb + self.comp.array(values) * (self.ub - self.lb)

    self.solution = self.comp.copy(self.Foods)
    self.f = self.calculateF(self.Foods)
    self.fitness = 1 / (1 + self.f)
    self.trial = self.comp.zeros(P)
    self.globalMin = self.f[0, 0]
    self.globalParams = self.comp.copy(self.Foods[0:1])  # kept 2-D: shape (1, D)
    self.scoutBeeCounts = 0

  def create_new(self, index):
    """Replace food source ``index`` with a uniform random point in [lb, ub) and reset its trial count."""
    new_sol = self.lb + self.comp.random.rand(1, self.D) * (self.ub - self.lb)
    flat = new_sol.flatten()
    self.Foods[index, :] = flat
    self.solution[index, :] = flat
    self.f[index] = self.calculateF(new_sol)[0]
    self.fitness[index] = 1 / (1 + self.f[index])
    self.trial[index] = 0
    self.scoutBeeCounts += 1

  def memorizeBestSource(self):
    """Store the current best source if it beats the best objective value seen so far."""
    index = self.comp.argmin(self.f)
    if self.f[index, 0] < self.globalMin:
      self.globalMin = self.f[index, 0]
      self.globalParams = self.comp.copy(self.Foods[index: index + 1])

  def calculateProbabilities(self):
    """Set ``prob`` to 0.9 * fitness / max(fitness) + 0.1 for the onlooker phase."""
    maxfit = self.comp.max(self.fitness)
    self.prob = (0.9 / maxfit * self.fitness) + 0.1

  def _pick_neighbour(self, i):
    """Return a random source index different from ``i``.

    Raises:
      ValueError: if the population has fewer than two sources, because the
        rejection loop below could then never terminate.
    """
    if self.P < 2:
      raise ValueError("ABC needs a population of at least 2 food sources to pick a neighbour")
    neighbour = self.comp.random.randint(0, self.P)
    while neighbour == i:
      neighbour = self.comp.random.randint(0, self.P)
    return neighbour

  def _write_mutant(self, row, source):
    """Write a mutated copy of ``Foods[source]`` into ``solution[row]``, clamped to [lb, ub]."""
    ar = self.comp.random.rand(self.D)
    param2change = self.comp.where(ar < self.MR)[0]
    neighbour = self._pick_neighbour(source)

    self.solution[row, :] = self.Foods[source, :]
    # v_ij = x_ij + r * (x_ij - x_kj); one r in [-1, 1) is shared by all mutated parameters
    r = -1 + (1 + 1) * self.comp.random.rand()
    candidate = self.Foods[source, param2change] + r * (self.Foods[source, param2change] - self.Foods[neighbour, param2change])
    self.solution[row, param2change] = self.comp.clip(candidate, self.lb, self.ub)

  def sendEmployedBees(self):
    """Employed phase: produce one mutant per food source in ``solution``."""
    for i in range(self.P):
      self._write_mutant(i, i)

  def sendOnLookerBees(self):
    """Onlooker phase: fill all P slots of ``solution`` with mutants of probabilistically chosen sources.

    Sources are visited cyclically; source ``i`` is accepted when a uniform draw
    is below ``prob[i]``. ``tmpID[t]`` records which source slot ``t`` came from.
    """
    i = 0
    t = 0
    while t < self.P:
      if self.comp.random.rand() < self.prob[i, 0]:
        self._write_mutant(t, i)
        self.tmpID[t] = i
        t += 1
      i = (i + 1) % self.P

  def sendScoutBees(self):
    """Scout phase: re-initialise the most stagnant source if it has reached ``limit`` trials."""
    index = self.comp.argmax(self.trial)
    if self.trial[index] >= self.limit:
      self.create_new(index)

  def calculateF(self, foods):
    """Evaluate all candidate networks in ``foods`` at once.

    Args:
      foods: 2-D array with one flattened candidate (length D) per row.

    Returns:
      Column vector of shape (n_candidates, 1) holding each candidate's mean absolute error.

    Raises:
      ValueError: if ``self.activation`` is neither 'sigmoid' nor 'tanh'.
    """
    if self.activation == 'sigmoid':
      hidden_activation = self.sig
    elif self.activation == 'tanh':
      hidden_activation = self.comp.tanh
    else:
      raise ValueError(f"Unsupported activation {self.activation!r}; expected 'sigmoid' or 'tanh'")

    output_weights_start = self.FVS * self.HLS
    hidden_bias_start = output_weights_start + self.HLS

    # rows: samples, columns: candidates
    predictionMatrix = self.comp.zeros((self.X.shape[0], foods.shape[0]))
    predictionMatrix += foods[:, -1]  # output bias of every candidate
    for i in range(self.HLS):
      W = foods[:, i * self.FVS: (i + 1) * self.FVS].T
      b = foods[:, hidden_bias_start + i]
      z_i = hidden_activation(self.X.dot(W) + b)
      predictionMatrix += z_i * foods[:, output_weights_start + i]

    predictionMatrix = self.sig(predictionMatrix)

    # Mean Absolute Error per candidate
    f = self.comp.mean(self.comp.abs(predictionMatrix - self.y), axis=0, keepdims=True).T
    self.evaluationNumber += len(f)  # one evaluation per candidate
    return f

  def ReLU(self, x):
    """Element-wise max(x, 0)."""
    return self.comp.maximum(x, 0)

  def sig(self, n):
    """Element-wise logistic sigmoid."""
    return 1 / (1 + self.comp.exp(-n))

## Learn ABC

In [4]:
class LearnABC:
  """Runs the ABC phases on an ABC_ANN_Binary swarm until its evaluation budget is spent."""

  def __init__(self, inputX, target, hiddenLayerSize, P, limit, lb, ub, MR, parallelType, evaluationNumber, activation):
    """Create the swarm; ``evaluationNumber`` is the objective-evaluation budget checked by learn()."""
    self.comp = parallelType
    self.total_numberof_evaluation = evaluationNumber
    self.abc = ABC_ANN_Binary(inputX, target, hiddenLayerSize, P, limit, lb, ub, MR, parallelType, activation)

  def _employed_phase(self):
    """Mutate every source and greedily keep the mutants that have higher fitness."""
    swarm = self.abc
    swarm.sendEmployedBees()
    candidate_cost = swarm.calculateF(swarm.solution)
    candidate_fitness = 1 / (1 + candidate_cost)

    # Both index sets are taken before any source is overwritten.
    improved = self.comp.where(candidate_fitness > swarm.fitness)[0]
    stagnant = self.comp.where(candidate_fitness <= swarm.fitness)[0]

    swarm.trial[improved] = 0
    swarm.Foods[improved, :] = swarm.solution[improved, :]
    swarm.f[improved] = candidate_cost[improved]
    swarm.fitness[improved] = candidate_fitness[improved]
    # sources that could not be improved get one more failed trial
    swarm.trial[stagnant] += 1

  def _onlooker_phase(self):
    """Let onlookers mutate probabilistically chosen sources and apply greedy selection one slot at a time."""
    swarm = self.abc
    swarm.calculateProbabilities()
    swarm.sendOnLookerBees()

    candidate_cost = swarm.calculateF(swarm.solution)
    candidate_fitness = 1 / (1 + candidate_cost)

    # Sequential on purpose: several slots may target the same source.
    for slot, source in enumerate(swarm.tmpID):
      if not candidate_fitness[slot] > swarm.fitness[source]:
        swarm.trial[source] += 1
        continue
      swarm.trial[source] = 0
      swarm.Foods[source, :] = swarm.solution[slot, :]
      swarm.f[source] = candidate_cost[slot]
      swarm.fitness[source] = candidate_fitness[slot]

  def learn(self):
    """Optimise the swarm; afterwards ``net``, ``globalMin`` and ``f_values`` hold the results.

    ``f_values`` records the population's minimum objective value before the
    first cycle and after every cycle.
    """
    swarm = self.abc
    self.f_values = [self.comp.min(swarm.f)]
    swarm.memorizeBestSource()

    while swarm.evaluationNumber <= self.total_numberof_evaluation:
      self._employed_phase()
      self._onlooker_phase()
      swarm.memorizeBestSource()
      swarm.sendScoutBees()
      self.f_values.append(self.comp.min(swarm.f))

    self.net = swarm.globalParams
    self.globalMin = swarm.globalMin
    print(f"The number of scout bees: {swarm.scoutBeeCounts}")

## ABC-ANN main

In [5]:
class ABC_LR_Model():
  """Estimator-style wrapper: trains a one-hidden-layer binary classifier with ABC and scores it."""

  def __init__(self, hiddenLayerSize, lb, ub, evaluationNumber, limit, P, MR, thres, parallelType, activation):
    """Store the hyper-parameters; no training happens here.

    Args:
      hiddenLayerSize: number of hidden units.
      lb: lower bound for parameters to be learned.
      ub: upper bound for parameters to be learned.
      evaluationNumber: objective-evaluation budget handed to LearnABC.
      limit: number of failed improvement attempts after which a solution is
        replaced by a scout bee.
      P: population size.
      MR: modification rate.
      thres: decision threshold applied to the sigmoid output in score().
      parallelType: array module (numpy or cupy).
      activation: hidden activation, 'sigmoid' or 'tanh'.
    """
    self.lb = lb
    self.ub = ub
    self.evaluationNumber = evaluationNumber
    self.limit = limit
    self.P = P
    self.MR = MR
    self.parallelType = parallelType
    self.HLS = hiddenLayerSize
    self.thres = thres
    self.activation = activation

  def fit(self, trainX, trainY):
    """Run the ABC search on the training data and keep the best parameter vector in ``self.net``."""
    learn = LearnABC(trainX, trainY, self.HLS, self.P, self.limit, self.lb, self.ub, self.MR, self.parallelType, self.evaluationNumber, self.activation)
    learn.learn()
    self.net = learn.net

  def __str__(self) -> str:
    return f"lb={self.lb}, ub={self.ub}, evaNumber={self.evaluationNumber}, limit={self.limit}, P={self.P}, MR={self.MR}, HLS={self.HLS}, act={self.activation}"

  def sig(self, x):
    """Element-wise logistic sigmoid."""
    return 1 / (1 + self.parallelType.exp(-x))

  def ReLU(self, x):
    """Element-wise max(x, 0)."""
    return self.parallelType.maximum(x, 0)

  def score(self, X, y):
    """Predict labels for ``X`` with the fitted network and compare them with ``y``.

    Args:
      X: 2-D feature matrix with the same number of columns used in fit().
      y: labels, compared element-wise with the (n_samples, 1) predictions.

    Returns:
      ``[acc, p]``: the mean of ``y == p`` and the 0/1 prediction array.

    Raises:
      ValueError: if ``self.activation`` is neither 'sigmoid' nor 'tanh'.
    """
    if self.activation == 'sigmoid':
      hidden_activation = self.sig
    elif self.activation == 'tanh':
      hidden_activation = self.parallelType.tanh
    else:
      raise ValueError(f"Unsupported activation {self.activation!r}; expected 'sigmoid' or 'tanh'")

    D = X.shape[1]
    # self.net is (1, n_params): hidden weights | output weights | hidden biases | output bias.
    # Column-major reshape puts each hidden unit's contiguous run of D weights in one column.
    W1 = self.net[:, 0: D * self.HLS].reshape((D, self.HLS), order='F')
    startIndexFirstBias = D * self.HLS + self.HLS
    endIndexFirstBias = startIndexFirstBias + self.HLS
    b1 = self.net[:, startIndexFirstBias: endIndexFirstBias]

    Z = hidden_activation(X.dot(W1) + b1)
    W2 = self.net[:, D * self.HLS: startIndexFirstBias].reshape((self.HLS, 1))
    b2 = self.net[:, -1]
    A = self.sig(Z.dot(W2) + b2)
    p = self.parallelType.where(A >= self.thres, 1, 0)  # prediction
    acc = self.parallelType.average(y == p)
    return [acc, p]



# Prepare Dataset

## Feature extraction and selection on UNSW-NB15

In [6]:
#------------Load UNSW-NB15 Dataset---------------------------
# loadarff returns (records, metadata); only the records are turned into a frame.
trainData = pd.DataFrame(
    arff.loadarff('/content/gdrive/My Drive/federated/datasets/2015/Train.arff')[0]
)
testData = pd.DataFrame(
    arff.loadarff('/content/gdrive/My Drive/federated/datasets/2015/Test.arff')[0]
)

# The last column is the label; it is kept as an (n, 1) int32 column vector.
trainLabels = trainData.iloc[:, -1].astype("int32").values.reshape(-1, 1)
testLabels = testData.iloc[:, -1].astype("int32").values.reshape(-1, 1)

# Everything before the last column is the feature matrix.
trainData = trainData.iloc[:, :-1].values
testData = testData.iloc[:, :-1].values

# Sparsity ratios: fraction of entries that are exactly zero
for split_name, split in (("train", trainData), ("test", testData)):
    print(f"Sparsity ratio {split_name} set: {np.sum(split == 0) / split.size}")

# Scaling: the scaler is fitted on the training split only and reused for the test split.
# Alternatives tried: Normalizer, QuantileTransformer(random_state=0),
# MinMaxScaler, RobustScaler, StandardScaler.
scaler = MaxAbsScaler()
trainData = scaler.fit_transform(trainData)
testData = scaler.transform(testData)

Sparsity ratio train set: 0.8614705467794591
Sparsity ratio test set: 0.8572446104225103


In [None]:
#------------Autoencoder with Bayesian optimization---------------------------

import sys
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from keras.layers import BatchNormalization,LeakyReLU,Dense
import keras
from sklearn.metrics import f1_score
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import Model

# Shared state for the autoencoder search below.
# `x` and `seed` are not referenced anywhere else in the visible notebook.
global x
x=0
da =[]  # history of reconstruction errors appended by objective() on each improvement
min_reconstr_error=999.0  # best (lowest) reconstruction error seen so far; updated by objective()
seed=2
val_ratio = 0.2
# Hold out 20% of the training rows (fixed random_state) as a validation split.
# NOTE: this rebinds trainData/trainLabels, so re-running the cell splits again.
trainData, valData, trainLabels, valLabels = train_test_split(trainData, trainLabels, test_size=val_ratio, random_state=33)
# Convert the validation split to the currently selected array backend.
valData = parallelType.array(valData)
valLabels = parallelType.array(valLabels)

def objective(params):
    """Hyperopt objective: train one autoencoder configuration and return its reconstruction error.

    Args:
        params: sampled hyper-parameters with keys 'eta' (learning rate),
            'units1' (encoder/decoder width), 'units2' (bottleneck width),
            'dropout1', 'dropout2', 'batch_size', 'epochs' and 'activation'.

    Returns:
        dict with 'loss' (MSE reconstruction error), 'status' and the trained 'model'.

    Side effects:
        When the error improves on the global ``min_reconstr_error``, the
        encoder half (input -> bottleneck) is saved to
        'UNSW_AE_ENCODER_BEST_100iter.h5' and the error is appended to ``da``.
    """
    global min_reconstr_error
    lr = params['eta']
    u1 = int(params['units1'])
    u2 = int(params['units2'])
    d1 = params['dropout1']
    d2 = params['dropout2']
    bs = int(params['batch_size'])
    ep = int(params['epochs'])
    act = params['activation']

    input_dim = trainData.shape[1]

    input_layer = keras.layers.Input(shape=(input_dim,))

    encoder = Dense(u1, kernel_initializer=tf.keras.initializers.LecunUniform(), activation=act)(input_layer)
    encoder = BatchNormalization()(encoder)
    encoder = keras.layers.Dropout(d1)(encoder)

    bottleneck = Dense(u2, activation=act)(encoder)
    bottleneck = BatchNormalization()(bottleneck)

    decoder = Dense(u1, activation=act)(bottleneck)
    decoder = BatchNormalization()(decoder)
    decoder = keras.layers.Dropout(d2)(decoder)

    decoder = Dense(input_dim, activation=act)(decoder)

    autoencoder = Model(inputs=input_layer, outputs=decoder)

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    autoencoder.compile(loss='mse', optimizer=keras.optimizers.Adam(learning_rate=lr))
    autoencoder.summary()
    callback = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

    autoencoder.fit(trainData, trainData,
                    epochs=ep,
                    batch_size=bs,
                    validation_data=(valData, valData),
                    callbacks=[callback])

    # NOTE(review): the search is scored on testData, so hyper-parameters are
    # selected on the test set - confirm this is intended rather than valData.
    reconstr_error = autoencoder.evaluate(testData, testData, verbose=0)
    print(reconstr_error)

    if reconstr_error < min_reconstr_error:
        min_reconstr_error = reconstr_error
        # record each improvement once (it used to be appended twice)
        da.append(min_reconstr_error)
        encoded_model = Model(inputs=input_layer, outputs=bottleneck)
        encoded_model.save('UNSW_AE_ENCODER_BEST_100iter.h5')

    sys.stdout.flush()

    # NOTE(review): returning the model makes the Trials object retain every trained network.
    return {'loss': reconstr_error, 'status': STATUS_OK, 'model': autoencoder}


def optimize(trial):
    """Run 100 TPE evaluations of ``objective`` over the autoencoder search space.

    Args:
        trial: hyperopt ``Trials`` object that collects the evaluation history.

    Returns:
        The best assignment found, as returned by ``fmin``.
    """
    # Note: 10e-8 == 1e-7 and 10e-1 == 1.0.
    search_space = {
        'eta': hp.uniform('eta', 10e-8, 10e-1),
        'units1': hp.uniform('units1', 30, 100),
        'units2': hp.uniform('units2', 30, 100),
        'dropout1': hp.uniform('dropout1', 0, .3),
        'dropout2': hp.uniform('dropout2', 0, .3),
        'batch_size': hp.uniform('batch_size', 1, 1024),
        'epochs': hp.uniform('epochs', 1, 1000),
        'activation': hp.choice('activation', ['tanh', 'sigmoid']),
    }
    return fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        trials=trial,
        max_evals=100,
    )

# Run the autoencoder hyper-parameter search and report its wall-clock duration.
import time
start_time = time.time()
trial=Trials()  # collects the history of all evaluations
trial.mybest = None  # ad-hoc attribute; not read anywhere in the visible notebook
best=optimize(trial)
print("--- %s seconds for Bayesian optimization ---" % (time.time() - start_time))



  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]



Model: "model"

_________________________________________________________________

 Layer (type)                Output Shape              Param #   


 input_1 (InputLayer)        [(None, 196)]             0         

 dense (Dense)               (None, 42)                8274      

 batch_normalization (Batch  (None, 42)                168       

 Normalization)                                                  

 dropout (Dropout)           (None, 42)                0         

 dense_1 (Dense)             (None, 92)                3956      

 batch_normalization_1 (Bat  (None, 92)                368       

 chNormalization)                                                

 dense_2 (Dense)             (None, 42)                3906      

 batch_normalization_2 (Bat  (None, 42)                168       

 chNormalization)                                                

 dropout_1 (Dropout)         (None, 42)                0         

 dense_3 (Dense)             (None, 196)     

In [None]:
#------------after Autoencoder-load encoded model---------------------------
from keras.models import load_model
from google.colab import drive
drive.mount('/content/gdrive')
# NOTE(review): objective() saves the encoder under the relative name
# 'UNSW_AE_ENCODER_BEST_100iter.h5', while it is loaded here from a Drive folder;
# presumably the file is copied there manually - confirm.
encoder = load_model('/content/gdrive/My Drive/Colab Notebooks/h5files/UNSW_AE_ENCODER_BEST_100iter.h5')
encoder.summary()

# Bottleneck representation of each split.
trainData_encoded=encoder.predict(trainData)
testData_encoded=encoder.predict(testData)

# Augmented feature set: the input features followed by the encoded features (column-wise).
trainData_all=np.concatenate((trainData,trainData_encoded),axis=1)
testData_all=np.concatenate((testData,testData_encoded),axis=1)

In [None]:
# plot feature importance (XGBoost 5 fold cv)
from numpy import loadtxt
from xgboost import XGBClassifier
from matplotlib import pyplot as plt
from xgboost import plot_importance
from sklearn.model_selection import KFold


import numpy

# Average XGBoost feature importances over a 5-fold split of the augmented training set.
kf = KFold(n_splits=5)
print(trainData_all.shape[1])
importances = numpy.zeros(trainData_all.shape[1])

for train_indices, test_indices in kf.split(trainData_all):
    print(train_indices.shape)
    print(test_indices.shape)

    # Fit on the training part of the fold only; the held-out part is not used.
    # The default XGBClassifier is used; 'cover' and 'gain' importance types
    # were tried as alternatives.
    model = XGBClassifier()
    model.fit(trainData_all[train_indices], trainLabels[train_indices])

    print(model.feature_importances_)
    importances += model.feature_importances_

# mean over the 5 folds
importances /= 5


In [None]:
#------------select 30 features---------------------------
# argsort is ascending, so the last 30 positions are the 30 largest mean importances.
indices = np.argsort(importances)[-30:]
trainData_reduced=trainData_all[:, indices]
testData_reduced=testData_all[:, indices]

# Horizontal bar chart of the selected features' importances.
plt.figure()
plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.yticks(range(len(indices)), indices+1)  # tick labels are 1-based column numbers
plt.xlabel('Relative Importance (weight)')
plt.show()


In [None]:

# Optional extra validation split; disabled here (val is False).
val = False
val_ratio = 0.2

if val:
  trainData, valData, trainLabels, valLabels = train_test_split(trainData, trainLabels, test_size=val_ratio, random_state=33)
  valData = parallelType.array(valData)
  valLabels = parallelType.array(valLabels)


## GPU
# From here on the array backend is CuPy; parallelType is rebound accordingly.
import cupy as cp
parallelType = cp

# Convert the raw splits and labels to CuPy arrays.
trainData = parallelType.array(trainData)
trainLabels = parallelType.array(trainLabels)
testData = parallelType.array(testData)
testLabels = parallelType.array(testLabels)

# Convert the encoded, augmented and reduced feature sets as well.
trainData_encoded=parallelType.array(trainData_encoded)
testData_encoded=parallelType.array(testData_encoded)
trainData_all=parallelType.array(trainData_all)
testData_all=parallelType.array(testData_all)
trainData_reduced=parallelType.array(trainData_reduced)
testData_reduced=parallelType.array(testData_reduced)



In [None]:
# Sanity check: shapes of the reduced feature matrices, then of the label vectors.
for arr in (trainData_reduced, testData_reduced, trainLabels, testLabels):
    print(arr.shape)

# Bayesian Optimization for ABC_ANN

In [None]:
#------------Bayesian Optimization---------------------------
import sys
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from keras.models import load_model,save_model
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
max_acc = 0  # best test accuracy seen across all evaluations
counter = 0  # number of completed evaluations; must exist before objective() increments it


def objective(params):
    """Hyperopt objective: train one ABC-ANN configuration and return its negated test accuracy.

    Args:
        params: sampled hyper-parameters with keys 'lb', 'ub',
            'evaluationNumber', 'limit', 'P', 'MR', 'HLS', 'thres' and
            'activation'. The search space also samples 'L2', which the
            model does not use.

    Returns:
        dict with 'loss' (minus the accuracy, because fmin minimises) and 'status'.

    Side effects:
        Updates the globals ``max_acc`` and ``counter`` and prints the
        configuration and its test result.
    """
    global max_acc
    global counter

    lb = params['lb']
    ub = params['ub']
    evaluationNumber = int(params['evaluationNumber'])
    limit = int(params['limit'])
    P = int(params['P'])
    MR = params['MR']
    HLS = int(params['HLS'])
    thres = params['thres']
    activation = params['activation']

    model = ABC_LR_Model(hiddenLayerSize=HLS, lb=lb, ub=ub, evaluationNumber=evaluationNumber, limit=limit, P=P, MR=MR, thres=thres, parallelType=parallelType, activation=activation)
    print(model)

    model.fit(trainData_reduced, trainLabels)
    # NOTE(review): configurations are ranked on the test split - confirm this is intended.
    avgTest, _ = model.score(testData_reduced, testLabels)
    print(f"Test Result: {avgTest}")

    if avgTest > max_acc:
        max_acc = avgTest

    counter += 1
    sys.stdout.flush()

    return {'loss': -1 * avgTest, 'status': STATUS_OK}

def optimize(trial):
  """Run 150 TPE evaluations of ``objective`` over the ABC-ANN search space.

  Args:
    trial: hyperopt ``Trials`` object; it is passed to ``fmin`` so the caller
      can inspect the evaluation history afterwards.

  Returns:
    The best assignment found, as returned by ``fmin``.
  """
  params = {
    'lb': hp.uniform('lb', -30, 0),
    'ub': hp.uniform('ub', 0, 30),
    'evaluationNumber': hp.uniform('evaluationNumber', 10000, 120000),
    'limit': hp.uniform('limit', 10, 200),
    'P': hp.uniform('P', 10, 200),
    'MR': hp.uniform('MR', .01, .2),
    'L2': hp.uniform('L2', .0, .001),
    'HLS': hp.uniform('HLS', 2, 20),
    'thres': hp.uniform('thres', 0.2, 0.8),
    'activation': hp.choice('activation', ['tanh', 'sigmoid']),
  }

  best = fmin(
    fn=objective,
    space=params,
    algo=tpe.suggest,
    trials=trial,  # use the caller's Trials instead of a throw-away one
    max_evals=150,
  )
  return best

# Run the ABC-ANN hyper-parameter search, then print the best assignment,
# the best accuracy tracked in max_acc, and the wall-clock duration.
import time
start_time = time.time()
trial=Trials()
trial.mybest = None  # ad-hoc attribute; not read anywhere in the visible notebook
best=optimize(trial)
print(best)
print(f"best acc: {max_acc}" )
print("--- %s seconds for Bayesian optimization ---" % (time.time() - start_time))

## Train and Test

In [None]:
#------------Run 20 times with best parameters---------------------------
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Fixed configuration shared by every run.
lb, ub = -20, 20
evaluationNumber = 60008
limit = 50
P = 40
MR = 0.054
hiddenLayerSize = 3
thres = 0.5
activation = 'sigmoid'
parallelType = cp

for i in range(20):
  model = ABC_LR_Model(
    hiddenLayerSize=hiddenLayerSize,
    lb=lb,
    ub=ub,
    evaluationNumber=evaluationNumber,
    limit=limit,
    P=P,
    MR=MR,
    thres=thres,
    parallelType=parallelType,
    activation=activation,
  )
  print(model)

  # The timer covers both training and scoring.
  start_time = dt.datetime.now()
  model.fit(trainData_reduced, trainLabels)
  acc, p = model.score(testData_reduced, testLabels)
  print(f"Run time: {dt.datetime.now()-start_time}")
  print(f"Result: {acc}")

  # scikit-learn metrics need host arrays, so predictions and labels are copied back from CuPy.
  matrix = cp.asnumpy(p)
  testLabels_cpu = cp.asnumpy(testLabels)
  con_matrix = confusion_matrix(testLabels_cpu, matrix)
  print(f"best acc: {acc}")
  print(con_matrix)
  print(classification_report(testLabels_cpu, matrix))


