{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [ "lx4RpN47ybt6" ] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "import tensorflow as tf\n", "import numpy as np\n", "import pandas as pd\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "from keras import losses\n", "from keras import optimizers\n", "from keras import metrics\n", "import math\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ], "metadata": { "id": "sIDDU2PYPdH_" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "CSV_HEADER = [\n", " \"duration\",\n", " \"protocol_type\",\n", " \"service\",\n", " \"flag\",\n", " \"src_bytes\",\n", " \"dst_bytes\",\n", " \"land\",\n", " \"wrong_fragment\",\n", " \"urgent\",\n", " \"hot\",\n", " \"num_failed_logins\",\n", " \"logged_in\",\n", " \"num_compromised\",\n", " \"root_shell\",\n", " \"su_attempted\",\n", " \"num_root\",\n", " \"num_file_creations\",\n", " \"num_shells\",\n", " \"num_access_files\",\n", " \"num_outbound_cmds\",\n", " \"is_host_login\",\n", " \"is_guest_login\",\n", " \"count\",\n", " \"srv_count\",\n", " \"serror_rate\",\n", " \"srv_serror_rate\",\n", " \"rerror_rate\",\n", " \"srv_rerror_rate\",\n", " \"same_srv_rate\",\n", " \"diff_srv_rate\",\n", " \"srv_diff_host_rate\",\n", " \"dst_host_count\",\n", " \"dst_host_srv_count\",\n", " \"dst_host_same_srv_rate\",\n", " \"dst_host_diff_srv_rate\",\n", " \"dst_host_same_src_port_rate\",\n", " \"dst_host_srv_diff_host_rate\",\n", " \"dst_host_serror_rate\",\n", " \"dst_host_srv_serror_rate\",\n", " \"dst_host_rerror_rate\",\n", " \"dst_host_srv_rerror_rate\",\n", " \"class\"\n", "]\n", "\n", "\n", "train_data = pd.read_csv(\"train.csv\", header=None, names=CSV_HEADER)\n", "\n", "test_data = pd.read_csv(\"test.csv\", header=None, names=CSV_HEADER)\n", "\n", 
def Remove_Outlier_Indices(df, lower_q=0.02, upper_q=0.98, whisker=1.5):
    """Return a boolean row mask that is True for rows without outliers.

    Despite its name, this function does not return indices and does not
    remove anything: it returns a mask meant to be used as ``df[mask]``.

    A value counts as an outlier when it lies more than ``whisker`` times the
    quantile spread (``upper_q`` quantile minus ``lower_q`` quantile) below the
    lower quantile or above the upper quantile of its column. With the default
    2% / 98% quantiles this is a much wider band than a classic IQR fence.

    Only numeric columns take part in the test. Non-numeric columns (such as
    the string ``class`` label) are ignored, so they can never flag a row.

    Args:
        df: Frame to inspect; it is not modified.
        lower_q: Lower quantile, between 0 and 1.
        upper_q: Upper quantile, between 0 and 1.
        whisker: Multiple of the quantile spread tolerated beyond each quantile.

    Returns:
        Boolean Series aligned to ``df.index``; True marks rows to keep.
    """
    # Restrict to numeric columns explicitly: calling quantile() on a frame
    # that still holds string columns is deprecated and later an error.
    numeric = df.select_dtypes(include="number")

    lower = numeric.quantile(lower_q)
    upper = numeric.quantile(upper_q)
    spread = upper - lower

    # Compare column-wise (axis=1 aligns the per-column thresholds with the
    # frame's columns) instead of relying on implicit reindexing.
    too_low = numeric.lt(lower - whisker * spread, axis=1)
    too_high = numeric.gt(upper + whisker * spread, axis=1)

    return ~(too_low | too_high).any(axis=1)
# %% Merge the outlier-filtered train and test partitions
# Stack both partitions row-wise and renumber the rows 0..n-1 in a single
# call, so the positional index lines up with frames built from it later.
frames = [new_train_data, new_test_data]
df = pd.concat(frames, ignore_index=True)
df
\n", "139899 1.00 0.00 \n", "139900 0.72 0.06 \n", "139901 1.00 0.00 \n", "139902 0.99 0.01 \n", "139903 0.08 0.03 \n", "\n", " dst_host_same_src_port_rate dst_host_srv_diff_host_rate \\\n", "0 0.17 0.00 \n", "1 0.88 0.00 \n", "2 0.00 0.00 \n", "3 0.03 0.04 \n", "4 0.00 0.00 \n", "... ... ... \n", "139899 1.00 0.00 \n", "139900 0.01 0.01 \n", "139901 0.01 0.01 \n", "139902 0.00 0.00 \n", "139903 0.00 0.00 \n", "\n", " dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate \\\n", "0 0.00 0.00 0.05 \n", "1 0.00 0.00 0.00 \n", "2 1.00 1.00 0.00 \n", "3 0.03 0.01 0.00 \n", "4 0.00 0.00 0.00 \n", "... ... ... ... \n", "139899 0.00 0.00 0.00 \n", "139900 0.01 0.00 0.00 \n", "139901 0.01 0.00 0.00 \n", "139902 0.00 0.00 0.00 \n", "139903 0.00 0.00 0.44 \n", "\n", " dst_host_srv_rerror_rate class \n", "0 0.00 normal \n", "1 0.00 normal \n", "2 0.00 anomaly \n", "3 0.01 normal \n", "4 0.00 normal \n", "... ... ... \n", "139899 0.00 anomaly \n", "139900 0.00 normal \n", "139901 0.00 normal \n", "139902 0.00 normal \n", "139903 1.00 anomaly \n", "\n", "[139904 rows x 42 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
durationprotocol_typeserviceflagsrc_bytesdst_byteslandwrong_fragmenturgenthot...dst_host_srv_countdst_host_same_srv_ratedst_host_diff_srv_ratedst_host_same_src_port_ratedst_host_srv_diff_host_ratedst_host_serror_ratedst_host_srv_serror_ratedst_host_rerror_ratedst_host_srv_rerror_rateclass
00120949100000...250.170.030.170.000.000.000.050.00normal
10244914600000...10.000.600.880.000.000.000.000.00normal
201495000000...260.100.050.000.001.001.000.000.00anomaly
30124923281530000...2551.000.000.030.040.030.010.000.01normal
4012491994200000...2551.000.000.000.000.000.000.000.00normal
..................................................................
13989900149103200000...2551.000.001.000.000.000.000.000.00anomaly
139900014997943330000...1410.720.060.010.010.010.000.000.00normal
139901012293179380000...2551.000.000.010.010.010.000.000.00normal
1399020211942420000...2520.990.010.000.000.000.000.000.00normal
13990301521000000...210.080.030.000.000.000.000.441.00anomaly
\n", "

139904 rows × 42 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
# %% Project the standardised features onto the leading principal components
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Number of principal components kept as model inputs.
n_components = 10
# Component columns are named "1".."n_components" (as strings).
columns = [str(idx + 1) for idx in range(n_components)]

sns.set()
X = df.drop(['class'], axis=1)
y = df["class"]
x_scaled = StandardScaler().fit_transform(X)

pca = PCA(n_components)

# Fit and transform data
principalComponents = pca.fit_transform(x_scaled)

principalDf = pd.DataFrame(data=principalComponents, columns=columns)
finalDf = pd.concat([principalDf, df["class"]], axis=1)
# The column-wise concat aligns on the index; a mismatch would silently add
# rows filled with NaN, so verify that the row count is unchanged.
assert len(finalDf) == len(df), "PCA scores and labels are not index-aligned"

# %% Name the original feature with the largest absolute loading per component
model = pca
X_pc = principalComponents

# number of components
n_pcs = model.components_.shape[0]

# get the index of the most important feature on EACH component
most_important = [np.abs(model.components_[i]).argmax() for i in range(n_pcs)]

# Take the names from the matrix PCA was actually fitted on. The global
# CSV_HEADER is reassigned further down the notebook, so looking names up
# there gives wrong results when this cell is re-run.
initial_feature_names = list(X.columns)
# get the names
most_important_names = [initial_feature_names[most_important[i]] for i in range(n_pcs)]

dic = {'PC{}'.format(i): most_important_names[i] for i in range(n_pcs)}

# build the dataframe
dfx = pd.DataFrame(dic.items())
dfx
is_guest_login\n", "4 PC4 is_guest_login\n", "5 PC5 dst_host_diff_srv_rate\n", "6 PC6 src_bytes\n", "7 PC7 num_failed_logins\n", "8 PC8 num_failed_logins\n", "9 PC9 duration\n", "10 PC10 service\n", "11 PC11 srv_diff_host_rate\n", "12 PC12 dst_bytes\n", "13 PC13 dst_host_same_src_port_rate\n", "14 PC14 dst_host_srv_diff_host_rate" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0PC0same_srv_rate
1PC1srv_rerror_rate
2PC2dst_host_srv_diff_host_rate
3PC3is_guest_login
4PC4is_guest_login
5PC5dst_host_diff_srv_rate
6PC6src_bytes
7PC7num_failed_logins
8PC8num_failed_logins
9PC9duration
10PC10service
11PC11srv_diff_host_rate
12PC12dst_bytes
13PC13dst_host_same_src_port_rate
14PC14dst_host_srv_diff_host_rate
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
# %% Table of the dominant original feature for each principal component
# number of components
n_pcs = pca.components_.shape[0]

# Index of the feature with the largest absolute loading on each component.
most_important = [np.abs(pca.components_[i]).argmax() for i in range(n_pcs)]

# Names come from the matrix PCA was fitted on, not from the global
# CSV_HEADER, which a later cell reassigns to the PCA column header.
initial_feature_names = list(X.columns)

# get the names
most_important_names = [initial_feature_names[most_important[i]] for i in range(n_pcs)]

# 1-based component labels; the dict keeps insertion order, so the rows stay
# in component order (PC1, PC2, ..., PC10) without lexicographic sorting.
dic = {'PC{}'.format(i + 1): most_important_names[i] for i in range(n_pcs)}

# Stored under its own name so the merged dataset held in `df` is not
# overwritten and the PCA cell can still be re-run.
pc_feature_df = pd.DataFrame(list(dic.items()), columns=["component", "feature"])

# %%
pc_feature_df
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0PC1same_srv_rate
1PC10duration
2PC2srv_rerror_rate
3PC3dst_host_srv_diff_host_rate
4PC4is_guest_login
5PC5is_guest_login
6PC6dst_host_diff_srv_rate
7PC7src_bytes
8PC8num_failed_logins
9PC9num_failed_logins
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
# %% Split the PCA-projected data and persist both partitions as CSV
from sklearn.model_selection import train_test_split

# Fixed seed so the split, and therefore the reported metrics, is reproducible.
SPLIT_SEED = 42

train_data, test_data = train_test_split(
    finalDf, test_size=0.25, random_state=SPLIT_SEED
)
train_data_file = "train_data.csv"
test_data_file = "test_data.csv"

# Written without header or index; get_dataset_from_csv supplies column names.
train_data.to_csv(train_data_file, index=False, header=False)
test_data.to_csv(test_data_file, index=False, header=False)

# %% Dataset metadata, input pipeline and neural decision forest model
from tensorflow.keras.layers import StringLookup

# Column names of the CSV files written above: PCA components, then the label.
CSV_HEADER = []
for x in columns:
    CSV_HEADER.append(x)
CSV_HEADER.append("class")

# A list of the numerical feature names.
NUMERIC_FEATURE_NAMES = columns
# A dictionary of the categorical features and their vocabulary.
CATEGORICAL_FEATURES_WITH_VOCABULARY = {}
# A list of the columns to ignore from the dataset.
IGNORE_COLUMN_NAMES = []
# A list of the categorical feature names.
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())
# A list of all the input features.
FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES
# A list of column default values for each feature.
COLUMN_DEFAULTS = [
    [0.0] if feature_name in NUMERIC_FEATURE_NAMES + IGNORE_COLUMN_NAMES else ["NA"]
    for feature_name in CSV_HEADER
]
# The name of the target feature.
TARGET_FEATURE_NAME = "class"
# A list of the labels of the target features; list position is the class index.
TARGET_LABELS = ["normal", "anomaly"]

# Maps the string label to its position in TARGET_LABELS.
target_label_lookup = StringLookup(
    vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0
)


def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):
    """Build a cached, batched dataset of (features, label index) from a CSV.

    Args:
        csv_file_path: Path of a header-less CSV whose columns follow CSV_HEADER.
        shuffle: Forwarded to ``make_csv_dataset``.
        batch_size: Number of rows per batch.

    Returns:
        A ``tf.data`` dataset yielding ``(features, label_index)`` pairs, where
        the label index comes from ``target_label_lookup``.
    """
    dataset = tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        column_names=CSV_HEADER,
        column_defaults=COLUMN_DEFAULTS,
        label_name=TARGET_FEATURE_NAME,
        num_epochs=1,
        header=False,
        na_value="?",
        shuffle=shuffle,
    ).map(lambda features, target: (features, target_label_lookup(target)))
    return dataset.cache()


def create_model_inputs():
    """Create one scalar Keras ``Input`` per feature, keyed by feature name.

    Numeric features are declared as float32, all others as string.
    """
    inputs = {}
    for feature_name in FEATURE_NAMES:
        if feature_name in NUMERIC_FEATURE_NAMES:
            inputs[feature_name] = layers.Input(
                name=feature_name, shape=(), dtype=tf.float32
            )
        else:
            inputs[feature_name] = layers.Input(
                name=feature_name, shape=(), dtype=tf.string
            )
    return inputs


def encode_inputs(inputs):
    """Turn the dictionary of model inputs into one concatenated feature tensor.

    Categorical features are embedded; numeric features are used as-is with a
    trailing axis added so that all pieces can be concatenated.
    """
    encoded_features = []
    for feature_name in inputs:
        if feature_name in CATEGORICAL_FEATURE_NAMES:
            vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]
            # Create a lookup to convert string values to integer indices.
            # Since we are not using a mask token, nor expecting any out of
            # vocabulary (oov) token, we set mask_token to None and
            # num_oov_indices to 0.
            lookup = StringLookup(
                vocabulary=vocabulary, mask_token=None, num_oov_indices=0
            )
            # Convert the string input values into integer indices.
            value_index = lookup(inputs[feature_name])
            embedding_dims = int(math.sqrt(lookup.vocabulary_size()))
            # Create an embedding layer with the specified dimensions.
            embedding = layers.Embedding(
                input_dim=lookup.vocabulary_size(), output_dim=embedding_dims
            )
            # Convert the index values to embedding representations.
            encoded_feature = embedding(value_index)
        else:
            # Use the numerical features as-is.
            encoded_feature = inputs[feature_name]
            if inputs[feature_name].shape[-1] is None:
                encoded_feature = tf.expand_dims(encoded_feature, -1)

        encoded_features.append(encoded_feature)

    encoded_features = layers.concatenate(encoded_features)
    return encoded_features


class NeuralDecisionTree(keras.Model):
    """Differentiable decision tree over a random subset of the input features.

    A dense sigmoid layer produces the routing probabilities, and ``pi`` holds
    trainable per-leaf class weights that are softmax-normalised in ``call``.
    """

    def __init__(self, depth, num_features, used_features_rate, num_classes):
        super().__init__()
        self.depth = depth
        self.num_leaves = 2 ** depth
        self.num_classes = num_classes

        # Create a mask for the randomly selected features.
        num_used_features = int(num_features * used_features_rate)
        one_hot = np.eye(num_features)
        sampled_feature_indicies = np.random.choice(
            np.arange(num_features), num_used_features, replace=False
        )
        self.used_features_mask = one_hot[sampled_feature_indicies]

        # Initialize the weights of the classes in leaves.
        self.pi = tf.Variable(
            initial_value=tf.random_normal_initializer()(
                shape=[self.num_leaves, self.num_classes]
            ),
            dtype="float32",
            trainable=True,
        )

        # Initialize the stochastic routing layer.
        self.decision_fn = layers.Dense(
            units=self.num_leaves, activation="sigmoid", name="decision"
        )

    def call(self, features):
        """Return class probabilities of shape [batch_size, num_classes]."""
        batch_size = tf.shape(features)[0]

        # Apply the feature mask to the input features.
        features = tf.matmul(
            features, self.used_features_mask, transpose_b=True
        )  # [batch_size, num_used_features]
        # Compute the routing probabilities.
        decisions = tf.expand_dims(
            self.decision_fn(features), axis=2
        )  # [batch_size, num_leaves, 1]
        # Concatenate the routing probabilities with their complements.
        decisions = layers.concatenate(
            [decisions, 1 - decisions], axis=2
        )  # [batch_size, num_leaves, 2]

        mu = tf.ones([batch_size, 1, 1])

        begin_idx = 1
        end_idx = 2
        # Traverse the tree in breadth-first order.
        for level in range(self.depth):
            mu = tf.reshape(mu, [batch_size, -1, 1])  # [batch_size, 2 ** level, 1]
            mu = tf.tile(mu, (1, 1, 2))  # [batch_size, 2 ** level, 2]
            level_decisions = decisions[
                :, begin_idx:end_idx, :
            ]  # [batch_size, 2 ** level, 2]
            mu = mu * level_decisions  # [batch_size, 2 ** level, 2]
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (level + 1)

        mu = tf.reshape(mu, [batch_size, self.num_leaves])  # [batch_size, num_leaves]
        probabilities = keras.activations.softmax(self.pi)  # [num_leaves, num_classes]
        outputs = tf.matmul(mu, probabilities)  # [batch_size, num_classes]
        return outputs


class NeuralDecisionForest(keras.Model):
    """Ensemble that averages the outputs of several NeuralDecisionTree models."""

    def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):
        super().__init__()
        # Kept on the instance so call() does not depend on a notebook-level
        # global of the same name that is defined in a later cell.
        self.num_classes = num_classes
        self.ensemble = []
        # Initialize the ensemble by adding NeuralDecisionTree instances.
        # Each tree will have its own randomly selected input features to use.
        for _ in range(num_trees):
            self.ensemble.append(
                NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)
            )

    def call(self, inputs):
        """Return the mean of the per-tree outputs, [batch_size, num_classes]."""
        # Initialize the outputs: a [batch_size, num_classes] matrix of zeros.
        batch_size = tf.shape(inputs)[0]
        outputs = tf.zeros([batch_size, self.num_classes])

        # Aggregate the outputs of trees in the ensemble.
        for tree in self.ensemble:
            outputs += tree(inputs)
        # Divide the outputs by the ensemble size to get the average.
        outputs /= len(self.ensemble)
        return outputs


learning_rate = 0.01
batch_size = 128
num_epochs = 10


def run_experiment(model):
    """Compile, train and evaluate ``model`` on the CSV splits; return it.

    Training reads ``train_data_file`` and evaluation reads ``test_data_file``.
    The optimiser step size, batch size and epoch count come from the
    notebook-level ``learning_rate``, ``batch_size`` and ``num_epochs``.
    """
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        # Referenced through `keras.` on purpose: the bare name `metrics` is
        # rebound to sklearn.metrics by a later cell in this notebook.
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )
    print("Start training the model...")
    train_dataset = get_dataset_from_csv(
        train_data_file, shuffle=True, batch_size=batch_size
    )

    model.fit(train_dataset, epochs=num_epochs)
    print("Model training finished")

    print("Evaluating the model on the test data...")
    test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)

    _, accuracy = model.evaluate(test_dataset)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")
    return model


# %% Build and train the forest
num_trees = 25
depth = 5
used_features_rate = 0.5
num_classes = len(TARGET_LABELS)


def create_forest_model():
    """Wire inputs -> encoding -> batch norm -> NeuralDecisionForest."""
    inputs = create_model_inputs()
    features = encode_inputs(inputs)
    features = layers.BatchNormalization()(features)
    num_features = features.shape[1]

    forest_model = NeuralDecisionForest(
        num_trees, depth, num_features, used_features_rate, num_classes
    )

    outputs = forest_model(features)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model


forest_model = create_forest_model()

finalModel = run_experiment(forest_model)
# %% Evaluate the trained forest on the held-out split
import matplotlib.pyplot as plt
import numpy
# Imported by name so the notebook-level `metrics` (Keras) is not rebound.
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def binary_metrics_from_counts(counts):
    """Compute binary classification metrics from a 2x2 matrix of raw counts.

    Args:
        counts: 2x2 array laid out as scikit-learn's ``confusion_matrix``
            returns it for labels ``[0, 1]``: rows are true labels, columns
            are predicted labels, and label 1 is the positive class.

    Returns:
        Dict with keys ``ACC``, ``PR``, ``TPR``, ``FPR`` and ``F1Score``; a
        ratio with a zero denominator is reported as NaN.
    """
    tn, fp, fn, tp = (float(v) for v in np.asarray(counts).ravel())
    precision = _safe_ratio(tp, tp + fp)
    recall = _safe_ratio(tp, tp + fn)
    return {
        "ACC": _safe_ratio(tp + tn, tp + tn + fp + fn),
        "PR": precision,
        "TPR": recall,
        "FPR": _safe_ratio(fp, fp + tn),
        "F1Score": _safe_ratio(2 * precision * recall, precision + recall),
    }


# shuffle defaults to False, so predictions come back in file order and line
# up with the labels read from the same file below.
test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)

colnames = [*columns, 'target']
data = pd.read_csv(test_data_file, names=colnames, header=None)

# Encode labels by their position in TARGET_LABELS (normal -> 0, anomaly -> 1),
# which is the same mapping the label lookup layer applied during training.
label_to_index = {label: position for position, label in enumerate(TARGET_LABELS)}
data['target'] = data['target'].map(label_to_index)
assert data['target'].notna().all(), "unexpected label in test CSV"
y_test = data['target'].values  # as a numpy array

y_prediction = finalModel.predict(test_dataset)
y_prediction = np.argmax(y_prediction, axis=1)
assert len(y_prediction) == len(y_test), "predictions and labels differ in length"

# Metrics need raw counts. The column-normalised matrix (each column sums to
# 1) is kept in `result` for display only.
counts = confusion_matrix(y_test, y_prediction, labels=[0, 1])
result = confusion_matrix(y_test, y_prediction, labels=[0, 1], normalize='pred')
print(result)

# Rows are true labels, columns are predictions; "anomaly" (1) is positive.
TN, FP, FN, TP = counts.ravel()
scores = binary_metrics_from_counts(counts)
ACC = scores["ACC"]
PR = scores["PR"]  # precision
TPR = scores["TPR"]  # Recall or True positive rate
FPR = scores["FPR"]
F1Score = scores["F1Score"]
print("ACC: " + str(ACC))
print("PR: " + str(PR))
print("TPR: " + str(TPR))
print("FPR: " + str(FPR))
print("F1Score: " + str(F1Score))

# %% Plot the column-normalised confusion matrix
# Axis ticks follow the label order [0, 1], i.e. TARGET_LABELS.
cm_display = ConfusionMatrixDisplay(confusion_matrix=result, display_labels=TARGET_LABELS)

cm_display.plot()
plt.show()
"execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
# %%
result

# %% Hard-coded snapshot of the matrix above
# Lets the matrix be inspected or plotted in a fresh kernel without re-training.
# NOTE(review): these values are pasted from an earlier run and overwrite any
# freshly computed `result`.
import numpy as np
result = np.array([[0.96431656, 0.01209579],
                   [0.03568344, 0.98790421]])
result

# %% Mount Google Drive (Colab only)
from google.colab import drive
drive.mount('/content/drive')

# %% Load every CSV of the dataset into one frame
import pandas as pd
import glob
import os

# Directory holding the dataset CSVs on the mounted drive. The earlier
# Windows path was never used: os.path.join drops everything before an
# absolute component.
path = "/content/drive/MyDrive/datasets/CISIDS2017"
# Sorted so the row order of the combined frame does not depend on the
# arbitrary order in which glob lists files.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    raise FileNotFoundError(f"No CSV files found in {path}")

# The comprehension variable stays local, so the notebook-level `df` is not
# overwritten by the last file read.
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

frame = pd.concat(li, axis=0, ignore_index=True)
"code", "source": [ "frame" ], "metadata": { "id": "OnM8i81AUmva", "outputId": "f6e5ef49-82cf-4268-a39b-4f5b75f92b01", "colab": { "base_uri": "https://localhost:8080/", "height": 496 } }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Destination Port Flow Duration Total Fwd Packets \\\n", "0 54865 3 2 \n", "1 55054 109 1 \n", "2 55055 52 1 \n", "3 46236 34 1 \n", "4 54863 3 2 \n", "... ... ... ... \n", "2830738 53 32215 4 \n", "2830739 53 324 2 \n", "2830740 58030 82 2 \n", "2830741 53 1048635 6 \n", "2830742 53 94939 4 \n", "\n", " Total Backward Packets Total Length of Fwd Packets \\\n", "0 0 12 \n", "1 1 6 \n", "2 1 6 \n", "3 1 6 \n", "4 0 12 \n", "... ... ... \n", "2830738 2 112 \n", "2830739 2 84 \n", "2830740 1 31 \n", "2830741 2 192 \n", "2830742 2 188 \n", "\n", " Total Length of Bwd Packets Fwd Packet Length Max \\\n", "0 0 6 \n", "1 6 6 \n", "2 6 6 \n", "3 6 6 \n", "4 0 6 \n", "... ... ... \n", "2830738 152 28 \n", "2830739 362 42 \n", "2830740 6 31 \n", "2830741 256 32 \n", "2830742 226 47 \n", "\n", " Fwd Packet Length Min Fwd Packet Length Mean \\\n", "0 6 6.0 \n", "1 6 6.0 \n", "2 6 6.0 \n", "3 6 6.0 \n", "4 6 6.0 \n", "... ... ... \n", "2830738 28 28.0 \n", "2830739 42 42.0 \n", "2830740 0 15.5 \n", "2830741 32 32.0 \n", "2830742 47 47.0 \n", "\n", " Fwd Packet Length Std ... min_seg_size_forward Active Mean \\\n", "0 0.00000 ... 20 0.0 \n", "1 0.00000 ... 20 0.0 \n", "2 0.00000 ... 20 0.0 \n", "3 0.00000 ... 20 0.0 \n", "4 0.00000 ... 20 0.0 \n", "... ... ... ... ... \n", "2830738 0.00000 ... 20 0.0 \n", "2830739 0.00000 ... 20 0.0 \n", "2830740 21.92031 ... 32 0.0 \n", "2830741 0.00000 ... 20 0.0 \n", "2830742 0.00000 ... 20 0.0 \n", "\n", " Active Std Active Max Active Min Idle Mean Idle Std \\\n", "0 0.0 0 0 0.0 0.0 \n", "1 0.0 0 0 0.0 0.0 \n", "2 0.0 0 0 0.0 0.0 \n", "3 0.0 0 0 0.0 0.0 \n", "4 0.0 0 0 0.0 0.0 \n", "... ... ... ... ... ... 
\n", "2830738 0.0 0 0 0.0 0.0 \n", "2830739 0.0 0 0 0.0 0.0 \n", "2830740 0.0 0 0 0.0 0.0 \n", "2830741 0.0 0 0 0.0 0.0 \n", "2830742 0.0 0 0 0.0 0.0 \n", "\n", " Idle Max Idle Min Label \n", "0 0 0 BENIGN \n", "1 0 0 BENIGN \n", "2 0 0 BENIGN \n", "3 0 0 BENIGN \n", "4 0 0 BENIGN \n", "... ... ... ... \n", "2830738 0 0 BENIGN \n", "2830739 0 0 BENIGN \n", "2830740 0 0 BENIGN \n", "2830741 0 0 BENIGN \n", "2830742 0 0 BENIGN \n", "\n", "[2830743 rows x 79 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Destination PortFlow DurationTotal Fwd PacketsTotal Backward PacketsTotal Length of Fwd PacketsTotal Length of Bwd PacketsFwd Packet Length MaxFwd Packet Length MinFwd Packet Length MeanFwd Packet Length Std...min_seg_size_forwardActive MeanActive StdActive MaxActive MinIdle MeanIdle StdIdle MaxIdle MinLabel
054865320120666.00.00000...200.00.0000.00.000BENIGN
1550541091166666.00.00000...200.00.0000.00.000BENIGN
255055521166666.00.00000...200.00.0000.00.000BENIGN
346236341166666.00.00000...200.00.0000.00.000BENIGN
454863320120666.00.00000...200.00.0000.00.000BENIGN
..................................................................
2830738533221542112152282828.00.00000...200.00.0000.00.000BENIGN
2830739533242284362424242.00.00000...200.00.0000.00.000BENIGN
283074058030822131631015.521.92031...320.00.0000.00.000BENIGN
283074153104863562192256323232.00.00000...200.00.0000.00.000BENIGN
2830742539493942188226474747.00.00000...200.00.0000.00.000BENIGN
\n", "

2830743 rows × 79 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "code", "source": [ "column_headers = list(frame.columns.values)\n", "column_headers" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "eJcGQKBn4Abs", "outputId": "c7e1dd53-ea73-4d25-e6d8-c1d45feb61b9" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[' Destination Port',\n", " ' Flow Duration',\n", " ' Total Fwd Packets',\n", " ' Total Backward Packets',\n", " 'Total Length of Fwd Packets',\n", " ' Total Length of Bwd Packets',\n", " ' Fwd Packet Length Max',\n", " ' Fwd Packet Length Min',\n", " ' Fwd Packet Length Mean',\n", " ' Fwd Packet Length Std',\n", " 'Bwd Packet Length Max',\n", " ' Bwd Packet Length Min',\n", " ' Bwd Packet Length Mean',\n", " ' Bwd Packet Length Std',\n", " 'Flow Bytes/s',\n", " ' Flow Packets/s',\n", " ' Flow IAT Mean',\n", " ' Flow IAT Std',\n", " ' Flow IAT Max',\n", " ' Flow IAT Min',\n", " 'Fwd IAT Total',\n", " ' Fwd IAT Mean',\n", " ' Fwd IAT Std',\n", " ' Fwd IAT Max',\n", " ' Fwd IAT Min',\n", " 'Bwd IAT Total',\n", " ' Bwd IAT Mean',\n", " ' Bwd IAT Std',\n", " ' Bwd IAT Max',\n", " ' Bwd IAT Min',\n", " 'Fwd PSH Flags',\n", " ' Bwd PSH Flags',\n", " ' Fwd URG Flags',\n", " ' Bwd URG Flags',\n", " ' Fwd Header Length',\n", " ' Bwd Header Length',\n", " 'Fwd Packets/s',\n", " ' Bwd Packets/s',\n", " ' Min Packet Length',\n", " ' Max Packet Length',\n", " ' Packet Length Mean',\n", " ' Packet Length Std',\n", " ' Packet Length Variance',\n", " 'FIN Flag Count',\n", " ' SYN Flag Count',\n", " ' RST Flag Count',\n", " ' PSH Flag Count',\n", " ' ACK Flag Count',\n", " ' URG Flag Count',\n", " ' CWE Flag Count',\n", " ' ECE Flag Count',\n", " ' Down/Up Ratio',\n", " ' Average Packet Size',\n", " ' Avg Fwd Segment Size',\n", " ' Avg Bwd Segment Size',\n", " ' Fwd Header Length.1',\n", " 'Fwd Avg Bytes/Bulk',\n", " ' Fwd Avg Packets/Bulk',\n", " ' Fwd Avg Bulk Rate',\n", " ' Bwd Avg 
Bytes/Bulk',\n", " ' Bwd Avg Packets/Bulk',\n", " 'Bwd Avg Bulk Rate',\n", " 'Subflow Fwd Packets',\n", " ' Subflow Fwd Bytes',\n", " ' Subflow Bwd Packets',\n", " ' Subflow Bwd Bytes',\n", " 'Init_Win_bytes_forward',\n", " ' Init_Win_bytes_backward',\n", " ' act_data_pkt_fwd',\n", " ' min_seg_size_forward',\n", " 'Active Mean',\n", " ' Active Std',\n", " ' Active Max',\n", " ' Active Min',\n", " 'Idle Mean',\n", " ' Idle Std',\n", " ' Idle Max',\n", " ' Idle Min',\n", " ' Label']" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "source": [ "frame[' Label'] = frame[' Label'].str.replace(r\"^(.(?:1: FutureWarning: The default value of regex will change from True to False in a future version.\n", " frame[' Label'] = frame[' Label'].str.replace(r\"^(.(? (Q3 + 1.5 * IQR)))\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n", " return trueList\n", "\n", "nonOutlierList = Remove_Outlier_Indices(df)\n", "new_data = df[nonOutlierList]\n", "\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "90ea9f7e-42a2-4a2e-8d6c-c6179f7e9d2b", "id": "i_PXbZyt5-fR" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":2: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q1 = df.quantile(0.02)\n", ":3: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q3 = df.quantile(0.98)\n", ":6: FutureWarning: Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version. Do `left, right = left.align(right, axis=1, copy=False)` before e.g. 
`left == right`\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n" ] } ] }, { "cell_type": "code", "source": [ "#Deleting local variables to free the memory\n", "df = new_data\n", "df = df.reset_index(drop=True)\n", "del new_data\n", "del nonOutlierList\n", "del li\n", "del frame" ], "metadata": { "id": "FSPL95xg5-fT" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import matplotlib.pyplot as plt\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.decomposition import PCA\n", "n_components = 10\n", "columns = []\n", "for x in range(n_components):\n", " columns.append(str(x+1))\n", "\n", "X = df.drop([' Label'], axis=1).values\n", "y = df[\" Label\"].values\n", "x_scaled = StandardScaler().fit_transform(X)\n", "\n", "pca = PCA(n_components)\n", "\n", "# Fit and transform data\n", "principalComponents = pca.fit_transform(x_scaled)\n", "\n", "principalDf = pd.DataFrame(data = principalComponents\n", " , columns = columns)\n", "finalDf = pd.concat([principalDf, df[\" Label\"]], axis = 1)\n" ], "metadata": { "id": "KH6-dLBE5-fU" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "model = pca\n", "X_pc = principalComponents\n", "\n", "# number of components\n", "n_pcs= model.components_.shape[0]\n", "\n", "# get the index of the most important feature on EACH component\n", "# LIST COMPREHENSION HERE\n", "most_important = [np.abs(model.components_[i]).argmax() for i in range(n_pcs)]\n", "\n", "initial_feature_names = CSV_HEADER\n", "# get the names\n", "most_important_names = [initial_feature_names[most_important[i]] for i in range(n_pcs)]\n", "\n", "# LIST COMPREHENSION HERE AGAIN\n", "dic = {'PC{}'.format(i): most_important_names[i] for i in range(n_pcs)}\n", "\n", "# build the dataframe\n", "dfx = pd.DataFrame(dic.items())\n", "dfx" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "id": "2cutctpIQIFe", "outputId": 
"5054c835-302f-45bb-e6a6-f84bcc92e0df" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " 0 1\n", "0 PC0 Flow Duration\n", "1 PC1 Total Length of Fwd Packets\n", "2 PC2 ACK Flag Count\n", "3 PC3 Fwd Packet Length Min\n", "4 PC4 Fwd Packet Length Mean\n", "5 PC5 Flow Bytes/s\n", "6 PC6 Bwd IAT Std\n", "7 PC7 Active Mean\n", "8 PC8 URG Flag Count\n", "9 PC9 Bwd IAT Min" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0PC0Flow Duration
1PC1Total Length of Fwd Packets
2PC2ACK Flag Count
3PC3Fwd Packet Length Min
4PC4Fwd Packet Length Mean
5PC5Flow Bytes/s
6PC6Bwd IAT Std
7PC7Active Mean
8PC8URG Flag Count
9PC9Bwd IAT Min
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 64 } ] }, { "cell_type": "code", "source": [ "from sklearn.model_selection import train_test_split\n", "train_data, test_data = train_test_split(finalDf, test_size=0.25)\n", "train_data_file = \"train_data.csv\"\n", "test_data_file = \"test_data.csv\"\n", "\n", "train_data.to_csv(train_data_file, index=False, header=False)\n", "test_data.to_csv(test_data_file, index=False, header=False)" ], "metadata": { "id": "c85wOCBv5-fX" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "del finalDf\n", "del principalDf\n", "del train_data\n", "del test_data" ], "metadata": { "id": "szYkC6TjPd0U" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "CSV_HEADER = []\n", "for x in columns:\n", " CSV_HEADER.append(x)\n", "CSV_HEADER.append(\" Label\")\n", "\n", "# A list of the numerical feature names.\n", "NUMERIC_FEATURE_NAMES = columns\n", "# A dictionary of the categorical features and their vocabulary.\n", "CATEGORICAL_FEATURES_WITH_VOCABULARY = {\n", "}\n", "# A list of the columns to ignore from the dataset.\n", "IGNORE_COLUMN_NAMES = []\n", "# A list of the categorical feature names.\n", "CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())\n", "# A list of all the input features.\n", "FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES\n", "# A list of column default values for each feature.\n", "COLUMN_DEFAULTS = [\n", " [0.0] if feature_name in NUMERIC_FEATURE_NAMES + IGNORE_COLUMN_NAMES else [\"NA\"]\n", " for feature_name in CSV_HEADER\n", "]\n", "# The name of the target feature.\n", "TARGET_FEATURE_NAME = \" Label\"\n", "# A list of the labels of the target features.\n", "TARGET_LABELS = [\"BENIGN\", \"ANOMALY\"]\n", "\n", "from tensorflow.keras.layers import StringLookup\n", "\n", "target_label_lookup = StringLookup(\n", " vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0\n", ")\n", "\n", "\n", "def 
get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n", " dataset = tf.data.experimental.make_csv_dataset(\n", " csv_file_path,\n", " batch_size=batch_size,\n", " column_names=CSV_HEADER,\n", " column_defaults=COLUMN_DEFAULTS,\n", " label_name=TARGET_FEATURE_NAME,\n", " num_epochs=1,\n", " header=False,\n", " na_value=\"?\",\n", " shuffle=shuffle,\n", " ).map(lambda features, target: (features, target_label_lookup(target)))\n", " return dataset.cache()\n", "\n", "def create_model_inputs():\n", " inputs = {}\n", " for feature_name in FEATURE_NAMES:\n", " if feature_name in NUMERIC_FEATURE_NAMES:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.float32\n", " )\n", " else:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.string\n", " )\n", " return inputs\n", "\n", "def encode_inputs(inputs):\n", " encoded_features = []\n", " for feature_name in inputs:\n", " if feature_name in CATEGORICAL_FEATURE_NAMES:\n", " vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n", " #print(vocabulary)\n", " # Create a lookup to convert a string values to an integer indices.\n", " # Since we are not using a mask token, nor expecting any out of vocabulary\n", " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", " lookup = StringLookup(\n", " vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n", " )\n", " # Convert the string input values into integer indices.\n", " value_index = lookup(inputs[feature_name])\n", " embedding_dims = int(math.sqrt(lookup.vocabulary_size()))\n", " # Create an embedding layer with the specified dimensions.\n", " embedding = layers.Embedding(\n", " input_dim=lookup.vocabulary_size(), output_dim=embedding_dims\n", " )\n", " # Convert the index values to embedding representations.\n", " encoded_feature = embedding(value_index)\n", " else:\n", " # Use the numerical features as-is.\n", " encoded_feature = inputs[feature_name]\n", " 
if inputs[feature_name].shape[-1] is None:\n", " encoded_feature = tf.expand_dims(encoded_feature, -1)\n", "\n", " encoded_features.append(encoded_feature)\n", "\n", " encoded_features = layers.concatenate(encoded_features)\n", " return encoded_features\n", "\n", "class NeuralDecisionTree(keras.Model):\n", " def __init__(self, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.depth = depth\n", " self.num_leaves = 2 ** depth\n", " self.num_classes = num_classes\n", "\n", " # Create a mask for the randomly selected features.\n", " num_used_features = int(num_features * used_features_rate)\n", " one_hot = np.eye(num_features)\n", " sampled_feature_indicies = np.random.choice(\n", " np.arange(num_features), num_used_features, replace=False\n", " )\n", " self.used_features_mask = one_hot[sampled_feature_indicies]\n", "\n", " # Initialize the weights of the classes in leaves.\n", " self.pi = tf.Variable(\n", " initial_value=tf.random_normal_initializer()(\n", " shape=[self.num_leaves, self.num_classes]\n", " ),\n", " dtype=\"float32\",\n", " trainable=True,\n", " )\n", "\n", " # Initialize the stochastic routing layer.\n", " self.decision_fn = layers.Dense(\n", " units=self.num_leaves, activation=\"sigmoid\", name=\"decision\"\n", " )\n", "\n", " def call(self, features):\n", " batch_size = tf.shape(features)[0]\n", "\n", " # Apply the feature mask to the input features.\n", " features = tf.matmul(\n", " features, self.used_features_mask, transpose_b=True\n", " ) # [batch_size, num_used_features]\n", " # Compute the routing probabilities.\n", " decisions = tf.expand_dims(\n", " self.decision_fn(features), axis=2\n", " ) # [batch_size, num_leaves, 1]\n", " # Concatenate the routing probabilities with their complements.\n", " decisions = layers.concatenate(\n", " [decisions, 1 - decisions], axis=2\n", " ) # [batch_size, num_leaves, 2]\n", "\n", " mu = tf.ones([batch_size, 1, 1])\n", "\n", " begin_idx = 1\n", " end_idx = 2\n", " # 
Traverse the tree in breadth-first order.\n", " for level in range(self.depth):\n", " mu = tf.reshape(mu, [batch_size, -1, 1]) # [batch_size, 2 ** level, 1]\n", " mu = tf.tile(mu, (1, 1, 2)) # [batch_size, 2 ** level, 2]\n", " level_decisions = decisions[\n", " :, begin_idx:end_idx, :\n", " ] # [batch_size, 2 ** level, 2]\n", " mu = mu * level_decisions # [batch_size, 2**level, 2]\n", " begin_idx = end_idx\n", " end_idx = begin_idx + 2 ** (level + 1)\n", "\n", " mu = tf.reshape(mu, [batch_size, self.num_leaves]) # [batch_size, num_leaves]\n", " probabilities = keras.activations.softmax(self.pi) # [num_leaves, num_classes]\n", " outputs = tf.matmul(mu, probabilities) # [batch_size, num_classes]\n", " return outputs\n", "\n", "class NeuralDecisionForest(keras.Model):\n", " def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.ensemble = []\n", " # Initialize the ensemble by adding NeuralDecisionTree instances.\n", " # Each tree will have its own randomly selected input features to use.\n", " for _ in range(num_trees):\n", " self.ensemble.append(\n", " NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)\n", " )\n", "\n", " def call(self, inputs):\n", " # Initialize the outputs: a [batch_size, num_classes] matrix of zeros.\n", " batch_size = tf.shape(inputs)[0]\n", " outputs = tf.zeros([batch_size, num_classes])\n", "\n", " # Aggregate the outputs of trees in the ensemble.\n", " for tree in self.ensemble:\n", " outputs += tree(inputs)\n", " # Divide the outputs by the ensemble size to get the average.\n", " outputs /= len(self.ensemble)\n", " return outputs\n", "learning_rate = 0.01\n", "batch_size = 128\n", "num_epochs = 10\n", "\n", "\n", "def run_experiment(model):\n", "\n", " # model.compile(\n", " # optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " # loss=keras.losses.SparseCategoricalCrossentropy(),\n", " # 
metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", " # )\n", " model.compile(\n", " optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " loss=keras.losses.SparseCategoricalCrossentropy(),\n", " metrics=[metrics.SparseCategoricalAccuracy()],\n", " )\n", " print(\"Start training the model...\")\n", " train_dataset = get_dataset_from_csv(\n", " train_data_file, shuffle=True, batch_size=batch_size\n", " )\n", "\n", " model.fit(train_dataset, epochs=num_epochs)\n", " print(\"Model training finished\")\n", "\n", " print(\"Evaluating the model on the test data...\")\n", " test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "\n", " _, accuracy = model.evaluate(test_dataset)\n", " print(f\"Test accuracy: {round(accuracy * 100, 2)}%\")\n", " return model" ], "metadata": { "id": "8txIkhqk5-fX", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9f5593a3-23af-4019-e1a8-8b65018be338" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/numpy/core/numeric.py:2463: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", " return bool(asarray(a1 == a2).all())\n" ] } ] }, { "cell_type": "code", "source": [ "num_trees = 25\n", "depth = 5\n", "used_features_rate = 0.5\n", "num_classes = len(TARGET_LABELS)\n", "\n", "\n", "def create_forest_model():\n", " inputs = create_model_inputs()\n", " features = encode_inputs(inputs)\n", " features = layers.BatchNormalization()(features)\n", " num_features = features.shape[1]\n", "\n", " forest_model = NeuralDecisionForest(\n", " num_trees, depth, num_features, used_features_rate, num_classes\n", " )\n", "\n", " outputs = forest_model(features)\n", " model = keras.Model(inputs=inputs, outputs=outputs)\n", " return model\n", "\n", "\n", "forest_model = create_forest_model()\n", "\n", "finalModel = run_experiment(forest_model)" ], 
"metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "939b76c5-0742-4f09-e122-8ec4988c107f", "id": "cEIPaJMi5-fa" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Start training the model...\n", "Epoch 1/10\n", "14880/14880 [==============================] - 479s 31ms/step - loss: 0.0907 - sparse_categorical_accuracy: 0.9675\n", "Epoch 2/10\n", "14880/14880 [==============================] - 397s 27ms/step - loss: 0.0763 - sparse_categorical_accuracy: 0.9725\n", "Epoch 3/10\n", "14880/14880 [==============================] - 391s 26ms/step - loss: 0.0741 - sparse_categorical_accuracy: 0.9734\n", "Epoch 4/10\n", "14880/14880 [==============================] - 392s 26ms/step - loss: 0.0729 - sparse_categorical_accuracy: 0.9738\n", "Epoch 5/10\n", "14880/14880 [==============================] - 393s 26ms/step - loss: 0.0721 - sparse_categorical_accuracy: 0.9740\n", "Epoch 6/10\n", "14880/14880 [==============================] - 392s 26ms/step - loss: 0.0715 - sparse_categorical_accuracy: 0.9741\n", "Epoch 7/10\n", "14880/14880 [==============================] - 396s 27ms/step - loss: 0.0711 - sparse_categorical_accuracy: 0.9743\n", "Epoch 8/10\n", "14880/14880 [==============================] - 395s 27ms/step - loss: 0.0705 - sparse_categorical_accuracy: 0.9744\n", "Epoch 9/10\n", "14880/14880 [==============================] - 379s 25ms/step - loss: 0.0702 - sparse_categorical_accuracy: 0.9745\n", "Epoch 10/10\n", "14880/14880 [==============================] - 378s 25ms/step - loss: 0.0700 - sparse_categorical_accuracy: 0.9746\n", "Model training finished\n", "Evaluating the model on the test data...\n", "4960/4960 [==============================] - 62s 12ms/step - loss: 0.0522 - sparse_categorical_accuracy: 0.9818\n", "Test accuracy: 98.18%\n" ] } ] }, { "cell_type": "code", "source": [ "test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "colnames=['1', '2', '3', 
'4', '5', '6', '7', '8', '9', '10', 'target']\n", "data = pd.read_csv(\"test_data.csv\", names=colnames, header=None)\n", "data['target'].replace('BENIGN', 0,inplace=True)\n", "data['target'].replace('ANOMALY', 1,inplace=True)\n", "y_test = data['target'].values # as a numpy array\n", "from sklearn.metrics import confusion_matrix\n", "y_prediction = finalModel.predict(test_dataset)\n", "y_prediction = np.argmax (y_prediction, axis = 1)\n", "result = confusion_matrix(y_test, y_prediction , normalize='pred')\n", "print(result)\n", "TP = result[0][0]\n", "FP = result[0][1]\n", "TN = result[1][1]\n", "FN = result[1][0]\n", "ACC = (TP+TN)/(TP+TN+FP+FN)\n", "PR = TP/(TP+FP) #precision\n", "TPR = TP/(TP+FN) #Recall or True positive rate\n", "FPR = FP/(FP+TN)\n", "F1Score = 2*(PR*TPR)/(PR+TPR)\n", "print(\"ACC: \" + str(ACC))\n", "print(\"PR: \" + str(PR))\n", "print(\"TPR: \" + str(TPR))\n", "print(\"FPR: \" + str(FPR))\n", "print(\"F1Score: \" + str(F1Score))\n", "import matplotlib.pyplot as plt\n", "import numpy\n", "from sklearn import metrics\n", "\n", "\n", "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = result, display_labels = [True, False])\n", "\n", "cm_display.plot()\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 588 }, "id": "JR5WC1Y0m3hJ", "outputId": "cdc82ece-caf7-4c5b-ea7f-57ac3a84742b" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "4960/4960 [==============================] - 66s 13ms/step\n", "[[0.98051765 0.01229758]\n", " [0.01948235 0.98770242]]\n", "ACC: 0.9841100339902484\n", "PR: 0.987613421019833\n", "TPR: 0.9805176523221462\n", "FPR: 0.012297584341649253\n", "F1Score: 0.9840527453650412\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhAAAAGwCAYAAAD49Fz6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA7hElEQVR4nO3deXwU9f3H8fcmIZv7AEJCQuQwXKmcodJUEbHh0IrgUa2iHApVMB5QLmu5hXiBiEVQFBDFAgW1Cog/REEitEoQVC7lDFeQOxDItTO/PyiLkQ1m2cmx7Ov5eMzj4c5+Z+Yzbdj97OfznRmbaZqmAAAA3OBX2QEAAADvQwIBAADcRgIBAADcRgIBAADcRgIBAADcRgIBAADcRgIBAADcFlDZAXgDwzB04MABhYeHy2azVXY4AAA3maapU6dOKT4+Xn5+5ffbOT8/X4WFhR7vJzAwUEFBQRZEVH5IIMrgwIEDSkxMrOwwAAAe2rt3r+rUqVMu+87Pz1f9umHK+cnh8b7i4uK0a9euKp1EkECUQXh4uCRpV9ZVCg+j64Mr012NW1R2CEC5KVaRMrXU+XleHgoLC5Xzk0N7suopIvzyvytyTxmqm7JbhYWFJBDe7nzbIjzMz6M/CqAqC7BVq+wQgPLzv4c2VEQbOizcprDwyz+OIe9olZNAAABgIYdpyOHBU6YcpmFdMOWIBAIAAAsZMmXo8jMIT7atSNTjAQCA26hAAABgIUOGPGlCeLZ1xSGBAADAQg7TlMO8/DaEJ9tWJFoYAADAbVQgAACwkK9MoiSBAADAQoZMOXwggaCFAQAA3EYFAgAAC9HCAAAAbuMqDAAAgFJQgQAAwELG/xZPtvcGJBAAAFjI4eFVGJ5sW5FIIAAAsJDDlIdP47QulvLEHAgAAOA2KhAAAFiIORAAAMBthmxyyObR9t6AFgYAAHAbFQgAACxkmOcWT7b3BiQQAABYyOFhC8OTbSsSLQwAAOA2KhAAAFjIVyoQJBAAAFjIMG0yTA+uwvBg24pECwMAALiNCgQAABaihQEAANzmkJ8cHhT4HRbGUp5IIAAAsJDp4RwIkzkQAADgSkUFAgAACzEHAgAAuM1h+slhejAHwktuZU0LAwAAuI0KBAAAFjJkk+HB73ND3lGCIIEAAMBCvjIHghYGAABwGxUIAAAs5PkkSloYAAD4nHNzIDx4mBYtDAAAcKWiAgEAgIUMD5+FwVUYAAD4IOZAAAAAtxny84n7QDAHAgAAuI0KBAAAFnKYNjk8eCS3J9tWJBIIAAAs5PBwEqWDFgYAALhSUYEAAMBChuknw4OrMAyuwgAAwPfQwgAAACgFFQgAACxkyLMrKQzrQilXJBAAAFjI8xtJeUdzwDuiBAAAVQoVCAAALOT5szC847c9CQQAABYyZJMhT+ZAcCdKAAB8jq9UILwjSgAAUKVQgQAAwEKe30jKO37bk0AAAGAhw7TJ8OQ+EF7yNE7vSHMAAECVQgUCAAALGR62MLzlRlIkEAAAWMjzp3F6RwLhHVECAIAqhQoEAAAWcsgmhwc3g/Jk24pEAgEAgIVoYQAAAJSCCgQAABZyyLM2hMO6UMoVFQgAACx0voXhyXI5pk6dqnr16ikoKEht27bVV199dcnxkydPVuPGjRUcHKzExEQNHDhQ+fn5ZT4eFQgAACxUGQ/Tmj9/vgYNGqTp06erbdu2mjx5sjp37qxt27apVq1aF41/9913NXz4cM2cOVO///3v9cMPP6h3796y2WyaNGlSmY5JBQIAgCooNze3xFJQUFDq2EmTJqlfv37q06ePkpOTNX36dIWEhGjmzJkux69Zs0bXXXed7rvvPtWrV0+dOnXSvffe+6tVi58jgQAAwEKmbDI8WMz/zZ9ITExUZGSkc8nIyHB5vMLCQmVlZSktLc25zs/PT2lpaVq7dq3LbX7/+98rKyvLmTDs3LlTS5cu1S233FLm86SFAQCAhaxqYezdu
1cRERHO9Xa73eX4I0eOyOFwKDY2tsT62NhYbd261eU29913n44cOaLrr79epmmquLhYjzzyiP72t7+VOU4qEAAAVEERERElltISiMuxcuVKTZgwQa+++qrWr1+v9957T0uWLNG4cePKvA8qEAAAWKiiH+dds2ZN+fv769ChQyXWHzp0SHFxcS63GTFihB544AH17dtXktSsWTPl5eXpL3/5i55++mn5+f16fYEKBAAAFnL872mcnizuCAwMVEpKilasWOFcZxiGVqxYodTUVJfbnDlz5qIkwd/fX5JkmmaZjksFAgAALzdo0CD16tVLbdq00bXXXqvJkycrLy9Pffr0kST17NlTCQkJzomYXbt21aRJk9SqVSu1bdtW27dv14gRI9S1a1dnIvFrSCAAALBQRbcwJOmee+7R4cOHNXLkSOXk5Khly5ZatmyZc2JldnZ2iYrD3//+d9lsNv3973/X/v37FRMTo65du2r8+PFlPqbNLGutwofl5uYqMjJSR7bVU0Q4XR9cmW5JaF3ZIQDlptgs0kr9WydPnixxZYOVzn9XpGfeLntYtcveT8HpIv3j+vfLNVYr8G0IAADcRgsDAAALOUybHB60MDzZtiKRQAAAYKHKmANRGUggAACwkOnBEzXPb+8NvCNKAABQpVCBAADAQg7Z5JAHcyA82LYikUAAAGAhw/RsHoPhJTdXoIUBAADcRgUCFeKj2TW1aFqsjh+upvrJZ9V/3F41bnXG5djiImnBP+L06b9q6GhONdVpkK8+Tx9Qmw65zjEOhzR3Ym19/l51HT9cTdVji5T2p6O698kc2byj+gcv1rX3Ed3V/ydVjynWzs3BevXvCdq2IaTU8e1uPaFeQ3MUW6dQ+3fZ9eb42vr6sws3CLru5hP6Y8+jatjsrCKqO9S/YyPt3BTsfD88qlgPDM5R6/anVSu+UCePBWjNski99Xyczpwq222HUXEMDydRerJtRfKOKOHVVv07WjPG1NF9gw7qlWVb1SD5rEb0SNKJI67z1znPx+vjd2qq/7i9mv75Zt3ywBE907eBdnx/4QN14dRYLZ0To/7P7NVrKzfrwb/t16JpsfpwZkxFnRZ8VPvbjusvow5o7qQ4Pdq5kXZuDtL4d3cqskaRy/HJbfL01Kt7tOyf1TWgUyOtWRahUTN3q27js84xQSGGNn0Vqjcn1Ha5j+qxRaoRW6wZY2vr4Zsa68UnE9XmxlwNmri3XM4RnjFk83jxBlUqgbDZbJdcRo8eXdkh4jK8P6OWutx3RJ3uOaarGuUr/dls2YMN/d+8Gi7Hf7aouu5+LEe//UOuatct1B97HVGbm07qvddinWM2rwvT7zqf0LVpuYpNLNT1t55Qq/a5+mFDaEWdFnzUHX85omXvVtf/za+u7B+DNGVYHRWctanzvcdcju/e97DWfR6uhdNqae/2IM15oba2fxesbn2OOsesWFRdc1+K0zdfhLvcx55twRrXr57+uzxSB/fYtfHLcM1+rrbadsyVn7+XNMxxxalSCcTBgwedy+TJkxUREVFi3eDBg51jTdNUcXFxJUaLsigqtGn7tyFq2e6Uc52fn9Ty+lPamuX6y76owE+B9pIfivYgU5u+ujA+uc1pbcgM174ddknSzk3B2vxVmNp0OFkOZwGcE1DNUMPmZ7R+9YUvetO06ZvV4UpOcd2Sa5pyRt+sLpkYZK0KV9OUPI9iCY1w6MxpPxkO7/i16kvO34nSk8UbVKkEIi4uzrlERkbKZrM5X2/dulXh4eH6+OOPlZKSIrvdrszMTPXu3Vvdu3cvsZ8nn3xSN954o/O1YRjKyMhQ/fr1FRwcrBYtWmjhwoUVe3I+KvdYgAyHTdE1SyZ7UTHFOnbY9cNmWt+Yq/dfr6X9O+0yDGn9F+FaszRKx366MP5P6YfUvttxPdw+WV3rttJjnZuoW9+f1OGO4+V6PvBtEdUd8g+QThwu2X47fiRA0TGuf9BExxTr+C/adccPByi61uX/A
IqoXqz7njykj99xXcVD5To/B8KTxRt43STK4cOH68UXX1SDBg0UHR1dpm0yMjL0zjvvaPr06WrYsKG++OIL3X///YqJiVH79u0vGl9QUKCCggLn69zc3IvGoPw8MnafXh5ylR5unyzZpNp1C5R2z1Etn3/hw3L1R9H6/L3qGjp1t65qdFY7N4Xo9VF1VCO2SGl3uy4lA1eCkDCHxs3ZpewfgvT2xLjKDgc+zOsSiLFjx6pjx45lHl9QUKAJEybo008/VWpqqiSpQYMGyszM1GuvveYygcjIyNCYMWMsi9mXRVQvlp+/edEvsBOHA1Q9xvWks8gaxRo5c6cK823KPR6gGnFFmjUhXnFXXUjq3hyXoD+l56h9t3MVh/pN8/XTvkAt+EccCQTKTe4xfzmKz1XQfi66ZrGOH3b9cXr8cMBFFbjomGId/8n9j9/gUIfGv7tTZ/P8NOahenIUe0ep29cY8vBZGEyiLB9t2rRxa/z27dt15swZdezYUWFhYc5lzpw52rFjh8ttnnrqKZ08edK57N3LTOfLVS3QVFLzM9qYeaEHbBjShsxwNfmVHnBgkKmatYvkKJa+XBql33W6ML+h4Kyf/H7xb8zP35RhWBo+UEJxkZ9+/DZEra6/MKfHZjPV8vrT2pzl+jLOLVkhatnudIl1rW84pS2lzAEqTUiYQxP+uVNFhTaN6l1fRQVe9/HtM0wPr8AwvSSB8LoKRGhoyX90fn5+Ms2SE+6Kii78sj19+tw/3CVLlighIaHEOLvd7vIYdru91Pfgvtv7/aRJA+uqYfMzatTqjP49I0YFZ/3U8Z5zs9BffLyuatQuUp+nDkiStq4P0dGcQDX4zRkdzammuRNryzRsumvAIec+23Y8qXlT4hSTUKi6jfO14/tgvf96LXX681GXMQBWee/1mho8ea9+2Biibd+E6PZ+hxUUYuj/5lWXJA15OVtHcqppVsa5SzI/eCNGLyzarjsf/klfrYhQ+24n1LD5WU0eUse5z/CoYsUkFKlG7LnPrsSr8yVJx38K0PHD1ZzJgz3Y0POP1VNImEMhYQ5J0smjATIM7/jC8RU8jdNLxMTE6Pvvvy+xbsOGDapW7dyEu+TkZNntdmVnZ7tsV6D8te92XLnHAvT2i7V1/HA1NfjNWY19Z7tz0tnhA4Hy+9mPqaICP815vrZysu0KDjHU5qaTGjxlj8IiHc4xjzyzV28/H6+pf0vUyaPnbiR18/1HdN/AnIo+PfiYVR9GK7KGQz2H5Cg6plg7NwXr6R71deLIuc+cmITCEpWwzetC9eyjddVrWI56D8/RgV12jXmwnvZsu3Bfk991ytXgyRcqnX+bni1JentirN6ZGKekZmfV9H9Xecxeu7VEPD2vbapD+wLL63SBUnl9AnHTTTfphRde0Jw5c5Samqp33nlH33//vVq1aiVJCg8P1+DBgzVw4EAZhqHrr79eJ0+e1JdffqmIiAj16tWrks/AN3Ttc1hd+xx2+d5zC38s8bpZ6mm9tnLLJfcXEmbo4bH79PDYfZbFCJTVh7Nq6sNZNV2+N/SupIvWrV4cpdWLo0rd3/IF1bV8QfVS3/92bZg6x7dwO05UDl+5E6XXJxCdO3fWiBEjNHToUOXn5+vBBx9Uz5499d133znHjBs3TjExMcrIyNDOnTsVFRWl1q1b629/+1slRg4AuBL5SgvDZv5yAgEukpubq8jISB3ZVk8R4d6RGQLuuiWhdWWHAJSbYrNIK/VvnTx5UhEREb++wWU4/13R7f8eVLXQy28rFeUV6t+dZpZrrFbw+goEAABViafPs/CWyzhJIAAAsJCvtDCoxwMAALdRgQAAwEK+UoEggQAAwEK+kkDQwgAAAG6jAgEAgIV8pQJBAgEAgIVMeXYpprfcnIkEAgAAC/lKBYI5EAAAwG1UIAAAsJCvVCBIIAAAsJCvJBC0MAAAgNuoQAAAYCFfqUCQQAAAYCHTtMn0IAnwZNuKRAsDAAC4jQoEAAAWM
mTz6EZSnmxbkUggAACwkK/MgaCFAQAA3EYFAgAAC/nKJEoSCAAALOQrLQwSCAAALOQrFQjmQAAAALdRgQAAwEKmhy0Mb6lAkEAAAGAhU5Jpera9N6CFAQAA3EYFAgAACxmyycadKAEAgDu4CgMAAKAUVCAAALCQYdpk40ZSAADAHabp4VUYXnIZBi0MAADgNioQAABYyFcmUZJAAABgIRIIAADgNl+ZRMkcCAAA4DYqEAAAWMhXrsIggQAAwELnEghP5kBYGEw5ooUBAADcRgUCAAALcRUGAABwm/m/xZPtvQEtDAAA4DYqEAAAWMhXWhhUIAAAsJJpwXIZpk6dqnr16ikoKEht27bVV199dcnxJ06c0KOPPqratWvLbrerUaNGWrp0aZmPRwUCAAAreViB0GVsO3/+fA0aNEjTp09X27ZtNXnyZHXu3Fnbtm1TrVq1LhpfWFiojh07qlatWlq4cKESEhK0Z88eRUVFlfmYJBAAAHi5SZMmqV+/furTp48kafr06VqyZIlmzpyp4cOHXzR+5syZOnbsmNasWaNq1apJkurVq+fWMWlhAABgofN3ovRkkaTc3NwSS0FBgcvjFRYWKisrS2lpac51fn5+SktL09q1a11u8+GHHyo1NVWPPvqoYmNjdc0112jChAlyOBxlPk8SCAAALHR+EqUniyQlJiYqMjLSuWRkZLg83pEjR+RwOBQbG1tifWxsrHJyclxus3PnTi1cuFAOh0NLly7ViBEjNHHiRD3zzDNlPk9aGAAAVEF79+5VRESE87Xdbrds34ZhqFatWnr99dfl7++vlJQU7d+/Xy+88IJGjRpVpn2QQAAAYCXTdlkTIUtsLykiIqJEAlGamjVryt/fX4cOHSqx/tChQ4qLi3O5Te3atVWtWjX5+/s71zVt2lQ5OTkqLCxUYGDgrx6XFgYAABayag5EWQUGBiolJUUrVqxwrjMMQytWrFBqaqrLba677jpt375dhmE41/3www+qXbt2mZIHiQQCAACvN2jQIM2YMUNvvfWWtmzZov79+ysvL895VUbPnj311FNPOcf3799fx44d0xNPPKEffvhBS5Ys0YQJE/Too4+W+Zi0MAAAsFIlPAzjnnvu0eHDhzVy5Ejl5OSoZcuWWrZsmXNiZXZ2tvz8LtQMEhMT9cknn2jgwIFq3ry5EhIS9MQTT2jYsGFlPiYJBAAAFqqsW1mnp6crPT3d5XsrV668aF1qaqr+85//XNaxpDImEB9++GGZd3jbbbdddjAAAMA7lCmB6N69e5l2ZrPZ3LoJBQAAVyRveSa3B8qUQPx8liYAACgdT+Msg/z8fKviAADgylBJT+OsaG4nEA6HQ+PGjVNCQoLCwsK0c+dOSdKIESP05ptvWh4gAACoetxOIMaPH6/Zs2fr+eefL3GziWuuuUZvvPGGpcEBAOB9bBYsVZ/bCcScOXP0+uuvq0ePHiVugdmiRQtt3brV0uAAAPA6tDBc279/v5KSki5abxiGioqKLAkKAABUbW4nEMnJyVq9evVF6xcuXKhWrVpZEhQAAF7LRyoQbt+JcuTIkerVq5f2798vwzD03nvvadu2bZozZ44WL15cHjECAOA9LHoaZ1XndgWiW7du+uijj/Tpp58qNDRUI0eO1JYtW/TRRx+pY8eO5REjAACoYi7rWRjt2rXT8uXLrY4FAACvdzmP5P7l9t7gsh+mtW7dOm3ZskXSuXkRKSkplgUFAIDXqoSncVYGtxOIffv26d5779WXX36pqKgoSdKJEyf0+9//XvPmzVOdOnWsjhEAAFQxbs+B6Nu3r4qKirRlyxYdO3ZMx44d05YtW2QYhvr27VseMQIA4D3OT6L0ZPECblcgVq1apTVr1qhx48bOdY0bN9Yrr7yidu3aWRocAADexmaeWzzZ3hu4nUAkJia6vGGUw+FQfHy8JUEBAOC1fGQOhNstjBdeeEGPPfaY1q1b51y3bt06PfHEE3rxxRctDQ4AAFRNZ
apAREdHy2a70JPJy8tT27ZtFRBwbvPi4mIFBATowQcfVPfu3cslUAAAvIKP3EiqTAnE5MmTyzkMAACuED7SwihTAtGrV6/yjgMAAHiRy76RlCTl5+ersLCwxLqIiAiPAgIAwKv5SAXC7UmUeXl5Sk9PV61atRQaGqro6OgSCwAAPs1HnsbpdgIxdOhQffbZZ5o2bZrsdrveeOMNjRkzRvHx8ZozZ055xAgAAKoYt1sYH330kebMmaMbb7xRffr0Ubt27ZSUlKS6detq7ty56tGjR3nECQCAd/CRqzDcrkAcO3ZMDRo0kHRuvsOxY8ckSddff72++OILa6MDAMDLnL8TpSeLN3A7gWjQoIF27dolSWrSpIkWLFgg6Vxl4vzDtQAAwJXN7QSiT58+2rhxoyRp+PDhmjp1qoKCgjRw4EANGTLE8gABAPAqPjKJ0u05EAMHDnT+d1pamrZu3aqsrCwlJSWpefPmlgYHAACqJo/uAyFJdevWVd26da2IBQAAr2eTh0/jtCyS8lWmBGLKlCll3uHjjz9+2cEAAADvUKYE4qWXXirTzmw22xWdQNzVpKUCbNUqOwygXHxy4JvKDgEoN7mnDEU3qqCD+chlnGVKIM5fdQEAAH4Ft7IGAABwzeNJlAAA4Gd8pAJBAgEAgIU8vZvkFXsnSgAAACoQAABYyUdaGJdVgVi9erXuv/9+paamav/+/ZKkt99+W5mZmZYGBwCA1/GRW1m7nUAsWrRInTt3VnBwsL755hsVFBRIkk6ePKkJEyZYHiAAAKh63E4gnnnmGU2fPl0zZsxQtWoXbqp03XXXaf369ZYGBwCAt/GVx3m7PQdi27ZtuuGGGy5aHxkZqRMnTlgREwAA3stH7kTpdgUiLi5O27dvv2h9ZmamGjRoYElQAAB4LeZAuNavXz898cQT+u9//yubzaYDBw5o7ty5Gjx4sPr3718eMQIAgCrG7RbG8OHDZRiG/vCHP+jMmTO64YYbZLfbNXjwYD322GPlESMAAF7DV24k5XYCYbPZ9PTTT2vIkCHavn27Tp8+reTkZIWFhZVHfAAAeBcfuQ/EZd9IKjAwUMnJyVbGAgAAvITbCUSHDh1ks5U+Q/Szzz7zKCAAALyap5diXqkViJYtW5Z4XVRUpA0bNuj7779Xr169rIoLAADvRAvDtZdeesnl+tGjR+v06dMeBwQAAKo+y57Gef/992vmzJlW7Q4AAO/kI/eBsOxpnGvXrlVQUJBVuwMAwCtxGWcp7rjjjhKvTdPUwYMHtW7dOo0YMcKywAAAQNXldgIRGRlZ4rWfn58aN26ssWPHqlOnTpYFBgAAqi63EgiHw6E+ffqoWbNmio6OLq+YAADwXj5yFYZbkyj9/f3VqVMnnroJAEApfOVx3m5fhXHNNddo586d5RELAADwEm4nEM8884wGDx6sxYsX6+DBg8rNzS2xAADg867wSzglN+ZAjB07Vn/96191yy23SJJuu+22Ere0Nk1TNptNDofD+igBAPAWPjIHoswJxJgxY/TII4/o888/L894AACAFyhzAmGa51Ki9u3bl1swAAB4O24k5cKlnsIJAABEC8OVRo0a/WoScezYMY8CAgAAVZ9bCcSYMWMuuhMlAAC4gBaGC3/+859Vq1at8ooFAADv5yMtjDLfB4L5DwAAVF1Tp05VvXr1FBQUpLZt2+qrr74q03bz5s2TzWZT9+7d3TpemROI81dhAACAS/DkJlKXWb2YP3++Bg0apFGjRmn9+vVq0aKFOnfurJ9++umS2+3evVuDBw9Wu3bt3D5mmRMIwzBoXwAA8Csq41kYkyZNUr9+/dSnTx8lJydr+vTpCgkJ0cyZM0vdxuFwqEePHhozZowaNGjg9jHdvpU1AAC4BIsqEL98VERBQYHLwxUWFiorK0tpaWnOdX5+fkpLS9PatWtLDXPs2LGqVauWHnroocs6TRIIAACqoMTEREVGRjqXjIwMl+OOHDkih8Oh2NjYEutjY
2OVk5PjcpvMzEy9+eabmjFjxmXH59ZVGAAA4FdYdBXG3r17FRER4Vxtt9s9Cuu8U6dO6YEHHtCMGTNUs2bNy94PCQQAABay6j4QERERJRKI0tSsWVP+/v46dOhQifWHDh1SXFzcReN37Nih3bt3q2vXrs51hmFIkgICArRt2zZdffXVv3pcWhgAAHixwMBApaSkaMWKFc51hmFoxYoVSk1NvWh8kyZN9N1332nDhg3O5bbbblOHDh20YcMGJSYmlum4VCAAALBSJdxIatCgQerVq5fatGmja6+9VpMnT1ZeXp769OkjSerZs6cSEhKUkZGhoKAgXXPNNSW2j4qKkqSL1l8KCQQAABaqjFtZ33PPPTp8+LBGjhypnJwctWzZUsuWLXNOrMzOzpafn7VNBxIIAACuAOnp6UpPT3f53sqVKy+57ezZs90+HgkEAABW8pFnYZBAAABgJR9JILgKAwAAuI0KBAAAFrL9b/Fke29AAgEAgJV8pIVBAgEAgIUq4zLOysAcCAAA4DYqEAAAWIkWBgAAuCxekgR4ghYGAABwGxUIAAAs5CuTKEkgAACwko/MgaCFAQAA3EYFAgAAC9HCAAAA7qOFAQAA4BoVCAAALEQLAwAAuM9HWhgkEAAAWMlHEgjmQAAAALdRgQAAwELMgQAAAO6jhQEAAOAaFQgAACxkM03ZzMsvI3iybUUigQAAwEq0MAAAAFyjAgEAgIW4CgMAALiPFgYAAIBrVCAAALAQLQwAAOA+H2lhkEAAAGAhX6lAMAcCAAC4jQoEAABWooUBAAAuh7e0ITxBCwMAALiNCgQAAFYyzXOLJ9t7ARIIAAAsxFUYAAAApaACAQCAlbgKAwAAuMtmnFs82d4b0MIAAABuowKBctG112Hd1f8nVY8p1s7NwXp1RIK2bQgtdXy7W0+o15CDiq1TqP277HpzQry+/izC+f51N5/QHx84qobNzygi2qH+nRpp56aQEvuoXbdA/UYc0G+uPa1qgaayVkZo6t8TdOJItXI7T+C8D2fV1MJptXTscIAaJJ/VgGf2q0mrMy7HFhdJ816J1af/qq4jOdVU5+oCPfT0Af22wynnmDOn/fTW87W15uNInTgaoKt/c1b9x+1T45ZnK+qUcLl8pIXhlRWI2bNnKyoqqrLDQCna33Zcfxl1QHMnxenRLo21c3Owxs/dqcgaRS7HJ7fJ01NTd2vZP2toQOfGWvNJpEa9uUt1G1/4oAwKMbTpq1C9OT7e5T7swQ5NeHeHTFMadneSBnVvqIBqhsbO3iWbt0xphtda+e8ovT4mXj0G5WjqJ9vUIPmsnr6vgU4ccf0bbfZztbX0nRoa8Mw+zVi5VX984IjGPlRf278Ldo556a+JWv9FmIa+skfTV2xVSvtTGn5Pko4cJCGu6s5fheHJ4g0qNYHo3bu3bDbbRcv27dsrMyx46I5+h7Xs3Rr6vwU1lP1jkKYMr6OCs37q/OdjLsd3f+iw1q2M0MLptbR3e5DmvFBb278PVrc+R5xjViyqrrmT4/TN6jCX+/jNb/MUm1ioiQOv0u6twdq9NVgvPFlXDVucUcvrT5fLeQLnvfd6jLrcd1Sd/3xMdRsV6PHn9skebOiTf1Z3OX7Four682M/6do/nFLtuoXq2uuofntTrha9FiNJKjhrU+bSKPX9+0E1+12eEuoX6oHBOYqvV6DFc2pU5Knhcpy/D4Qnixeo9ApEly5ddPDgwRJL/fr1KzssXKaAaoYaNj+j9T/7ojdNm77JDFNySp7LbZqm5F2UGGStDFfTUsa7Us1uSqZUVGhzrisqsMk0pN/8lgQC5aeo0KYfvw1R63YX/s78/KRW7U5rc5brtl1RoU2B9pIz5exBhjZ9de7fgcNhk+G49BigslV6AmG32xUXF1diefnll9WsWTOFhoYqMTFRAwYM0OnTpX8JbNy4UR06dFB4eLgiIiKUkpKidevWOd/PzMxUu3btFBwcrMTERD3++OPKyyv9y6mgo
EC5ubklFpRNRHWH/AN00byD44erKTqm2OU20THFOn74F+OPlD7ela1Zoco/46eHnj4ge5Ahe7BD/UYckH+AVD227PsB3JV7zF+Gw6aomJItuuiaRTp+2HULI6X9KS16PUb7dwbKMKSsVWH6cmmUjv10bnxImKGmKXl6d3KcjuYEyOGQViyK1pasUB07xNS1qo4WRiXy8/PTlClTtGnTJr311lv67LPPNHTo0FLH9+jRQ3Xq1NHXX3+trKwsDR8+XNWqnftC2rFjh7p06aI777xT3377rebPn6/MzEylp6eXur+MjAxFRkY6l8TERMvPEdY6eSxAzzxcT23TcvXBj9/q/a3fKTTSoR+/DZbpJZdEwXf0H7dPCfUL1feGpvpj3RZ69ek66nTPUdl+9ok89JU9Mk3pvtbX6NZ6LfTBmzV1Y/fjJcagijItWLxApaeyixcvVljYhZLczTffrH/961/O1/Xq1dMzzzyjRx55RK+++qrLfWRnZ2vIkCFq0qSJJKlhw4bO9zIyMtSjRw89+eSTzvemTJmi9u3ba9q0aQoKCrpof0899ZQGDRrkfJ2bm0sSUUa5x/zlKJaiav7i11hM6b/Gjh8OULQbv95Ks/6LCPW5LlkR0cVyOKS83AD985vvdXCP3b2TANwQUd0hP39TJ9yookXVcGj0rF0qzLcp93iAasQV6c3xtRV3VYFzTHy9Qr343nbln/FT3ik/1Ygt1viH66p23QKX+wQqWqXnsh06dNCGDRucy5QpU/Tpp5/qD3/4gxISEhQeHq4HHnhAR48e1Zkzri+JGjRokPr27au0tDQ9++yz2rFjh/O9jRs3avbs2QoLC3MunTt3lmEY2rVrl8v92e12RURElFhQNsVFfvrx2xC1+tnERZvNVMvrS+8Hb8kKvWiiY+sbTmlLKeN/Te7xAOXlBqjFdacUVbNY/1nO/38oP9UCTTVsfkbfZF74IWQY0oZLzPs5LzDIVM3aRXIUS5lLo5Ta+eJ2aVCIoRqxxTp1wl9ZqyJcjkHVQgujgoSGhiopKcm5FBQU6NZbb1Xz5s21aNEiZWVlaerUqZKkwsJCl/sYPXq0Nm3apD/+8Y/67LPPlJycrPfff1+SdPr0aT388MMlkpSNGzfqxx9/1NVXX11h5+lL3psRo5vvO6q0Px1TYlK+Hnt2n4KCDf3f/HMz0oe8vEd9hh9wjv/gzRi1uTFXdz78kxKvztf9gw6qYfOz+vesms4x4VHFavCbM7qq0blfX4lXF6jBb86UqFx0uvuomrTOU+26BbrpjmP6+2u79f6MGO3bcXGVCbDSHX85rI/fraHlC6KV/aNdrwyvo/wzfur0vyuPnn/8Ks2cUNs5fuv6EGUujdTBPYH67r+herrH1TIN6e4BPznHrFsZrq8/D1dOdqCyVoVp6F1JSkzKV6d7jlb4+cFNPnIVRqW3MH4pKytLhmFo4sSJ8vM7l98sWLDgV7dr1KiRGjVqpIEDB+ree+/VrFmzdPvtt6t169bavHmzkpKSyjt0/M+qD6MVWb1YPQcfVHRMsXZuCtbT9zdwTqyMiS+U8bN5CZvXherZ9HrqNfSgeg87qAO77BrzUH3t2XbhmvjfdTqpwS/tdb7+27Q9kqS3J8bqnUnnPpjrXF2gPk8dVHiUQ4f2BeqfU2L13usxFXDG8HU3djuhk0cDNOeF2jp+OEANfnNW4+fudLYwDu8PlN/Pfq4VFtj01nO1dTA7UMEhhn77h1wNnbJHYZEO55i8XH/NyqitIwerKTzKoetuOaE+ww8qgNtAoIqocglEUlKSioqK9Morr6hr16768ssvNX369FLHnz17VkOGDNFdd92l+vXra9++ffr666915513SpKGDRum3/3ud0pPT1ffvn0VGhqqzZs3a/ny5frHP/5RUaflcz6cHaMPZ7v+8h76p4YXrVu9OEqrF0eVur/lC2po+YJLX/8+MyNeMzNc32gKKG/dHjyibg8ecfneC4tK3tumeWqeZ
qzaesn9tb/thNrfdsKq8FCBeJx3JWnRooUmTZqk5557Ttdcc43mzp2rjIyMUsf7+/vr6NGj6tmzpxo1aqS7775bN998s8aMGSNJat68uVatWqUffvhB7dq1U6tWrTRy5EjFx/NFAwAoBz5yFYbNNL2k2VKJcnNzFRkZqRtt3RVgo36IK9Mn+7+p7BCAcpN7ylB0o506efJkuU2MP/9dkdplrAKqXf7cq+KifK1dNrJcY7VClWthAADgzXylhUECAQCAlQzz3OLJ9l6ABAIAACt5Oo/BO/KHqjeJEgAAVH1UIAAAsJBNHs6BsCyS8kUCAQCAlTy9m6SXXBxJCwMAALiNCgQAABbiMk4AAOA+rsIAAABwjQQCAAAL2UzT4+VyTJ06VfXq1VNQUJDatm2rr776qtSxM2bMULt27RQdHa3o6GilpaVdcrwrJBAAAFjJsGBx0/z58zVo0CCNGjVK69evV4sWLdS5c2f99NNPLsevXLlS9957rz7//HOtXbtWiYmJ6tSpk/bv31/mY5JAAADg5SZNmqR+/fqpT58+Sk5O1vTp0xUSEqKZM2e6HD937lwNGDBALVu2VJMmTfTGG2/IMAytWLGizMckgQAAwEJWtTByc3NLLAUFBS6PV1hYqKysLKWlpTnX+fn5KS0tTWvXri1TzGfOnFFRUZGqV69e5vMkgQAAwEqmBYukxMRERUZGOpeMjAyXhzty5IgcDodiY2NLrI+NjVVOTk6ZQh42bJji4+NLJCG/hss4AQCwkkV3oty7d68iIiKcq+12u6eRufTss89q3rx5WrlypYKCgsq8HQkEAABVUERERIkEojQ1a9aUv7+/Dh06VGL9oUOHFBcXd8ltX3zxRT377LP69NNP1bx5c7fio4UBAICFzt+J0pPFHYGBgUpJSSkxAfL8hMjU1NRSt3v++ec1btw4LVu2TG3atHH7PKlAAABgpUp4mNagQYPUq1cvtWnTRtdee60mT56svLw89enTR5LUs2dPJSQkOOdRPPfccxo5cqTeffdd1atXzzlXIiwsTGFhYWU6JgkEAABe7p577tHhw4c1cuRI5eTkqGXLllq2bJlzYmV2drb8/C40HaZNm6bCwkLdddddJfYzatQojR49ukzHJIEAAMBCNuPc4sn2lyM9PV3p6eku31u5cmWJ17t37768g/wMCQQAAFaqhBZGZWASJQAAcBsVCAAArOQjj/MmgQAAwEKePFHz/PbegBYGAABwGxUIAACs5COTKEkgAACwkinJg8s4mQMBAIAPYg4EAABAKahAAABgJVMezoGwLJJyRQIBAICVfGQSJS0MAADgNioQAABYyZBk83B7L0ACAQCAhbgKAwAAoBRUIAAAsJKPTKIkgQAAwEo+kkDQwgAAAG6jAgEAgJV8pAJBAgEAgJW4jBMAALiLyzgBAABKQQUCAAArMQcCAAC4zTAlmwdJgOEdCQQtDAAA4DYqEAAAWIkWBgAAcJ+HCYS8I4GghQEAANxGBQIAACvRwgAAAG4zTHnUhuAqDAAAcKWiAgEAgJVM49ziyfZegAQCAAArMQcCAAC4jTkQAAAArlGBAADASrQwAACA20x5mEBYFkm5ooUBAADcRgUCAAAr0cIAAABuMwxJHtzLwfCO+0DQwgAAAG6jAgEAgJVoYQAAALf5SAJBCwMAALiNCgQAAFbykVtZk0AAAGAh0zRkevBETU+2rUgkEAAAWMk0PasiMAcCAABcqahAAABgJdPDORBeUoEggQAAwEqGIdk8mMfgJXMgaGEAAAC3UYEAAMBKtDAAAIC7TMOQ6UELw1su46SFAQAA3EYFAgAAK9HCAAAAbjNMyXblJxC0MAAAgNuoQAAAYCXTlOTJfSC8owJBAgEAgIVMw5TpQQvDJIEAAMAHmYY8q0BwGScAALhCUYEAAMBCtDAAAID7fKSFQQJRBuezwWKzqJIjAcpP7inv+NACLkfu6XN/3xXx675YRR7dR6pY3vFdQwJRBqdOn
ZIkZWqJR38UQFUW3aiyIwDK36lTpxQZGVku+w4MDFRcXJwyc5Z6vK+4uDgFBgZaEFX5sZne0mypRIZh6MCBAwoPD5fNZqvscHxCbm6uEhMTtXfvXkVERFR2OICl+PuueKZp6tSpU4qPj5efX/ldP5Cfn6/CwkKP9xMYGKigoCALIio/VCDKwM/PT3Xq1KnsMHxSREQEH7C4YvH3XbHKq/Lwc0FBQVX+i98qXMYJAADcRgIBAADcRgKBKslut2vUqFGy2+2VHQpgOf6+cSVgEiUAAHAbFQgAAOA2EggAAOA2EggAAOA2EggAqECzZ89WVFRUZYcBeIwEAuXKZrNdchk9enRlhwhclt69e7v8m96+fXtlhwZUCO5EiXJ18OBB53/Pnz9fI0eO1LZt25zrwsLCnP9tmqYcDocCAvizhHfo0qWLZs2aVWJdTExMJUUDVCwqEChXcXFxziUyMlI2m835euvWrQoPD9fHH3+slJQU2e12ZWZmqnfv3urevXuJ/Tz55JO68cYbna8Nw1BGRobq16+v4OBgtWjRQgsXLqzYk4PPs9vtJf7G4+Li9PLLL6tZs2YKDQ1VYmKiBgwYoNOnT5e6j40bN6pDhw4KDw9XRESEUlJStG7dOuf7mZmZateunYKDg5WYmKjHH39ceXl5FXF6wCWRQKDSDR8+XM8++6y2bNmi5s2bl2mbjIwMzZkzR9OnT9emTZs0cOBA3X///Vq1alU5Rwtcmp+fn6ZMmaJNmzbprbfe0meffaahQ4eWOr5Hjx6qU6eOvv76a2VlZWn48OGqVq2aJGnHjh3q0qWL7rzzTn377beaP3++MjMzlZ6eXlGnA5SKWjEq3dixY9WxY8cyjy8oKNCECRP06aefKjU1VZLUoEEDZWZm6rXXXlP79u3LK1SghMWLF5dow918883617/+5Xxdr149PfPMM3rkkUf06quvutxHdna2hgwZoiZNmkiSGjZs6HwvIyNDPXr00JNPPul8b8qUKWrfvr2mTZvmMw9tQtVEAoFK16ZNG7fGb9++XWfOnLko6SgsLFSrVq2sDA24pA4dOmjatGnO16Ghofr000+VkZGhrVu3Kjc3V8XFxcrPz9eZM2cUEhJy0T4GDRqkvn376u2331ZaWpr+9Kc/6eqrr5Z0rr3x7bffau7cuc7xpmnKMAzt2rVLTZs2Lf+TBEpBAoFKFxoaWuK1n5+ffnmH9aKiIud/n+8nL1myRAkJCSXG8WwBVKTQ0FAlJSU5X+/evVu33nqr+vfvr/Hjx6t69erKzMzUQw89pMLCQpcJxOjRo3XfffdpyZIl+vjjjzVq1CjNmzdPt99+u06fPq2HH35Yjz/++EXbXXXVVeV6bsCvIYFAlRMTE6Pvv/++xLoNGzY4+8LJycmy2+3Kzs6mXYEqJSsrS4ZhaOLEifLzOzfFbMGCBb+6XaNGjdSoUSMNHDhQ9957r2bNmqXbb79drVu31ubNm0skKUBVwSRKVDk33XST1q1bpzlz5ujHH3/UqFGjSiQU4eHhGjx4sAYOHKi33npLO3bs0Pr16/XKK6/orbfeqsTI4euSkpJUVFSkV155RTt37tTbb7+t6dOnlzr+7NmzSk9P18qVK7Vnzx59+eWX+vrrr52tiWHDhmnNmjVKT0/Xhg0b9OOPP+rf//43kyhRJZBAoMrp3LmzRowYoaFDh+q3v/2tTp06pZ49e5YYM27cOI0YMUIZGRlq2rSpunTpoiVLlqh+/fqVFDUgtWjRQpMmTdJzzz2na665RnPnzlVGRkap4/39/XX06FH17NlTjRo10t13362bb75ZY8aMkSQ1b95cq1at0g8//KB27dqpVatWGjlypOLj4yvqlIBS8ThvAADgNioQAADAbSQQAADAbSQQAADAbSQQAADAbSQQAADAbSQQAADAbSQQAADAbSQQAADAbSQQgJfo3bu3unfv7nx94403Oh/zXJFWrlwpm82mEydOlDrGZrPpgw8+KPM+R48er
ZYtW3oU1+7du2Wz2bRhwwaP9gOgbEggAA/07t1bNptNNptNgYGBSkpK0tixY1VcXFzux37vvfc0bty4Mo0ty5c+ALiDp3ECHurSpYtmzZqlgoICLV26VI8++qiqVaump5566qKxhYWFCgwMtOS41atXt2Q/AHA5qEAAHrLb7YqLi1PdunXVv39/paWl6cMPP5R0oe0wfvx4xcfHq3HjxpKkvXv36u6771ZUVJSqV6+ubt26affu3c59OhwODRo0SFFRUapRo4aGDh2qXz625pctjIKCAg0bNkyJiYmy2+1KSkrSm2++qd27d6tDhw6SpOjoaNlsNvXu3VuSZBiGMjIyVL9+fQUHB6tFixZauHBhieMsXbpUjRo1UnBwsDp06FAizrIaNmyYGjVqpJCQEDVo0EAjRoxQUVHRReNee+01JSYmKiQkRHfffbdOnjxZ4v033nhDTZs2VVBQkJo0aaJXX33V7VgAWIMEArBYcHCwCgsLna9XrFihbdu2afny5Vq8eLGKiorUuXNnhYeHa/Xq1fryyy8VFhamLl26OLebOHGiZs+erZkzZyozM1PHjh3T+++/f8nj9uzZU//85z81ZcoUbdmyRa+99prCwsKUmJioRYsWSZK2bdumgwcP6uWXX5YkZWRkaM6cOZo+fbo2bdqkgQMH6v7779eqVasknUt07rjjDnXt2lUbNmxQ3759NXz4cLf/NwkPD9fs2bO1efNmvfzyy5oxY4ZeeumlEmO2b9+uBQsW6KOPPtKyZcv0zTffaMCAAc73586dq5EjR2r8+PHasmWLJkyYoBEjRvAId6CymAAuW69evcxu3bqZpmmahmGYy5cvN+12uzl48GDn+7GxsWZBQYFzm7ffftts3LixaRiGc11BQYEZHBxsfvLJJ6Zpmmbt2rXN559/3vl+UVGRWadOHeexTNM027dvbz7xxBOmaZrmtm3bTEnm8uXLXcb5+eefm5LM48ePO9fl5+ebISEh5po1a0qMfeihh8x7773XNE3TfOqpp8zk5OQS7w8bNuyiff2SJPP9998v9f0XXnjBTElJcb4eNWqU6e/vb+7bt8+57uOPPzb9/PzMgwcPmqZpmldffbX57rvvltjPuHHjzNTUVNM0TXPXrl2mJPObb74p9bgArMMcCMBDixcvVlhYmIqKimQYhu677z6NHj3a+X6zZs1KzHvYuHGjtm/frvDw8BL7yc/P144dO3Ty5EkdPHhQbdu2db4XEBCgNm3aXNTGOG/Dhg3y9/dX+/btyxz39u3bdebMGXXs2LHE+sLCQrVq1UqStGXLlhJxSFJqamqZj3He/PnzNWXKFO3YsUOnT59WcXGxIiIiSoy56qqrlJCQUOI4hmFo27ZtCg8P144dO/TQQw+pX79+zjHFxcWKjIx0Ox4AniOBADzUoUMHTZs2TYGBgYqPj1dAQMl/VqGhoSVenz59WikpKZo7d+5F+4qJibmsGIKDg93e5vTp05KkJUuWlPjils7N67DK2rVr1aNHD40ZM0adO3dWZGSk5s2bp4kTJ7od64wZMy5KaPz9/S2LFUDZkUAAHgoNDVVSUlKZx7du3Vrz589XrVq1LvoVfl7t2rX13//+VzfccIOkc7+0s7Ky1Lp1a5fjmzVrJsMwtGrVKqWlpV30/vkKiMPhcK5LTk6W3W5XdnZ2qZWLpk2bOieEnvef//zn10/yZ9asWaO6devq6aefdq7bs2fPReOys7N14MABxcfHO4/j5+enxo0bKzY2VvHx8dq5c6d69Ojh1vEBlA8mUQIVrEePHqpZs6a6deum1atXa9euXVq5cqUef/xx7du3T5L0xBNP6Nlnn9UHH3ygrVu3asCAAZe8h0O9evXUq1cvPfjgg/rggw+c+1ywYIEkqW7durLZbFq8eLEOHz6s06dPKzw8XIMHD9bAgQP11ltvaceOHVq/fr1eeeUV58TERx55RD/++KOGDBmibdu26d1339Xs2bPdOt+GDRsqOztb8
+bN044dOzRlyhSXE0KDgoLUq1cvbdy4UatXr9bjjz+uu+++W3FxcZKkMWPGKCMjQ1OmTNEPP/yg7777TrNmzdKkSZPcigeANUgggAoWEhKiL774QldddZXuuOMONW3aVA899JDy8/OdFYm//vWveuCBB9SrVy+lpqYqPDxct99++yX3O23aNN11110aMGCAmjRpon79+ikvL0+SlJCQoDFjxmj48OGKjY1Venq6JGncuHEaMWKEMjIy1LRpU3Xp0kVLlixR/fr1JZ2bl7Bo0SJ98MEHatGihaZPn64JEya4db633XabBg4cqPT0dLVs2VJr1qzRiBEjLhqXlJSkO+64Q7fccos6deqk5s2bl7hMs2/fvnrjjTc0a9YsNWvWTO3bt9fs2bOdsQKoWDaztFlZAAAApaACAQAA3EYCAQAA3EYCAQAA3EYCAQAA3EYCAQAA3EYCAQAA3EYCAQAA3EYCAQAA3EYCAQAA3EYCAQAA3EYCAQAA3Pb/tdVwvHV1WkUAAAAASUVORK5CYII=\n" }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "## UNSW-NB15" ], "metadata": { "id": "qjs4Ip6kFAJZ" } }, { "cell_type": "code", "source": [ "import tensorflow as tf\n", "import numpy as np\n", "import pandas as pd\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "from keras import losses\n", "from keras import optimizers\n", "from keras import metrics\n", "import math\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ], "metadata": { "id": "t6hf8w1lMbqG" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "496bd7cd-ee69-436f-9006-8a085e020510", "id": "CDFWCDSjFAJj" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "CSV_HEADER = [\n", "\"srcip\",\n", "\"sport\",\n", "\"dstip\",\n", "\"dsport\",\n", "\"proto\",\n", "\"state\",\n", "\"dur\",\n", "\"sbytes\",\n", "\"dbytes\",\n", "\"sttl\",\n", "\"dttl\",\n", "\"sloss\",\n", "\"dloss\",\n", "\"service\",\n", "\"Sload\",\n", "\"Dload\",\n", "\"Spkts\",\n", "\"Dpkts\",\n", "\"swin\",\n", "\"dwin\",\n", "\"stcpb\",\n", "\"dtcpb\",\n", "\"smeansz\",\n", "\"dmeansz\",\n", "\"trans_depth\",\n", "\"res_bdy_len\",\n", "\"Sjit\",\n", "\"Djit\",\n", 
"\"Stime\",\n", "\"Ltime\",\n", "\"Sintpkt\",\n", "\"Dintpkt\",\n", "\"tcprtt\",\n", "\"synack\",\n", "\"ackdat\",\n", "\"is_sm_ips_ports\",\n", "\"ct_state_ttl\",\n", "\"ct_flw_http_mthd\",\n", "\"is_ftp_login\",\n", "\"ct_ftp_cmd\",\n", "\"ct_srv_src\",\n", "\"ct_srv_dst\",\n", "\"ct_dst_ltm\",\n", "\"ct_src_ ltm\",\n", "\"ct_src_dport_ltm\",\n", "\"ct_dst_sport_ltm\",\n", "\"ct_dst_src_ltm\",\n", "\"attack_cat\",\n", "\"Label\"\n", "]" ], "metadata": { "id": "-nz1C-V-dNpp" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "import glob\n", "import os\n", "\n", "# Google Drive folder that holds the UNSW-NB15 CSV files (Drive must be mounted first).\n", "# It replaces a leftover Windows path that os.path.join discarded, because the glob\n", "# pattern joined onto it was itself an absolute path.\n", "path = \"/content/drive/MyDrive/datasets/UNSW-NB15\"\n", "# glob returns matches in arbitrary order; sort so the row order is the same on every run.\n", "all_files = sorted(glob.glob(os.path.join(path, \"*.csv\")))\n", "if not all_files:\n", "    raise FileNotFoundError(f\"No CSV files found in {path}\")\n", "\n", "li = []\n", "\n", "for filename in all_files:\n", "    # header=None: every row is read as data and the columns are named from CSV_HEADER.\n", "    df = pd.read_csv(filename, index_col=None, header=None, names=CSV_HEADER, low_memory=False)\n", "    li.append(df)\n", "\n", "frame = pd.concat(li, axis=0, ignore_index=True)" ], "metadata": { "id": "D-JA_26MFAJj" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "pd.set_option('display.max_columns', None)\n", "frame" ], "metadata": { "id": "GUChMqNeFAJk", "colab": { "base_uri": "https://localhost:8080/", "height": 461 }, "outputId": "e0557a78-8300-4d34-a2a8-dac9470b903f" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " srcip sport dstip dsport proto state dur \\\n", "0 59.166.0.9 7045 149.171.126.7 25 tcp FIN 0.201886 \n", "1 59.166.0.9 9685 149.171.126.2 80 tcp FIN 5.864748 \n", "2 59.166.0.2 1421 149.171.126.4 53 udp CON 0.001391 \n", "3 59.166.0.2 21553 149.171.126.2 25 tcp FIN 0.053948 \n", "4 59.166.0.8 45212 149.171.126.4 53 udp CON 0.000953 \n", "... ... ... ... ... ... ... ... ... 
\n", "2540042 59.166.0.8 12520 149.171.126.6 31010 tcp FIN 0.020383 \n", "2540043 59.166.0.0 18895 149.171.126.9 80 tcp FIN 1.402957 \n", "2540044 59.166.0.0 30103 149.171.126.5 5190 tcp FIN 0.007108 \n", "2540045 59.166.0.6 30388 149.171.126.5 111 udp CON 0.004435 \n", "2540046 59.166.0.0 6055 149.171.126.5 54145 tcp FIN 0.072974 \n", "\n", " sbytes dbytes sttl dttl sloss dloss service Sload \\\n", "0 37552 3380 31 29 18 8 smtp 1.459438e+06 \n", "1 19410 1087890 31 29 2 370 http 2.640454e+04 \n", "2 146 178 31 29 0 0 dns 4.198418e+05 \n", "3 37812 3380 31 29 19 8 smtp 5.503374e+06 \n", "4 146 178 31 29 0 0 dns 6.128017e+05 \n", "... ... ... ... ... ... ... ... ... \n", "2540042 320 1874 31 29 1 2 - 1.047932e+05 \n", "2540043 19410 1087890 31 29 2 370 http 1.103783e+05 \n", "2540044 2158 2464 31 29 6 6 - 2.328644e+06 \n", "2540045 568 304 31 29 0 0 - 7.684329e+05 \n", "2540046 4238 60788 31 29 7 30 - 4.582454e+05 \n", "\n", " Dload Spkts Dpkts swin dwin stcpb dtcpb \\\n", "0 1.307669e+05 52 42 255 255 1422136554 3572668484 \n", "1 1.481983e+06 364 746 255 255 389619597 394688654 \n", "2 5.118620e+05 2 2 0 0 0 0 \n", "3 4.893601e+05 54 42 255 255 4047523379 1903327524 \n", "4 7.471144e+05 2 2 0 0 0 0 \n", "... ... ... ... ... ... ... ... \n", "2540042 6.436736e+05 6 8 255 255 3208686479 3225486168 \n", "2540043 6.195098e+06 364 746 255 255 283296697 2429736754 \n", "2540044 2.658413e+06 24 24 255 255 703293844 2848960529 \n", "2540045 4.112740e+05 4 4 0 0 0 0 \n", "2540046 6.571546e+06 72 72 255 255 1003293149 1003585034 \n", "\n", " smeansz dmeansz trans_depth res_bdy_len Sjit Djit \\\n", "0 722 80 0 0 456.043567 15.530109 \n", "1 53 1458 1 0 1031.366423 690.219581 \n", "2 73 89 0 0 0.000000 0.000000 \n", "3 700 80 0 0 65.909688 3.155258 \n", "4 73 89 0 0 0.000000 0.000000 \n", "... ... ... ... ... ... ... 
\n", "2540042 53 234 0 0 212.810729 3.079195 \n", "2540043 53 1458 1 3924 203.808900 114.173588 \n", "2540044 90 103 0 0 17.627831 0.432619 \n", "2540045 142 76 0 0 1.638604 1.390643 \n", "2540046 59 844 0 0 62.045310 61.899776 \n", "\n", " Stime Ltime Sintpkt Dintpkt tcprtt synack \\\n", "0 1424250009 1424250009 3.943843 4.912488 0.000590 0.000473 \n", "1 1424250003 1424250009 16.155447 7.871279 0.000771 0.000638 \n", "2 1424250009 1424250009 0.009000 0.002000 0.000000 0.000000 \n", "3 1424250009 1424250009 1.011547 1.302561 0.000674 0.000540 \n", "4 1424250009 1424250009 0.009000 0.004000 0.000000 0.000000 \n", "... ... ... ... ... ... ... \n", "2540042 1421955842 1421955842 4.007400 2.027429 0.006386 0.006189 \n", "2540043 1421955841 1421955842 3.864028 1.882421 0.000712 0.000550 \n", "2540044 1421955842 1421955842 0.274261 0.285478 0.000657 0.000532 \n", "2540045 1421955842 1421955842 1.165667 0.987333 0.000000 0.000000 \n", "2540046 1421955842 1421955842 1.022690 0.997042 0.002317 0.002173 \n", "\n", " ackdat is_sm_ips_ports ct_state_ttl ct_flw_http_mthd \\\n", "0 0.000117 0 0 NaN \n", "1 0.000133 0 0 1.0 \n", "2 0.000000 0 0 NaN \n", "3 0.000134 0 0 NaN \n", "4 0.000000 0 0 NaN \n", "... ... ... ... ... \n", "2540042 0.000197 0 0 0.0 \n", "2540043 0.000162 0 0 4.0 \n", "2540044 0.000125 0 0 0.0 \n", "2540045 0.000000 0 0 0.0 \n", "2540046 0.000144 0 0 0.0 \n", "\n", " is_ftp_login ct_ftp_cmd ct_srv_src ct_srv_dst ct_dst_ltm \\\n", "0 NaN 2 2 7 \n", "1 NaN 3 1 4 \n", "2 NaN 3 5 2 \n", "3 NaN 1 1 4 \n", "4 NaN 2 5 2 \n", "... ... ... ... ... ... \n", "2540042 0.0 0 8 20 7 \n", "2540043 0.0 0 1 1 2 \n", "2540044 0.0 0 13 13 6 \n", "2540045 0.0 0 10 13 6 \n", "2540046 0.0 0 13 13 6 \n", "\n", " ct_src_ ltm ct_src_dport_ltm ct_dst_sport_ltm ct_dst_src_ltm \\\n", "0 4 1 1 3 \n", "1 4 1 1 1 \n", "2 7 1 1 4 \n", "3 7 1 1 3 \n", "4 1 1 1 2 \n", "... ... ... ... ... 
\n", "2540042 5 1 1 4 \n", "2540043 7 2 2 2 \n", "2540044 7 2 1 2 \n", "2540045 5 1 1 3 \n", "2540046 7 1 1 2 \n", "\n", " attack_cat Label \n", "0 NaN 0 \n", "1 NaN 0 \n", "2 NaN 0 \n", "3 NaN 0 \n", "4 NaN 0 \n", "... ... ... \n", "2540042 NaN 0 \n", "2540043 NaN 0 \n", "2540044 NaN 0 \n", "2540045 NaN 0 \n", "2540046 NaN 0 \n", "\n", "[2540047 rows x 49 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
srcipsportdstipdsportprotostatedursbytesdbytessttldttlslossdlossserviceSloadDloadSpktsDpktsswindwinstcpbdtcpbsmeanszdmeansztrans_depthres_bdy_lenSjitDjitStimeLtimeSintpktDintpkttcprttsynackackdatis_sm_ips_portsct_state_ttlct_flw_http_mthdis_ftp_loginct_ftp_cmdct_srv_srcct_srv_dstct_dst_ltmct_src_ ltmct_src_dport_ltmct_dst_sport_ltmct_dst_src_ltmattack_catLabel
059.166.0.97045149.171.126.725tcpFIN0.2018863755233803129188smtp1.459438e+061.307669e+055242255255142213655435726684847228000456.04356715.530109142425000914242500093.9438434.9124880.0005900.0004730.00011700NaNNaN2274113NaN0
159.166.0.99685149.171.126.280tcpFIN5.86474819410108789031292370http2.640454e+041.481983e+06364746255255389619597394688654531458101031.366423690.2195811424250003142425000916.1554477.8712790.0007710.0006380.000133001.0NaN3144111NaN0
259.166.0.21421149.171.126.453udpCON0.001391146178312900dns4.198418e+055.118620e+052200007389000.0000000.000000142425000914242500090.0090000.0020000.0000000.0000000.00000000NaNNaN3527114NaN0
359.166.0.221553149.171.126.225tcpFIN0.0539483781233803129198smtp5.503374e+064.893601e+05544225525540475233791903327524700800065.9096883.155258142425000914242500091.0115471.3025610.0006740.0005400.00013400NaNNaN1147113NaN0
459.166.0.845212149.171.126.453udpCON0.000953146178312900dns6.128017e+057.471144e+052200007389000.0000000.000000142425000914242500090.0090000.0040000.0000000.0000000.00000000NaNNaN2521112NaN0
......................................................................................................................................................
254004259.166.0.812520149.171.126.631010tcpFIN0.0203833201874312912-1.047932e+056.436736e+0568255255320868647932254861685323400212.8107293.079195142195584214219558424.0074002.0274290.0063860.0061890.000197000.00.0082075114NaN0
254004359.166.0.018895149.171.126.980tcpFIN1.40295719410108789031292370http1.103783e+056.195098e+06364746255255283296697242973675453145813924203.808900114.173588142195584114219558423.8640281.8824210.0007120.0005500.000162004.00.001127222NaN0
254004459.166.0.030103149.171.126.55190tcpFIN0.00710821582464312966-2.328644e+062.658413e+0624242552557032938442848960529901030017.6278310.432619142195584214219558420.2742610.2854780.0006570.0005320.000125000.00.00131367212NaN0
254004559.166.0.630388149.171.126.5111udpCON0.004435568304312900-7.684329e+054.112740e+0544000014276001.6386041.390643142195584214219558421.1656670.9873330.0000000.0000000.000000000.00.00101365113NaN0
254004659.166.0.06055149.171.126.554145tcpFIN0.0729744238607883129730-4.582454e+056.571546e+06727225525510032931491003585034598440062.04531061.899776142195584214219558421.0226900.9970420.0023170.0021730.000144000.00.00131367112NaN0
\n", "

2540047 rows × 49 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "source": [ "# Columns replaced by integer category codes. `.cat.codes` encodes missing values as -1.\n", "CATEGORICAL_COLUMNS = [\n", "    'srcip', 'dstip', 'proto', 'state', 'service',\n", "    'ct_flw_http_mthd', 'is_ftp_login', 'ct_ftp_cmd',\n", "]\n", "for column in CATEGORICAL_COLUMNS:\n", "    frame[column] = frame[column].astype('category').cat.codes\n", "\n", "# Map the label by exact value rather than by substring replacement.\n", "frame['Label'] = frame['Label'].astype(str).replace({'1': 'anomaly', '0': 'normal'})\n", "\n", "# errors='ignore': running this cell again does not raise once 'attack_cat' is gone.\n", "frame = frame.drop(columns='attack_cat', errors='ignore')\n", "frame" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 461 }, "id": "Itl2LFrGfVmY", "outputId": "758bbcde-5712-4734-a070-e7c5d7eefdd5" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " srcip sport dstip dsport proto state dur sbytes dbytes \\\n", "0 42 7045 25 25 114 5 0.201886 37552 3380 \n", "1 42 9685 20 80 114 5 5.864748 19410 1087890 \n", "2 35 1421 22 53 120 2 0.001391 146 178 \n", "3 35 21553 20 25 114 5 0.053948 37812 3380 \n", "4 41 45212 22 53 120 2 0.000953 146 178 \n", "... ... ... ... ... ... ... ... ... ... ... 
\n", "2540042 41 12520 24 31010 114 5 0.020383 320 1874 \n", "2540043 33 18895 27 80 114 5 1.402957 19410 1087890 \n", "2540044 33 30103 23 5190 114 5 0.007108 2158 2464 \n", "2540045 39 30388 23 111 120 2 0.004435 568 304 \n", "2540046 33 6055 23 54145 114 5 0.072974 4238 60788 \n", "\n", " sttl dttl sloss dloss service Sload Dload Spkts \\\n", "0 31 29 18 8 9 1.459438e+06 1.307669e+05 52 \n", "1 31 29 2 370 5 2.640454e+04 1.481983e+06 364 \n", "2 31 29 0 0 2 4.198418e+05 5.118620e+05 2 \n", "3 31 29 19 8 9 5.503374e+06 4.893601e+05 54 \n", "4 31 29 0 0 2 6.128017e+05 7.471144e+05 2 \n", "... ... ... ... ... ... ... ... ... \n", "2540042 31 29 1 2 0 1.047932e+05 6.436736e+05 6 \n", "2540043 31 29 2 370 5 1.103783e+05 6.195098e+06 364 \n", "2540044 31 29 6 6 0 2.328644e+06 2.658413e+06 24 \n", "2540045 31 29 0 0 0 7.684329e+05 4.112740e+05 4 \n", "2540046 31 29 7 30 0 4.582454e+05 6.571546e+06 72 \n", "\n", " Dpkts swin dwin stcpb dtcpb smeansz dmeansz \\\n", "0 42 255 255 1422136554 3572668484 722 80 \n", "1 746 255 255 389619597 394688654 53 1458 \n", "2 2 0 0 0 0 73 89 \n", "3 42 255 255 4047523379 1903327524 700 80 \n", "4 2 0 0 0 0 73 89 \n", "... ... ... ... ... ... ... ... \n", "2540042 8 255 255 3208686479 3225486168 53 234 \n", "2540043 746 255 255 283296697 2429736754 53 1458 \n", "2540044 24 255 255 703293844 2848960529 90 103 \n", "2540045 4 0 0 0 0 142 76 \n", "2540046 72 255 255 1003293149 1003585034 59 844 \n", "\n", " trans_depth res_bdy_len Sjit Djit Stime \\\n", "0 0 0 456.043567 15.530109 1424250009 \n", "1 1 0 1031.366423 690.219581 1424250003 \n", "2 0 0 0.000000 0.000000 1424250009 \n", "3 0 0 65.909688 3.155258 1424250009 \n", "4 0 0 0.000000 0.000000 1424250009 \n", "... ... ... ... ... ... 
\n", "2540042 0 0 212.810729 3.079195 1421955842 \n", "2540043 1 3924 203.808900 114.173588 1421955841 \n", "2540044 0 0 17.627831 0.432619 1421955842 \n", "2540045 0 0 1.638604 1.390643 1421955842 \n", "2540046 0 0 62.045310 61.899776 1421955842 \n", "\n", " Ltime Sintpkt Dintpkt tcprtt synack ackdat \\\n", "0 1424250009 3.943843 4.912488 0.000590 0.000473 0.000117 \n", "1 1424250009 16.155447 7.871279 0.000771 0.000638 0.000133 \n", "2 1424250009 0.009000 0.002000 0.000000 0.000000 0.000000 \n", "3 1424250009 1.011547 1.302561 0.000674 0.000540 0.000134 \n", "4 1424250009 0.009000 0.004000 0.000000 0.000000 0.000000 \n", "... ... ... ... ... ... ... \n", "2540042 1421955842 4.007400 2.027429 0.006386 0.006189 0.000197 \n", "2540043 1421955842 3.864028 1.882421 0.000712 0.000550 0.000162 \n", "2540044 1421955842 0.274261 0.285478 0.000657 0.000532 0.000125 \n", "2540045 1421955842 1.165667 0.987333 0.000000 0.000000 0.000000 \n", "2540046 1421955842 1.022690 0.997042 0.002317 0.002173 0.000144 \n", "\n", " is_sm_ips_ports ct_state_ttl ct_flw_http_mthd is_ftp_login \\\n", "0 0 0 -1 -1 \n", "1 0 0 1 -1 \n", "2 0 0 -1 -1 \n", "3 0 0 -1 -1 \n", "4 0 0 -1 -1 \n", "... ... ... ... ... \n", "2540042 0 0 0 0 \n", "2540043 0 0 4 0 \n", "2540044 0 0 0 0 \n", "2540045 0 0 0 0 \n", "2540046 0 0 0 0 \n", "\n", " ct_ftp_cmd ct_srv_src ct_srv_dst ct_dst_ltm ct_src_ ltm \\\n", "0 8 2 2 7 4 \n", "1 8 3 1 4 4 \n", "2 8 3 5 2 7 \n", "3 8 1 1 4 7 \n", "4 8 2 5 2 1 \n", "... ... ... ... ... ... \n", "2540042 0 8 20 7 5 \n", "2540043 0 1 1 2 7 \n", "2540044 0 13 13 6 7 \n", "2540045 0 10 13 6 5 \n", "2540046 0 13 13 6 7 \n", "\n", " ct_src_dport_ltm ct_dst_sport_ltm ct_dst_src_ltm Label \n", "0 1 1 3 normal \n", "1 1 1 1 normal \n", "2 1 1 4 normal \n", "3 1 1 3 normal \n", "4 1 1 2 normal \n", "... ... ... ... ... 
\n", "2540042 1 1 4 normal \n", "2540043 2 2 2 normal \n", "2540044 2 1 2 normal \n", "2540045 1 1 3 normal \n", "2540046 1 1 2 normal \n", "\n", "[2540047 rows x 48 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
srcipsportdstipdsportprotostatedursbytesdbytessttldttlslossdlossserviceSloadDloadSpktsDpktsswindwinstcpbdtcpbsmeanszdmeansztrans_depthres_bdy_lenSjitDjitStimeLtimeSintpktDintpkttcprttsynackackdatis_sm_ips_portsct_state_ttlct_flw_http_mthdis_ftp_loginct_ftp_cmdct_srv_srcct_srv_dstct_dst_ltmct_src_ ltmct_src_dport_ltmct_dst_sport_ltmct_dst_src_ltmLabel
0427045252511450.201886375523380312918891.459438e+061.307669e+055242255255142213655435726684847228000456.04356715.530109142425000914242500093.9438434.9124880.0005900.0004730.00011700-1-182274113normal
1429685208011455.8647481941010878903129237052.640454e+041.481983e+06364746255255389619597394688654531458101031.366423690.2195811424250003142425000916.1554477.8712790.0007710.0006380.000133001-183144111normal
2351421225312020.00139114617831290024.198418e+055.118620e+052200007389000.0000000.000000142425000914242500090.0090000.0020000.0000000.0000000.00000000-1-183527114normal
33521553202511450.053948378123380312919895.503374e+064.893601e+05544225525540475233791903327524700800065.9096883.155258142425000914242500091.0115471.3025610.0006740.0005400.00013400-1-181147113normal
44145212225312020.00095314617831290026.128017e+057.471144e+052200007389000.0000000.000000142425000914242500090.0090000.0040000.0000000.0000000.00000000-1-182521112normal
...................................................................................................................................................
25400424112520243101011450.020383320187431291201.047932e+056.436736e+0568255255320868647932254861685323400212.8107293.079195142195584214219558424.0074002.0274290.0063860.0061890.0001970000082075114normal
25400433318895278011451.4029571941010878903129237051.103783e+056.195098e+06364746255255283296697242973675453145813924203.808900114.173588142195584114219558423.8640281.8824210.0007120.0005500.000162004001127222normal
2540044333010323519011450.0071082158246431296602.328644e+062.658413e+0624242552557032938442848960529901030017.6278310.432619142195584214219558420.2742610.2854780.0006570.0005320.00012500000131367212normal
254004539303882311112020.00443556830431290007.684329e+054.112740e+0544000014276001.6386041.390643142195584214219558421.1656670.9873330.0000000.0000000.00000000000101365113normal
2540046336055235414511450.072974423860788312973004.582454e+056.571546e+06727225525510032931491003585034598440062.04531061.899776142195584214219558421.0226900.9970420.0023170.0021730.00014400000131367112normal
\n", "

2540047 rows × 48 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
def Remove_Outlier_Indices(df, lower_q=0.02, upper_q=0.98, whisker=1.5):
    """Build a boolean row mask that is True for rows without outlying values.

    A value is treated as an outlier when it lies more than ``whisker`` times
    the inter-quantile spread below the ``lower_q`` quantile or above the
    ``upper_q`` quantile of its column. Only numeric columns take part in the
    check; non-numeric columns (for example a string label) are ignored
    explicitly instead of relying on pandas' deprecated implicit
    ``numeric_only`` behaviour and automatic column re-alignment.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is not modified.
    lower_q, upper_q : float, default 0.02 and 0.98
        Quantiles that delimit the "typical" range of each column.
    whisker : float, default 1.5
        Multiple of ``upper_q - lower_q`` spread tolerated outside that range.

    Returns
    -------
    pandas.Series
        Boolean Series indexed like ``df``; True marks rows to keep, so it can
        be used directly as ``df[mask]``.
    """
    numeric = df.select_dtypes(include="number")

    lower = numeric.quantile(lower_q)
    upper = numeric.quantile(upper_q)
    spread = upper - lower

    # Comparisons against NaN are False, so missing values never flag a row.
    too_low = numeric.lt(lower - whisker * spread, axis="columns")
    too_high = numeric.gt(upper + whisker * spread, axis="columns")

    # With no numeric columns `.any(axis=1)` is all False, i.e. every row is kept.
    return ~(too_low | too_high).any(axis=1)
# %% Promote the outlier-filtered frame and release the intermediates
df = new_data.reset_index(drop=True)
del new_data
del nonOutlierList
del li
del frame

# %% Reclaim the memory held by the deleted frames
import gc

gc.collect()

# %% Project the standardized features onto their leading principal components
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

PCA_SEED = 42  # fixes the randomized SVD solver so the projection is reproducible
n_components = 10
# Column names of the projected frame: "1" .. "10".
columns = [str(position + 1) for position in range(n_components)]

# Everything except the target column is fed to the scaler and PCA.
feature_frame = df.drop(columns=["Label"])
X = feature_frame.values
y = df["Label"].values
x_scaled = StandardScaler().fit_transform(X)

pca = PCA(n_components, random_state=PCA_SEED)

# Fit and transform data
principalComponents = pca.fit_transform(x_scaled)

principalDf = pd.DataFrame(data=principalComponents, columns=columns)
# `df` was re-indexed above, so the positional concat lines rows up with labels.
finalDf = pd.concat([principalDf, df["Label"]], axis=1)

# %% Name the original feature with the largest absolute loading on each component
model = pca
X_pc = principalComponents

# number of components
n_pcs = model.components_.shape[0]

# Index of the most important feature on each component.
most_important = [np.abs(model.components_[i]).argmax() for i in range(n_pcs)]

# Take the names from the exact frame handed to PCA rather than from the global
# CSV_HEADER, which is rebound to the PCA column names further down the notebook.
initial_feature_names = list(feature_frame.columns)
most_important_names = [initial_feature_names[index] for index in most_important]

dic = {'PC{}'.format(i): most_important_names[i] for i in range(n_pcs)}

# build the dataframe
dfx = pd.DataFrame(dic.items())
dfx
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0PC0ct_dst_src_ltm
1PC1Sintpkt
2PC2Spkts
3PC3tcprtt
4PC4dbytes
5PC5Ltime
6PC6smeansz
7PC7dstip
8PC8sport
9PC9trans_depth
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
# %% Split the PCA frame and persist both parts as header-less CSV files
from sklearn.model_selection import train_test_split

SPLIT_SEED = 42  # pins the shuffle so the exported files are identical across runs
train_data, test_data = train_test_split(
    finalDf, test_size=0.25, random_state=SPLIT_SEED
)
train_data_file = "train_data.csv"
test_data_file = "test_data.csv"

# No header/index: the files are re-read later with explicit column names.
train_data.to_csv(train_data_file, index=False, header=False)
test_data.to_csv(test_data_file, index=False, header=False)

# %% The frames now live on disk; drop the in-memory copies
del finalDf
del principalDf
del train_data
del test_data

# %%
gc.collect()

# %% Feature metadata used by the CSV reader and the model inputs
# Column order of the exported files: the PCA components followed by the target.
CSV_HEADER = [*columns, "Label"]

# A list of the numerical feature names.
NUMERIC_FEATURE_NAMES = columns
# A dictionary of the categorical features and their vocabulary.
CATEGORICAL_FEATURES_WITH_VOCABULARY = {}
# A list of the columns to ignore from the dataset.
IGNORE_COLUMN_NAMES = []
# A list of the categorical feature names.
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())
# A list of all the input features.
FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES
# A list of column default values for each feature: numeric (and ignored)
# columns default to 0.0, every other column to the string "NA".
COLUMN_DEFAULTS = [
    [0.0] if feature_name in NUMERIC_FEATURE_NAMES + IGNORE_COLUMN_NAMES else ["NA"]
    for feature_name in CSV_HEADER
]
"TARGET_FEATURE_NAME = \"Label\"\n", "# A list of the labels of the target features.\n", "TARGET_LABELS = [\"normal\", \"anomaly\"]\n", "\n", "from tensorflow.keras.layers import StringLookup\n", "\n", "target_label_lookup = StringLookup(\n", " vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0\n", ")\n", "\n", "\n", "def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n", " dataset = tf.data.experimental.make_csv_dataset(\n", " csv_file_path,\n", " batch_size=batch_size,\n", " column_names=CSV_HEADER,\n", " column_defaults=COLUMN_DEFAULTS,\n", " label_name=TARGET_FEATURE_NAME,\n", " num_epochs=1,\n", " header=False,\n", " na_value=\"?\",\n", " shuffle=shuffle,\n", " ).map(lambda features, target: (features, target_label_lookup(target)))\n", " return dataset.cache()\n", "\n", "def create_model_inputs():\n", " inputs = {}\n", " for feature_name in FEATURE_NAMES:\n", " if feature_name in NUMERIC_FEATURE_NAMES:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.float32\n", " )\n", " else:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.string\n", " )\n", " return inputs\n", "\n", "def encode_inputs(inputs):\n", " encoded_features = []\n", " for feature_name in inputs:\n", " if feature_name in CATEGORICAL_FEATURE_NAMES:\n", " vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n", " #print(vocabulary)\n", " # Create a lookup to convert a string values to an integer indices.\n", " # Since we are not using a mask token, nor expecting any out of vocabulary\n", " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", " lookup = StringLookup(\n", " vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n", " )\n", " # Convert the string input values into integer indices.\n", " value_index = lookup(inputs[feature_name])\n", " embedding_dims = int(math.sqrt(lookup.vocabulary_size()))\n", " # Create an embedding layer with the specified 
class NeuralDecisionTree(keras.Model):
    """Differentiable soft decision tree over a random subset of the input features.

    A single sigmoid Dense layer produces the routing probabilities; `call`
    multiplies them level by level to obtain the probability of reaching each
    leaf, then mixes the per-leaf class distributions (softmax over `pi`).
    """

    def __init__(self, depth, num_features, used_features_rate, num_classes):
        """Set up the feature mask, the leaf class weights and the routing layer.

        Args:
            depth: Number of tree levels; the tree has ``2 ** depth`` leaves.
            num_features: Width of the feature vector passed to `call`.
            used_features_rate: Fraction of the features this tree looks at;
                ``int(num_features * used_features_rate)`` features are kept.
            num_classes: Number of output classes.
        """
        super().__init__()
        self.depth = depth
        self.num_leaves = 2 ** depth
        self.num_classes = num_classes

        # Create a mask for the randomly selected features.
        # The rows of the identity matrix picked here select the used features.
        # The draw uses NumPy's global RNG and is not seeded in this block.
        num_used_features = int(num_features * used_features_rate)
        one_hot = np.eye(num_features)
        sampled_feature_indicies = np.random.choice(
            np.arange(num_features), num_used_features, replace=False
        )
        self.used_features_mask = one_hot[sampled_feature_indicies]

        # Initialize the weights of the classes in leaves.
        self.pi = tf.Variable(
            initial_value=tf.random_normal_initializer()(
                shape=[self.num_leaves, self.num_classes]
            ),
            dtype="float32",
            trainable=True,
        )

        # Initialize the stochastic routing layer.
        self.decision_fn = layers.Dense(
            units=self.num_leaves, activation="sigmoid", name="decision"
        )

    def call(self, features):
        """Return the class distribution predicted by the tree for each row.

        Args:
            features: Batch of feature vectors, multiplied against the
                transposed feature mask.

        Returns:
            Tensor of shape [batch_size, num_classes]: leaf-reach probabilities
            times the softmax of the leaf class weights.
        """
        batch_size = tf.shape(features)[0]

        # Apply the feature mask to the input features.
        features = tf.matmul(
            features, self.used_features_mask, transpose_b=True
        )  # [batch_size, num_used_features]
        # Compute the routing probabilities.
        decisions = tf.expand_dims(
            self.decision_fn(features), axis=2
        )  # [batch_size, num_leaves, 1]
        # Concatenate the routing probabilities with their complements.
        decisions = layers.concatenate(
            [decisions, 1 - decisions], axis=2
        )  # [batch_size, num_leaves, 2]

        mu = tf.ones([batch_size, 1, 1])

        # Slicing starts at index 1, so decision unit 0 is never read; level k
        # consumes units [2 ** k, 2 ** (k + 1)).
        begin_idx = 1
        end_idx = 2
        # Traverse the tree in breadth-first order.
        for level in range(self.depth):
            mu = tf.reshape(mu, [batch_size, -1, 1])  # [batch_size, 2 ** level, 1]
            mu = tf.tile(mu, (1, 1, 2))  # [batch_size, 2 ** level, 2]
            level_decisions = decisions[
                :, begin_idx:end_idx, :
            ]  # [batch_size, 2 ** level, 2]
            mu = mu * level_decisions  # [batch_size, 2**level, 2]
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (level + 1)

        mu = tf.reshape(mu, [batch_size, self.num_leaves])  # [batch_size, num_leaves]
        probabilities = keras.activations.softmax(self.pi)  # [num_leaves, num_classes]
        outputs = tf.matmul(mu, probabilities)  # [batch_size, num_classes]
        return outputs
class NeuralDecisionForest(keras.Model):
    """Ensemble of ``NeuralDecisionTree`` models whose outputs are averaged."""

    def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):
        """Create ``num_trees`` trees that share the same hyper-parameters.

        Args:
            num_trees: Number of trees in the ensemble.
            depth: Depth passed to every tree.
            num_features: Width of the feature vector passed to `call`.
            used_features_rate: Fraction of features each tree samples.
            num_classes: Number of output classes.
        """
        super().__init__()
        # Remember the class count so `call` does not depend on a module-level
        # global that happens to carry the same name.
        self.num_classes = num_classes
        # Each tree draws its own random subset of input features.
        self.ensemble = [
            NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)
            for _ in range(num_trees)
        ]

    def call(self, inputs):
        """Average the per-tree outputs into a [batch_size, num_classes] tensor."""
        batch_size = tf.shape(inputs)[0]
        outputs = tf.zeros([batch_size, self.num_classes])

        # Sum the tree outputs, then divide by the ensemble size.
        for tree in self.ensemble:
            outputs += tree(inputs)
        outputs /= len(self.ensemble)
        return outputs
learning_rate = 0.01
batch_size = 128
num_epochs = 10


def run_experiment(model):
    """Compile, train and evaluate ``model`` on the exported CSV files.

    Training reads ``train_data_file`` (shuffled) for ``num_epochs`` epochs;
    evaluation reads ``test_data_file``. Both use the module-level
    ``batch_size``, and the optimizer uses ``learning_rate``.

    Args:
        model: An uncompiled Keras model that accepts the feature dict yielded
            by ``get_dataset_from_csv``.

    Returns:
        The same model instance, trained in place. The test accuracy is printed.
    """
    # The metric is addressed through `keras.metrics` on purpose: the bare name
    # `metrics` is rebound to `sklearn.metrics` by the evaluation cell, which
    # would break this function when it is re-run afterwards.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )

    print("Start training the model...")
    train_dataset = get_dataset_from_csv(
        train_data_file, shuffle=True, batch_size=batch_size
    )
    model.fit(train_dataset, epochs=num_epochs)
    print("Model training finished")

    print("Evaluating the model on the test data...")
    test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)
    # evaluate() returns [loss, accuracy] for the single compiled metric.
    _, accuracy = model.evaluate(test_dataset)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")
    return model
num_trees = 25
depth = 5
used_features_rate = 0.5
num_classes = len(TARGET_LABELS)


def create_forest_model():
    """Assemble the end-to-end Keras model around a ``NeuralDecisionForest``.

    The model inputs are encoded into one tensor, batch-normalized and passed
    to a forest configured from the module-level ``num_trees``, ``depth``,
    ``used_features_rate`` and ``num_classes``.

    Returns:
        An uncompiled ``keras.Model`` mapping the input dict to the forest output.
    """
    model_inputs = create_model_inputs()
    normalized = layers.BatchNormalization()(encode_inputs(model_inputs))

    # The forest needs the width of the encoded feature vector.
    forest = NeuralDecisionForest(
        num_trees, depth, normalized.shape[1], used_features_rate, num_classes
    )
    return keras.Model(inputs=model_inputs, outputs=forest(normalized))


forest_model = create_forest_model()

finalModel = run_experiment(forest_model)
"Evaluating the model on the test data...\n", "4764/4764 [==============================] - 59s 12ms/step - loss: 0.0223 - sparse_categorical_accuracy: 0.9893\n", "Test accuracy: 98.93%\n" ] } ] }, { "cell_type": "code", "source": [ "test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "colnames=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'target']\n", "data = pd.read_csv(\"test_data.csv\", names=colnames, header=None)\n", "data['target'].replace('normal', 0,inplace=True)\n", "data['target'].replace('anomaly', 1,inplace=True)\n", "y_test = data['target'].values # as a numpy array\n", "from sklearn.metrics import confusion_matrix\n", "y_prediction = finalModel.predict(test_dataset)\n", "y_prediction = np.argmax (y_prediction, axis = 1)\n", "result = confusion_matrix(y_test, y_prediction , normalize='pred')\n", "print(result)\n", "TP = result[0][0]\n", "FP = result[0][1]\n", "TN = result[1][1]\n", "FN = result[1][0]\n", "ACC = (TP+TN)/(TP+TN+FP+FN)\n", "PR = TP/(TP+FP) #precision\n", "TPR = TP/(TP+FN) #Recall or True positive rate\n", "FPR = FP/(FP+TN)\n", "F1Score = 2*(PR*TPR)/(PR+TPR)\n", "print(\"ACC: \" + str(ACC))\n", "print(\"PR: \" + str(PR))\n", "print(\"TPR: \" + str(TPR))\n", "print(\"FPR: \" + str(FPR))\n", "print(\"F1Score: \" + str(F1Score))\n", "import matplotlib.pyplot as plt\n", "import numpy\n", "from sklearn import metrics\n", "\n", "\n", "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = result, display_labels = [True, False])\n", "\n", "cm_display.plot()\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 588 }, "outputId": "f6abffa5-93ff-464b-ce93-18dde70585fd", "id": "pPffXGpiFAJo" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "4764/4764 [==============================] - 56s 11ms/step\n", "[[0.9963139 0.05992979]\n", " [0.0036861 0.94007021]]\n", "ACC: 0.9681920531916617\n", "PR: 0.9432613986597967\n", "TPR: 
0.9963138957514277\n", "FPR: 0.059929789368104315\n", "F1Score: 0.9690620843837493\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] } ] }