{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "import tensorflow as tf\n", "import numpy as np\n", "import pandas as pd\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "from keras import losses\n", "from keras import optimizers\n", "from keras import metrics\n", "import math\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ], "metadata": { "id": "sIDDU2PYPdH_" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "CSV_HEADER = [\n", " \"duration\",\n", " \"protocol_type\",\n", " \"service\",\n", " \"flag\",\n", " \"src_bytes\",\n", " \"dst_bytes\",\n", " \"land\",\n", " \"wrong_fragment\",\n", " \"urgent\",\n", " \"hot\",\n", " \"num_failed_logins\",\n", " \"logged_in\",\n", " \"num_compromised\",\n", " \"root_shell\",\n", " \"su_attempted\",\n", " \"num_root\",\n", " \"num_file_creations\",\n", " \"num_shells\",\n", " \"num_access_files\",\n", " \"num_outbound_cmds\",\n", " \"is_host_login\",\n", " \"is_guest_login\",\n", " \"count\",\n", " \"srv_count\",\n", " \"serror_rate\",\n", " \"srv_serror_rate\",\n", " \"rerror_rate\",\n", " \"srv_rerror_rate\",\n", " \"same_srv_rate\",\n", " \"diff_srv_rate\",\n", " \"srv_diff_host_rate\",\n", " \"dst_host_count\",\n", " \"dst_host_srv_count\",\n", " \"dst_host_same_srv_rate\",\n", " \"dst_host_diff_srv_rate\",\n", " \"dst_host_same_src_port_rate\",\n", " \"dst_host_srv_diff_host_rate\",\n", " \"dst_host_serror_rate\",\n", " \"dst_host_srv_serror_rate\",\n", " \"dst_host_rerror_rate\",\n", " \"dst_host_srv_rerror_rate\",\n", " \"class\"\n", "]\n", "\n", "\n", "train_data = pd.read_csv(\"train.csv\", header=None, names=CSV_HEADER)\n", "\n", "test_data = pd.read_csv(\"test.csv\", header=None, names=CSV_HEADER)\n", "\n", "print(f\"Train dataset shape: {train_data.shape}\")\n", "print(f\"Test dataset shape: {test_data.shape}\")\n", "train_data['class'] = train_data['class'].str.replace(r\"^(.(?:53: FutureWarning: The default value of regex will change from True to False in a future version.\n", " train_data['class'] = train_data['class'].str.replace(r\"^(.(?:54: FutureWarning: The default value of regex will change from True to False in a future version.\n", " test_data['class'] = test_data['class'].str.replace(r\"^(.(?" ], "image/png": "\n" }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "## Removing outliers and duplicates" ], "metadata": { "id": "wHhIvDl9V5kl" } }, { "cell_type": "code", "source": [ "def Remove_Outlier_Indices(df):\n", " Q1 = df.quantile(0.02)\n", " Q3 = df.quantile(0.98)\n", " IQR = Q3 - Q1\n", " #trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR)))\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n", " return trueList\n", "\n", "nonOutlierList = Remove_Outlier_Indices(train_data)\n", "new_train_data = train_data[nonOutlierList]\n", "\n", "nonOutlierList = Remove_Outlier_Indices(test_data)\n", "new_test_data = test_data[nonOutlierList]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8GZwWkuSV5HT", "outputId": "b19ef0b2-7d69-428c-fc19-cef6c9e6f076" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":2: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q1 = df.quantile(0.02)\n", ":3: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q3 = df.quantile(0.98)\n", ":6: FutureWarning: Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version. Do `left, right = left.align(right, axis=1, copy=False)` before e.g. `left == right`\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Merging train and test datasets" ], "metadata": { "id": "uNzhq0uHKkUB" } }, { "cell_type": "code", "source": [ "frames = [new_train_data, new_test_data]\n", "df = pd.concat(frames)\n", "df = df.reset_index(drop=True)\n", "df" ], "metadata": { "id": "3kxP6kBRGX2y", "colab": { "base_uri": "https://localhost:8080/", "height": 444 }, "outputId": "073365f1-5924-45fc-cf1c-041d7b8e63a3" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " duration protocol_type service flag src_bytes dst_bytes land \\\n", "0 0 1 20 9 491 0 0 \n", "1 0 2 44 9 146 0 0 \n", "2 0 1 49 5 0 0 0 \n", "3 0 1 24 9 232 8153 0 \n", "4 0 1 24 9 199 420 0 \n", "... ... ... ... ... ... ... ... \n", "139899 0 0 14 9 1032 0 0 \n", "139900 0 1 49 9 794 333 0 \n", "139901 0 1 22 9 317 938 0 \n", "139902 0 2 11 9 42 42 0 \n", "139903 0 1 52 1 0 0 0 \n", "\n", " wrong_fragment urgent hot ... dst_host_srv_count \\\n", "0 0 0 0 ... 25 \n", "1 0 0 0 ... 1 \n", "2 0 0 0 ... 26 \n", "3 0 0 0 ... 255 \n", "4 0 0 0 ... 255 \n", "... ... ... ... ... ... \n", "139899 0 0 0 ... 255 \n", "139900 0 0 0 ... 141 \n", "139901 0 0 0 ... 255 \n", "139902 0 0 0 ... 252 \n", "139903 0 0 0 ... 21 \n", "\n", " dst_host_same_srv_rate dst_host_diff_srv_rate \\\n", "0 0.17 0.03 \n", "1 0.00 0.60 \n", "2 0.10 0.05 \n", "3 1.00 0.00 \n", "4 1.00 0.00 \n", "... ... ... \n", "139899 1.00 0.00 \n", "139900 0.72 0.06 \n", "139901 1.00 0.00 \n", "139902 0.99 0.01 \n", "139903 0.08 0.03 \n", "\n", " dst_host_same_src_port_rate dst_host_srv_diff_host_rate \\\n", "0 0.17 0.00 \n", "1 0.88 0.00 \n", "2 0.00 0.00 \n", "3 0.03 0.04 \n", "4 0.00 0.00 \n", "... ... ... \n", "139899 1.00 0.00 \n", "139900 0.01 0.01 \n", "139901 0.01 0.01 \n", "139902 0.00 0.00 \n", "139903 0.00 0.00 \n", "\n", " dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate \\\n", "0 0.00 0.00 0.05 \n", "1 0.00 0.00 0.00 \n", "2 1.00 1.00 0.00 \n", "3 0.03 0.01 0.00 \n", "4 0.00 0.00 0.00 \n", "... ... ... ... \n", "139899 0.00 0.00 0.00 \n", "139900 0.01 0.00 0.00 \n", "139901 0.01 0.00 0.00 \n", "139902 0.00 0.00 0.00 \n", "139903 0.00 0.00 0.44 \n", "\n", " dst_host_srv_rerror_rate class \n", "0 0.00 normal \n", "1 0.00 normal \n", "2 0.00 anomaly \n", "3 0.01 normal \n", "4 0.00 normal \n", "... ... ... \n", "139899 0.00 anomaly \n", "139900 0.00 normal \n", "139901 0.00 normal \n", "139902 0.00 normal \n", "139903 1.00 anomaly \n", "\n", "[139904 rows x 42 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
durationprotocol_typeserviceflagsrc_bytesdst_byteslandwrong_fragmenturgenthot...dst_host_srv_countdst_host_same_srv_ratedst_host_diff_srv_ratedst_host_same_src_port_ratedst_host_srv_diff_host_ratedst_host_serror_ratedst_host_srv_serror_ratedst_host_rerror_ratedst_host_srv_rerror_rateclass
00120949100000...250.170.030.170.000.000.000.050.00normal
10244914600000...10.000.600.880.000.000.000.000.00normal
201495000000...260.100.050.000.001.001.000.000.00anomaly
30124923281530000...2551.000.000.030.040.030.010.000.01normal
4012491994200000...2551.000.000.000.000.000.000.000.00normal
..................................................................
13989900149103200000...2551.000.001.000.000.000.000.000.00anomaly
139900014997943330000...1410.720.060.010.010.010.000.000.00normal
139901012293179380000...2551.000.000.010.010.010.000.000.00normal
1399020211942420000...2520.990.010.000.000.000.000.000.00normal
13990301521000000...210.080.030.000.000.000.000.441.00anomaly
\n", "

139904 rows × 42 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "markdown", "source": [ "## Feature Selection" ], "metadata": { "id": "FiXD9jj7JzAt" } }, { "cell_type": "code", "source": [ "from sklearn.feature_selection import SelectKBest\n", "from sklearn.feature_selection import f_regression\n", "\n", "selector = SelectKBest(f_regression, k=10)\n", "X = df.drop(['class'], axis=1)\n", "Y = df[\"class\"].astype('category').cat.codes\n", "X_new = selector.fit(X, Y)\n", "X.columns.values[selector.get_support()]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AeldOcvmp9oy", "outputId": "6c7920b9-7355-4dd1-f6ab-ddd17f3c73ed" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array(['flag', 'logged_in', 'count', 'serror_rate', 'srv_serror_rate',\n", " 'same_srv_rate', 'dst_host_srv_count', 'dst_host_same_srv_rate',\n", " 'dst_host_serror_rate', 'dst_host_srv_serror_rate'], dtype=object)" ] }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "source": [ "columns = X.columns.values[selector.get_support()]" ], "metadata": { "id": "CcnIqyU6u_lU" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "columns = []\n", "for c in X.columns.values[selector.get_support()]:\n", " columns.append(str(c))" ], "metadata": { "id": "tUrifaWBvZpx" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "principalDf = pd.DataFrame(data = X\n", " , columns = X.columns.values[selector.get_support()])\n", "finalDf = pd.concat([principalDf, df[\"class\"]], axis = 1)\n", "finalDf" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "RzFbtWLlsmn0", "outputId": "cb087fd1-8a86-4031-b239-65c5902a96df" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " flag logged_in count serror_rate srv_serror_rate same_srv_rate \\\n", "0 9 0 2 0.0 0.0 1.00 \n", "1 9 0 13 0.0 0.0 0.08 \n", "2 5 0 123 1.0 1.0 0.05 \n", "3 9 1 5 0.2 0.2 1.00 \n", "4 9 1 30 0.0 0.0 1.00 \n", "... ... ... ... ... ... ... \n", "139899 9 0 53 0.0 0.0 1.00 \n", "139900 9 1 1 0.0 0.0 1.00 \n", "139901 9 1 2 0.0 0.0 1.00 \n", "139902 9 0 4 0.0 0.0 1.00 \n", "139903 1 0 4 0.0 0.0 0.25 \n", "\n", " dst_host_srv_count dst_host_same_srv_rate dst_host_serror_rate \\\n", "0 25 0.17 0.00 \n", "1 1 0.00 0.00 \n", "2 26 0.10 1.00 \n", "3 255 1.00 0.03 \n", "4 255 1.00 0.00 \n", "... ... ... ... \n", "139899 255 1.00 0.00 \n", "139900 141 0.72 0.01 \n", "139901 255 1.00 0.01 \n", "139902 252 0.99 0.00 \n", "139903 21 0.08 0.00 \n", "\n", " dst_host_srv_serror_rate class \n", "0 0.00 normal \n", "1 0.00 normal \n", "2 1.00 anomaly \n", "3 0.01 normal \n", "4 0.00 normal \n", "... ... ... \n", "139899 0.00 anomaly \n", "139900 0.00 normal \n", "139901 0.00 normal \n", "139902 0.00 normal \n", "139903 0.00 anomaly \n", "\n", "[139904 rows x 11 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
flaglogged_incountserror_ratesrv_serror_ratesame_srv_ratedst_host_srv_countdst_host_same_srv_ratedst_host_serror_ratedst_host_srv_serror_rateclass
09020.00.01.00250.170.000.00normal
190130.00.00.0810.000.000.00normal
2501231.01.00.05260.101.001.00anomaly
39150.20.21.002551.000.030.01normal
491300.00.01.002551.000.000.00normal
....................................
13989990530.00.01.002551.000.000.00anomaly
1399009110.00.01.001410.720.010.00normal
1399019120.00.01.002551.000.010.00normal
1399029040.00.01.002520.990.000.00normal
1399031040.00.00.25210.080.000.00anomaly
\n", "

139904 rows × 11 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "markdown", "source": [ "# Model" ], "metadata": { "id": "B6B5ZuR5J5WQ" } }, { "cell_type": "code", "source": [ "from sklearn.model_selection import train_test_split\n", "train_data, test_data = train_test_split(finalDf, test_size=0.25)\n", "train_data_file = \"train_data.csv\"\n", "test_data_file = \"test_data.csv\"\n", "\n", "train_data.to_csv(train_data_file, index=False, header=False)\n", "test_data.to_csv(test_data_file, index=False, header=False)" ], "metadata": { "id": "_1N70b_DJb2m" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "CSV_HEADER = []\n", "for x in columns:\n", " CSV_HEADER.append(x)\n", "CSV_HEADER.append(\"class\")\n", "\n", "# A list of the numerical feature names.\n", "NUMERIC_FEATURE_NAMES = columns\n", "# A dictionary of the categorical features and their vocabulary.\n", "CATEGORICAL_FEATURES_WITH_VOCABULARY = {\n", "}\n", "# A list of the columns to ignore from the dataset.\n", "IGNORE_COLUMN_NAMES = []\n", "# A list of the categorical feature names.\n", "CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())\n", "# A list of all the input features.\n", "FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES\n", "# A list of column default values for each feature.\n", "COLUMN_DEFAULTS = [\n", " [0.0] if feature_name in NUMERIC_FEATURE_NAMES + IGNORE_COLUMN_NAMES else [\"NA\"]\n", " for feature_name in CSV_HEADER\n", "]\n", "# The name of the target feature.\n", "TARGET_FEATURE_NAME = \"class\"\n", "# A list of the labels of the target features.\n", "TARGET_LABELS = [\"normal\", \"anomaly\"]\n", "\n", "from tensorflow.keras.layers import StringLookup\n", "\n", "target_label_lookup = StringLookup(\n", " vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0\n", ")\n", "\n", "\n", "def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n", " dataset = tf.data.experimental.make_csv_dataset(\n", " csv_file_path,\n", " batch_size=batch_size,\n", " column_names=CSV_HEADER,\n", " column_defaults=COLUMN_DEFAULTS,\n", " label_name=TARGET_FEATURE_NAME,\n", " num_epochs=1,\n", " header=False,\n", " na_value=\"?\",\n", " shuffle=shuffle,\n", " ).map(lambda features, target: (features, target_label_lookup(target)))\n", " return dataset.cache()\n", "\n", "def create_model_inputs():\n", " inputs = {}\n", " for feature_name in FEATURE_NAMES:\n", " if feature_name in NUMERIC_FEATURE_NAMES:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.float32\n", " )\n", " else:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.string\n", " )\n", " return inputs\n", "\n", "def encode_inputs(inputs):\n", " encoded_features = []\n", " for feature_name in inputs:\n", " if feature_name in CATEGORICAL_FEATURE_NAMES:\n", " vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n", " #print(vocabulary)\n", " # Create a lookup to convert a string values to an integer indices.\n", " # Since we are not using a mask token, nor expecting any out of vocabulary\n", " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", " lookup = StringLookup(\n", " vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n", " )\n", " # Convert the string input values into integer indices.\n", " value_index = lookup(inputs[feature_name])\n", " embedding_dims = int(math.sqrt(lookup.vocabulary_size()))\n", " # Create an embedding layer with the specified dimensions.\n", " embedding = layers.Embedding(\n", " input_dim=lookup.vocabulary_size(), output_dim=embedding_dims\n", " )\n", " # Convert the index values to embedding representations.\n", " encoded_feature = embedding(value_index)\n", " else:\n", " # Use the numerical features as-is.\n", " encoded_feature = inputs[feature_name]\n", " if inputs[feature_name].shape[-1] is None:\n", " encoded_feature = tf.expand_dims(encoded_feature, -1)\n", "\n", " encoded_features.append(encoded_feature)\n", "\n", " encoded_features = layers.concatenate(encoded_features)\n", " return encoded_features\n", "\n", "class NeuralDecisionTree(keras.Model):\n", " def __init__(self, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.depth = depth\n", " self.num_leaves = 2 ** depth\n", " self.num_classes = num_classes\n", "\n", " # Create a mask for the randomly selected features.\n", " num_used_features = int(num_features * used_features_rate)\n", " one_hot = np.eye(num_features)\n", " sampled_feature_indicies = np.random.choice(\n", " np.arange(num_features), num_used_features, replace=False\n", " )\n", " self.used_features_mask = one_hot[sampled_feature_indicies]\n", "\n", " # Initialize the weights of the classes in leaves.\n", " self.pi = tf.Variable(\n", " initial_value=tf.random_normal_initializer()(\n", " shape=[self.num_leaves, self.num_classes]\n", " ),\n", " dtype=\"float32\",\n", " trainable=True,\n", " )\n", "\n", " # Initialize the stochastic routing layer.\n", " self.decision_fn = layers.Dense(\n", " units=self.num_leaves, activation=\"sigmoid\", name=\"decision\"\n", " )\n", "\n", " def call(self, features):\n", " batch_size = tf.shape(features)[0]\n", "\n", " # Apply the feature mask to the input features.\n", " features = tf.matmul(\n", " features, self.used_features_mask, transpose_b=True\n", " ) # [batch_size, num_used_features]\n", " # Compute the routing probabilities.\n", " decisions = tf.expand_dims(\n", " self.decision_fn(features), axis=2\n", " ) # [batch_size, num_leaves, 1]\n", " # Concatenate the routing probabilities with their complements.\n", " decisions = layers.concatenate(\n", " [decisions, 1 - decisions], axis=2\n", " ) # [batch_size, num_leaves, 2]\n", "\n", " mu = tf.ones([batch_size, 1, 1])\n", "\n", " begin_idx = 1\n", " end_idx = 2\n", " # Traverse the tree in breadth-first order.\n", " for level in range(self.depth):\n", " mu = tf.reshape(mu, [batch_size, -1, 1]) # [batch_size, 2 ** level, 1]\n", " mu = tf.tile(mu, (1, 1, 2)) # [batch_size, 2 ** level, 2]\n", " level_decisions = decisions[\n", " :, begin_idx:end_idx, :\n", " ] # [batch_size, 2 ** level, 2]\n", " mu = mu * level_decisions # [batch_size, 2**level, 2]\n", " begin_idx = end_idx\n", " end_idx = begin_idx + 2 ** (level + 1)\n", "\n", " mu = tf.reshape(mu, [batch_size, self.num_leaves]) # [batch_size, num_leaves]\n", " probabilities = keras.activations.softmax(self.pi) # [num_leaves, num_classes]\n", " outputs = tf.matmul(mu, probabilities) # [batch_size, num_classes]\n", " return outputs\n", "\n", "class NeuralDecisionForest(keras.Model):\n", " def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.ensemble = []\n", " # Initialize the ensemble by adding NeuralDecisionTree instances.\n", " # Each tree will have its own randomly selected input features to use.\n", " for _ in range(num_trees):\n", " self.ensemble.append(\n", " NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)\n", " )\n", "\n", " def call(self, inputs):\n", " # Initialize the outputs: a [batch_size, num_classes] matrix of zeros.\n", " batch_size = tf.shape(inputs)[0]\n", " outputs = tf.zeros([batch_size, num_classes])\n", "\n", " # Aggregate the outputs of trees in the ensemble.\n", " for tree in self.ensemble:\n", " outputs += tree(inputs)\n", " # Divide the outputs by the ensemble size to get the average.\n", " outputs /= len(self.ensemble)\n", " return outputs\n", "learning_rate = 0.01\n", "batch_size = 128\n", "num_epochs = 10\n", "\n", "\n", "def run_experiment(model):\n", "\n", " # model.compile(\n", " # optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " # loss=keras.losses.SparseCategoricalCrossentropy(),\n", " # metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", " # )\n", " model.compile(\n", " optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " loss=keras.losses.SparseCategoricalCrossentropy(),\n", " metrics=[metrics.SparseCategoricalAccuracy()],\n", " )\n", " print(\"Start training the model...\")\n", " train_dataset = get_dataset_from_csv(\n", " train_data_file, shuffle=True, batch_size=batch_size\n", " )\n", "\n", " model.fit(train_dataset, epochs=num_epochs)\n", " print(\"Model training finished\")\n", "\n", " print(\"Evaluating the model on the test data...\")\n", " test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "\n", " _, accuracy = model.evaluate(test_dataset)\n", " print(f\"Test accuracy: {round(accuracy * 100, 2)}%\")\n", " return model" ], "metadata": { "id": "PGCEwlOPPpEP", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "3dbd7868-4ff2-43c1-ad87-85b0c0fa02ec" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/numpy/core/numeric.py:2463: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", " return bool(asarray(a1 == a2).all())\n" ] } ] }, { "cell_type": "code", "source": [ "num_trees = 25\n", "depth = 5\n", "used_features_rate = 0.5\n", "num_classes = len(TARGET_LABELS)\n", "\n", "\n", "def create_forest_model():\n", " inputs = create_model_inputs()\n", " features = encode_inputs(inputs)\n", " features = layers.BatchNormalization()(features)\n", " num_features = features.shape[1]\n", "\n", " forest_model = NeuralDecisionForest(\n", " num_trees, depth, num_features, used_features_rate, num_classes\n", " )\n", "\n", " outputs = forest_model(features)\n", " model = keras.Model(inputs=inputs, outputs=outputs)\n", " return model\n", "\n", "\n", "forest_model = create_forest_model()\n", "\n", "finalModel = run_experiment(forest_model)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vLjxBfBIQUKR", "outputId": "61de3670-6ab4-47e9-d43c-3a0ef8f65cbd" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Start training the model...\n", "Epoch 1/10\n", "820/820 [==============================] - 47s 27ms/step - loss: 0.2367 - sparse_categorical_accuracy: 0.9130\n", "Epoch 2/10\n", "820/820 [==============================] - 16s 20ms/step - loss: 0.1888 - sparse_categorical_accuracy: 0.9268\n", "Epoch 3/10\n", "820/820 [==============================] - 18s 21ms/step - loss: 0.1845 - sparse_categorical_accuracy: 0.9291\n", "Epoch 4/10\n", "820/820 [==============================] - 17s 21ms/step - loss: 0.1821 - sparse_categorical_accuracy: 0.9304\n", "Epoch 5/10\n", "820/820 [==============================] - 17s 20ms/step - loss: 0.1800 - sparse_categorical_accuracy: 0.9312\n", "Epoch 6/10\n", "820/820 [==============================] - 16s 20ms/step - loss: 0.1782 - sparse_categorical_accuracy: 0.9317\n", "Epoch 7/10\n", "820/820 [==============================] - 17s 20ms/step - loss: 0.1771 - sparse_categorical_accuracy: 0.9320\n", "Epoch 8/10\n", "820/820 [==============================] - 20s 24ms/step - loss: 0.1763 - sparse_categorical_accuracy: 0.9323\n", "Epoch 9/10\n", "820/820 [==============================] - 17s 20ms/step - loss: 0.1753 - sparse_categorical_accuracy: 0.9327\n", "Epoch 10/10\n", "820/820 [==============================] - 16s 20ms/step - loss: 0.1746 - sparse_categorical_accuracy: 0.9327\n", "Model training finished\n", "Evaluating the model on the test data...\n", "274/274 [==============================] - 6s 13ms/step - loss: 0.1681 - sparse_categorical_accuracy: 0.9350\n", "Test accuracy: 93.5%\n" ] } ] }, { "cell_type": "code", "source": [ "test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "colnames=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'target']\n", "data = pd.read_csv(\"test_data.csv\", names=colnames, header=None)\n", "data['target'].replace('normal', 0,inplace=True)\n", "data['target'].replace('anomaly', 1,inplace=True)\n", "y_test = data['target'].values # as a numpy array\n", "from sklearn.metrics import confusion_matrix\n", "y_prediction = finalModel.predict(test_dataset)\n", "y_prediction = np.argmax (y_prediction, axis = 1)\n", "result = confusion_matrix(y_test, y_prediction , normalize='pred')\n", "print(result)\n", "TP = result[0][0]\n", "FP = result[0][1]\n", "TN = result[1][1]\n", "FN = result[1][0]\n", "ACC = (TP+TN)/(TP+TN+FP+FN)\n", "PR = TP/(TP+FP) #precision\n", "TPR = TP/(TP+FN) #Recall or True positive rate\n", "FPR = FP/(FP+TN)\n", "F1Score = 2*(PR*TPR)/(PR+TPR)\n", "print(\"ACC: \" + str(ACC))\n", "print(\"PR: \" + str(PR))\n", "print(\"TPR: \" + str(TPR))\n", "print(\"FPR: \" + str(FPR))\n", "print(\"F1Score: \" + str(F1Score))\n", "import matplotlib.pyplot as plt\n", "import numpy\n", "from sklearn import metrics\n", "\n", "\n", "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = result, display_labels = [True, False])\n", "\n", "cm_display.plot()\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 588 }, "id": "Jwbb045sQ-BF", "outputId": "d086aa83-cb08-4595-9ec0-ce8d11c03244" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "274/274 [==============================] - 4s 9ms/step\n", "[[0.91360217 0.03922436]\n", " [0.08639783 0.96077564]]\n", "ACC: 0.9371889086346885\n", "PR: 0.95883368635097\n", "TPR: 0.9136021730045968\n", "FPR: 0.03922435573521981\n", "F1Score: 0.9356716119522974\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "## CISIDS2017" ], "metadata": { "id": "lx4RpN47ybt6" } }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kHU-PAI6yhyz", "outputId": "ad0343de-eb95-4965-99b4-afddeef85d72" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "import glob\n", "import os\n", "\n", "path = r'C:\\DRO\\DCL_rawdata_files' # use your path\n", "all_files = glob.glob(os.path.join(path , \"/content/drive/MyDrive/datasets/CISIDS2017/*.csv\"))\n", "\n", "li = []\n", "\n", "for filename in all_files:\n", " df = pd.read_csv(filename, index_col=None, header=0)\n", " li.append(df)\n", "\n", "frame = pd.concat(li, axis=0, ignore_index=True)\n", "frame" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 496 }, "id": "orUywltc2yaK", "outputId": "d3e81ead-2ee5-4627-d1e6-6c2c054d2489" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Destination Port Flow Duration Total Fwd Packets \\\n", "0 54865 3 2 \n", "1 55054 109 1 \n", "2 55055 52 1 \n", "3 46236 34 1 \n", "4 54863 3 2 \n", "... ... ... ... \n", "2830738 53 32215 4 \n", "2830739 53 324 2 \n", "2830740 58030 82 2 \n", "2830741 53 1048635 6 \n", "2830742 53 94939 4 \n", "\n", " Total Backward Packets Total Length of Fwd Packets \\\n", "0 0 12 \n", "1 1 6 \n", "2 1 6 \n", "3 1 6 \n", "4 0 12 \n", "... ... ... \n", "2830738 2 112 \n", "2830739 2 84 \n", "2830740 1 31 \n", "2830741 2 192 \n", "2830742 2 188 \n", "\n", " Total Length of Bwd Packets Fwd Packet Length Max \\\n", "0 0 6 \n", "1 6 6 \n", "2 6 6 \n", "3 6 6 \n", "4 0 6 \n", "... ... ... \n", "2830738 152 28 \n", "2830739 362 42 \n", "2830740 6 31 \n", "2830741 256 32 \n", "2830742 226 47 \n", "\n", " Fwd Packet Length Min Fwd Packet Length Mean \\\n", "0 6 6.0 \n", "1 6 6.0 \n", "2 6 6.0 \n", "3 6 6.0 \n", "4 6 6.0 \n", "... ... ... \n", "2830738 28 28.0 \n", "2830739 42 42.0 \n", "2830740 0 15.5 \n", "2830741 32 32.0 \n", "2830742 47 47.0 \n", "\n", " Fwd Packet Length Std ... min_seg_size_forward Active Mean \\\n", "0 0.00000 ... 20 0.0 \n", "1 0.00000 ... 20 0.0 \n", "2 0.00000 ... 20 0.0 \n", "3 0.00000 ... 20 0.0 \n", "4 0.00000 ... 20 0.0 \n", "... ... ... ... ... \n", "2830738 0.00000 ... 20 0.0 \n", "2830739 0.00000 ... 20 0.0 \n", "2830740 21.92031 ... 32 0.0 \n", "2830741 0.00000 ... 20 0.0 \n", "2830742 0.00000 ... 20 0.0 \n", "\n", " Active Std Active Max Active Min Idle Mean Idle Std \\\n", "0 0.0 0 0 0.0 0.0 \n", "1 0.0 0 0 0.0 0.0 \n", "2 0.0 0 0 0.0 0.0 \n", "3 0.0 0 0 0.0 0.0 \n", "4 0.0 0 0 0.0 0.0 \n", "... ... ... ... ... ... \n", "2830738 0.0 0 0 0.0 0.0 \n", "2830739 0.0 0 0 0.0 0.0 \n", "2830740 0.0 0 0 0.0 0.0 \n", "2830741 0.0 0 0 0.0 0.0 \n", "2830742 0.0 0 0 0.0 0.0 \n", "\n", " Idle Max Idle Min Label \n", "0 0 0 BENIGN \n", "1 0 0 BENIGN \n", "2 0 0 BENIGN \n", "3 0 0 BENIGN \n", "4 0 0 BENIGN \n", "... ... ... ... \n", "2830738 0 0 BENIGN \n", "2830739 0 0 BENIGN \n", "2830740 0 0 BENIGN \n", "2830741 0 0 BENIGN \n", "2830742 0 0 BENIGN \n", "\n", "[2830743 rows x 79 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Destination PortFlow DurationTotal Fwd PacketsTotal Backward PacketsTotal Length of Fwd PacketsTotal Length of Bwd PacketsFwd Packet Length MaxFwd Packet Length MinFwd Packet Length MeanFwd Packet Length Std...min_seg_size_forwardActive MeanActive StdActive MaxActive MinIdle MeanIdle StdIdle MaxIdle MinLabel
054865320120666.00.00000...200.00.0000.00.000BENIGN
1550541091166666.00.00000...200.00.0000.00.000BENIGN
255055521166666.00.00000...200.00.0000.00.000BENIGN
346236341166666.00.00000...200.00.0000.00.000BENIGN
454863320120666.00.00000...200.00.0000.00.000BENIGN
..................................................................
2830738533221542112152282828.00.00000...200.00.0000.00.000BENIGN
2830739533242284362424242.00.00000...200.00.0000.00.000BENIGN
283074058030822131631015.521.92031...320.00.0000.00.000BENIGN
283074153104863562192256323232.00.00000...200.00.0000.00.000BENIGN
2830742539493942188226474747.00.00000...200.00.0000.00.000BENIGN
\n", "

2830743 rows × 79 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "code", "source": [ "column_headers = list(frame.columns.values)\n", "column_headers" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "eJcGQKBn4Abs", "outputId": "2ceaad2f-38fd-461e-9241-6f59c496f0a4" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[' Destination Port',\n", " ' Flow Duration',\n", " ' Total Fwd Packets',\n", " ' Total Backward Packets',\n", " 'Total Length of Fwd Packets',\n", " ' Total Length of Bwd Packets',\n", " ' Fwd Packet Length Max',\n", " ' Fwd Packet Length Min',\n", " ' Fwd Packet Length Mean',\n", " ' Fwd Packet Length Std',\n", " 'Bwd Packet Length Max',\n", " ' Bwd Packet Length Min',\n", " ' Bwd Packet Length Mean',\n", " ' Bwd Packet Length Std',\n", " 'Flow Bytes/s',\n", " ' Flow Packets/s',\n", " ' Flow IAT Mean',\n", " ' Flow IAT Std',\n", " ' Flow IAT Max',\n", " ' Flow IAT Min',\n", " 'Fwd IAT Total',\n", " ' Fwd IAT Mean',\n", " ' Fwd IAT Std',\n", " ' Fwd IAT Max',\n", " ' Fwd IAT Min',\n", " 'Bwd IAT Total',\n", " ' Bwd IAT Mean',\n", " ' Bwd IAT Std',\n", " ' Bwd IAT Max',\n", " ' Bwd IAT Min',\n", " 'Fwd PSH Flags',\n", " ' Bwd PSH Flags',\n", " ' Fwd URG Flags',\n", " ' Bwd URG Flags',\n", " ' Fwd Header Length',\n", " ' Bwd Header Length',\n", " 'Fwd Packets/s',\n", " ' Bwd Packets/s',\n", " ' Min Packet Length',\n", " ' Max Packet Length',\n", " ' Packet Length Mean',\n", " ' Packet Length Std',\n", " ' Packet Length Variance',\n", " 'FIN Flag Count',\n", " ' SYN Flag Count',\n", " ' RST Flag Count',\n", " ' PSH Flag Count',\n", " ' ACK Flag Count',\n", " ' URG Flag Count',\n", " ' CWE Flag Count',\n", " ' ECE Flag Count',\n", " ' Down/Up Ratio',\n", " ' Average Packet Size',\n", " ' Avg Fwd Segment Size',\n", " ' Avg Bwd Segment Size',\n", " ' Fwd Header Length.1',\n", " 'Fwd Avg Bytes/Bulk',\n", " ' Fwd Avg Packets/Bulk',\n", " ' Fwd Avg Bulk Rate',\n", " ' Bwd Avg Bytes/Bulk',\n", " ' Bwd Avg Packets/Bulk',\n", " 'Bwd Avg Bulk Rate',\n", " 'Subflow Fwd Packets',\n", " ' Subflow Fwd Bytes',\n", " ' Subflow Bwd Packets',\n", " ' Subflow Bwd Bytes',\n", " 'Init_Win_bytes_forward',\n", " ' Init_Win_bytes_backward',\n", " ' act_data_pkt_fwd',\n", " ' min_seg_size_forward',\n", " 'Active Mean',\n", " ' Active Std',\n", " ' Active Max',\n", " ' Active Min',\n", " 'Idle Mean',\n", " ' Idle Std',\n", " ' Idle Max',\n", " ' Idle Min',\n", " ' Label']" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "source": [ "frame[' Label'] = frame[' Label'].str.replace(r\"^(.(?:1: FutureWarning: The default value of regex will change from True to False in a future version.\n", " frame[' Label'] = frame[' Label'].str.replace(r\"^(.(?" ], "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAsDElEQVR4nO3de1RVdcL/8c8R9Ihy8cpFI294y0zUvOA8421QNGpl+qg5+ojXpykrjdLieSYda4yavLUayxxBsyYlbdSyxmRowFLMTLE0L3nFFFBLrioo7N8f/ThPJ0AFkQNf36+19lqdvb97n+9mrYPv9tnnYLMsyxIAAIAharl6AgAAAJWJuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBcMutXLlSNptNJ06ccPVUANwGiBsAcKFPPvlEf/rTn1w9DcAoxA0AuNAnn3yiuXPnunoagFGIGwAAYBTiBoCTdevWyWazKSkpqcS2t956SzabTfv27ZMkffPNN5owYYJat26tunXryt/fX5MmTdKPP/543eex2Wylvh3TsmVLTZgwwWldZmamZsyYocDAQNntdgUFBemVV15RUVHRDZ3TP//5T/Xr109eXl7y9vZWjx499N577zmNWbt2rbp37y4PDw81adJE48aN0+nTp53G9O/fX/379y9x/AkTJqhly5aOxydOnJDNZtP8+fO1bNkytWnTRna7XT169NBXX33ltN+SJUscP4/iBcDNcXf1BABUL+Hh4fL09NT777+vfv36OW2Li4tTp06ddPfdd0uS4uPjdezYMU2cOFH+/v7av3+/li1bpv3792vHjh2V8g/1xYsX1a9fP50+fVqPPPKI7rzzTm3fvl1RUVFKS0vT4sWLr7n/ypUrNWnSJHXq1ElRUVFq0KCB9uzZo82bN+v3v/+9Y8zEiRPVo0cPRUdHKyMjQ6+99pq2bdumPXv2qEGDBhWa+3vvvaecnBw98sgjstls+stf/qLhw4fr2LFjql27th555BGdOXNG8fHxeueddyr0HABKYQHAr4wZM8by9fW1rl696liXlpZm1apVy3rhhRcc6y5evFhi39WrV1uSrK1btzrWrVixwpJkHT9+3LFOkjVnzpwS+7do0cKKiIhwPH7xxRet+vXrW4cPH3Ya99xzz1lubm5WampqmeeRmZlpeXl5Wb169bIuXbrktK2oqMiyLMsqKCiwfH19rbvvvttpzKZNmyxJ1uzZsx3r+vXrZ/Xr16/E80RERFgtWrRwPD5+/LglyWrcuLH1008/OdZv3LjRkmR99NFHjnXTpk2z+FUMVC7elgJQwujRo3X27FklJiY61q1bt05FRUUaPXq0Y52Hh4fjvy9fvqzz58+rd+/ekqTdu3dXylzWrl2r3/72t2rYsKHOnz/vWEJDQ1VYWKitW7eWuW98fLxycnL03HPPqW7duk7biq8q7dq1S2fPntVjjz3mNCY8PFwdOnTQxx9/XOG5jx49Wg0bNnQ8/u1vfytJOnbsWIWPCeD6buu42bp1qx544AE1a9ZMNptNGzZsKPcxLMvS/Pnz1a5dO9ntdjVv3lzz5s2r/MkCVWjIkCHy8fFRXFycY11cXJyCg4PVrl07x7qffvpJ06dPl5+fnzw8PNS0aVO1atVKkpSVlVUpc/n++++1efNmNW3a1GkJDQ2VJJ09e7bMfY8ePSpJjrfRSnPy5ElJUvv27Uts69Chg2N7Rdx5551Oj4tD58KFCxU+JoDru63vucnLy1OXLl00adIkDR8+vELHmD59urZs2aL58+erc+fO+umnn/TTTz9V8kyBqmW32zVs2DCtX79eb7zxhjIyMrRt2za99NJLTuNGjRql7du3a+bMmQoODpanp6eKioo0ZMiQG77Z99cKCwudHhcVFWnQoEGaNWtWqeN/GVu3ms1mk2VZJdb/es7F3NzcSl1f2jEAVJ7bOm6GDh2qoUOHlrk9Pz9f//u//6vVq1crMzNTd999t1555RXHpyUOHDigN998U/v27XP8X1/x/7UCNd3o0aP19ttvKyEhQQcOHJBlWU5vSV24cEEJCQmaO3euZs+e7Vj//fff39DxGzZsqMzMTKd1BQUFSktLc1rXpk0b5ebmOq7UlEebNm0kSfv27VNQUFCpY1q0aCFJOnTokAYOHOi07dChQ47txXMu7S2lm7m6w6ejgMp3W78tdT2PP/64kpOTtWbNGn3zzTcaOXKkhgwZ4vjl/dFHH6l169batGmTWrVqpZYtW2rKlClcuYERQkND1ahRI8XFxSkuLk49e/Z0ivfiqxK/vgpxvU8vFWvTpk2J+2WWLVtW4irIqFGjlJycrE8//bTEMTIzM3X16tUyn2Pw4MHy8vJSdHS0Ll++7LSteN733nuvfH19tXTpUuXn5zu2//Of/9SBAwcUHh7uNOeDBw/q3LlzjnV79+7Vtm3bbuCMS1e/fn3HuQCoHLf1lZtrSU1N1YoVK5SamqpmzZpJkp555hlt3rxZK1as0EsvvaRjx47p5MmTWrt2rVatWqXCwkI99dRT+s///E999tlnLj4D4ObUrl1bw4cP15o1a5SXl6f58+c7bff29lbfvn31l7/8RVeuXFHz5s21ZcsWHT9+/IaOP2XKFP3hD3/QiBEjNGjQIO3du1effvqpmjRp4jRu5syZ+vDDD3X//fdrwoQJ6t69u/Ly8vTtt99q3bp1OnHiRIl9fjnHRYsWacqUKerRo4d+//vfq2HDhtq7d68uXryot99+W7Vr19Yrr7yiiRMnql+/fhozZozjo+AtW7bUU0895TjepEmTtHDhQoWFhWny5Mk6e/asli5dqk6dOik7O7ucP+Gfde/eXZL05JNPKiwsTG5ubnr44YcrdCwA/58rP6pVnUiy1q9f73hc/DHQ+vXrOy3u7u7WqFGjLMuyrKlTp1qSrEOHDjn2+/rrry1J1sGDB6v6FIBKFx8fb0mybDabderUqRLbf/jhB+uhhx6yGjRoYPn4+FgjR460zpw5U+Jj3qV9FLywsNB69tlnrSZNmlj16tWzwsLCrCNHjpT4KLhlWVZOTo4VFRVlBQUFWXXq1LGaNGli9enTx5o/f75VUFBw3fP48MMPrT59+lgeHh6Wt7e31bNnT2v16tVOY+Li4qyuXbtadrvdatSokTV27Fjrhx9+KHGsd99912rdurVVp04dKzg42Pr000/L/Cj4q6++WmL/X/9srl69aj3xxBNW06ZNLZvNxsfCgUpgsyzubJN+ft97/fr1GjZsmKSfPxkyduxY7d+/v8RNgZ6envL399ecOXP00ksv6cqVK45tly5dUr169bRlyxYNGjSoKk8BAACIt6XK1LVrVxUWFurs2bOO76b4td/85je6evWqjh496rhx8fDhw5LkdBMiAACoOrf1lZvc3FwdOXJE0s8xs3DhQg0YMECNGjXSnXfeqXHjxmnbtm1asGCBunbtqnPnzikhIUH33HOPwsPDVVRUpB49esjT01OLFy9WUVGRpk2bJm9vb23ZssXFZwcAwO3pto6bxMREDRgwoMT6iIgIrVy5UleuXNGf//xnrVq1SqdPn1aTJk3Uu3dvzZ07V507d5YknTlzRk888YS2bNmi+vXra+jQoVqwYIEaNWpU1acDAAB0m8cNAAAwD99zAwAAjELcAAAAo9x2n5YqKirSmTNn5OXlxdeeAwBQQ1iWpZycHDVr1ky1al372sxtFzdnzpxRYGCgq6cBAAAq4NSpU7rjjjuuOea2ixsvLy9JP/9wvL29XTwbAABwI7KzsxUYGOj4d/xabru4KX4rytvbm7gBAKCGuZFbSrihGAAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUdxdPQFTdZ+5ytVTAKqdr18d7+opALgNcOUGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYxaVxEx0drR49esjLy0u+vr4aNmyYDh06dN391q5dqw4dOqhu3brq3LmzPvnkkyqYLQAAqAlcGjdJSUmaNm2aduzYofj4eF25ckWDBw9WXl5emfts375dY8aM0eTJk7Vnzx4NGzZMw4YN0759+6pw5gAAoLqyWZZluXoSxc6dOydfX18lJSWpb9++pY4ZPXq08vLytGnTJse63r17Kzg4WEuXLr3uc2RnZ8vHx0dZWVny9vautLn/WveZq27ZsYGa6utXx7t6CgBqqPL8+12t7rnJysqSJDVq1KjMMcnJyQoNDXVaFxYWpuTk5FLH5+fnKzs722kBAADmqjZxU1RUpBkzZug3v/mN7r777jLHpaeny8/Pz2mdn5+f0tPTSx0fHR0tHx8fxxIYGFip8wYAANVLtYmbadOmad++fVqzZk2lHjcqKkpZWVmO5dSpU5V6fAAAUL24u3oCkvT4449r06ZN2rp1q+64445rjvX391dGRobTuoyMDPn7+5c63m63y263V9pcAQBA9ebSKzeWZenxxx/X+vXr9dlnn6lVq1bX3SckJEQJCQlO6+Lj4xUSEnKrpgkAAGoQl165mTZtmt577z1t3LhRXl5ejvtmfHx85OHhIUkaP368mjdvrujoaEnS9OnT1a9fPy1YsEDh4eFas2aNdu3apWXLlrnsPAAAQPXh0is3b775prKystS/f38FBAQ4lri4OMeY1NRUpaWlOR736dNH7733npYtW6YuXbpo3bp12rBhwzVvQgYAALcPl165uZGv2ElMTCyxbuTIkRo5cuQtmBEAAKjpqs2npQAAACoDcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjOLSuNm6daseeOABNWvWTDabTRs2bLjm+MTERNlsthJLenp61UwYAABUey6Nm7y8PHXp0kVLliwp136HDh1SWlqaY/H19b1FMwQAADWNuyuffOjQoRo6dGi59/P19VWDBg0qf0IAAKDGq5H33AQHBysgIECDBg3Stm3brjk2Pz9f2dnZTgsAADBXjYqbgIAALV26VB988IE++OADBQYGqn///tq9e3eZ+0RHR8vHx8exBAYGVuGMAQBAVXPp21Ll1b59e7Vv397xuE+fPjp69KgWLVqkd955p9R9oqKiFBkZ6XicnZ1N4AAAYLAaFTel6dmzp7744osyt9vtdtnt9iqcEQAAcKUa9bZUaVJSUhQQEODqaQAAgGrCpVducnNzdeTIEcfj48ePKyUlRY0aNdKdd96pqKgonT59WqtWrZIkLV68WK1atVKnTp10+fJlLV++XJ999pm2bNniqlMAAADVjEvjZteuXRowYIDjcfG9MREREVq5cqXS0tKUmprq2F5QUKCnn35ap0+fVr169XTPPffoX//6l9MxAADA7c1mWZbl6klUpezsbPn4+CgrK0ve3t637Hm6z1x1y44N1FRfvzre1VMAUEOV59/vGn/PDQAAwC8RNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxSobgZOHCgMjMzS6zPzs7WwIEDb3ZOAAAAFVahuElMTFRBQUGJ9ZcvX9bnn39+05MCAACoKPfyDP7mm28c//3dd98pPT3d8biwsFCbN29W8+bNK292AAAA5VSuuAkODpbNZpPNZiv17ScPDw+9/vrrlTY5AACA8ipX3Bw/flyWZal169bauXOnmjZt6thWp04d+fr6ys3NrdInCQAAcKPKFTctWrSQJBUVFd2SyQAAANyscsXNL33//ff697//rbNnz5aIndmzZ9/0xAAAACqiQnHzt7/9TY8++qiaNGkif39/2Ww2xzabzUbcAAAAl6lQ3Pz5z3/WvHnz9Oyzz1b2fAAAAG5Khb7n5sKFCxo5cmRlzwUAAOCmVShuRo4cqS1btlT2XAAAAG5ahd6WCgoK0vPPP68dO3aoc+fOql27ttP2J598slImBwAAUF4Viptly5bJ09NTSUlJSkpKctpms9mIGwAA4DIVipvjx49X9jwAAAAqRYXuuQEAAKiuKnTlZtKkSdfcHhsbW6HJAAAA3KwKxc2FCxecHl+5ckX79u1TZmZmqX9QEwAAoKpUKG7Wr19fYl1RUZEeffRRtWnT5qYnBQAAUFGVds9NrVq1FBkZqUWLFlXWIQEAAMqtUm8oPnr0qK5evVqZhwQAACiXCr0tFRkZ6fTYsiylpaXp448/VkRERKVMDAAAoCIqFDd79uxxelyrVi01bdpUCxYsuO4nqQAAAG6lCsXNv//978qeBwAAQKWoUNwUO3funA4dOiRJat++vZo2bVopkwIAAKioCt1QnJeXp0mTJikgIEB9+/ZV37591axZM02ePFkXL16s7DkCAADcsArFTWRkpJKSkvTRRx8pMzNTmZmZ2rhxo5KSkvT0009X9hwBAABuWIXelvrggw+0bt069e/f37Huvvvuk4eHh0aNGqU333yzsuYHAABQLhW6cnPx4kX5+fmVWO/r68vbUgAAwKUqFDchISGaM2eOLl++7Fh36dIlzZ07VyEhIZU2OQAAgPKq0NtSixcv1pAhQ3THHXeoS5cukqS9e/fKbrdry5YtlTpBAACA8qhQ3HTu3Fnff/+9/v73v+vgwYOSpDFjxmjs2LHy8PCo1AkCAACUR4XiJjo6Wn5+fpo6darT+tjYWJ07d07PPvtspUwOAACgvCp0z81bb72lDh06lFjfqVMnLV269KYnBQAAUFEVipv09HQFBASUWN+0aVOlpaXd9KQAAAAqqkJxExgYqG3btpVYv23bNjVr1uymJwUAAFBRFbrnZurUqZoxY4auXLmigQMHSpISEhI0a9YsvqEYAAC4VIXiZubMmfrxxx/12GOPqaCgQJJUt25dPfvss4qKiqrUCQIAAJRHheLGZrPplVde0fPPP68DBw7Iw8NDbdu2ld1ur+z5AQAAlEuF4qaYp6enevToUVlzAQAAuGkVuqEYAACguiJuAACAUYgbAABgFOIGAAAYxaVxs3XrVj3wwANq1qyZbDabNmzYcN19EhMT1a1bN9ntdgUFBWnlypW3fJ4AAKDmcGnc5OXlqUuXLlqyZMkNjT9+/LjCw8M1YMAApaSkaMaMGZoyZYo+/fTTWzxTAABQU9zUR8Fv1tChQzV06NAbHr906VK1atVKCxYskCR17NhRX3zxhRYtWqSwsLBbNU0AAFCD1Kh7bpKTkxUaGuq0LiwsTMnJyWXuk5+fr+zsbKcFAACYq0bFTXp6uvz8/JzW+fn5KTs7W5cuXSp1n+joaPn4+DiWwMDAqpgqAABwkRoVNxURFRWlrKwsx3Lq1ClXTwkAANxCLr3nprz8/f2VkZHhtC4jI0Pe3t7y8PAodR+73c7fvAIA4DZSo67chISEKCEhwWldfHy8QkJCXDQjAABQ3bg0bnJzc5WSkqKUlBRJP3/UOyUlRampqZJ+fktp/PjxjvF/+MMfdOzYMc2aNUsHDx7UG2+8offff19PPfWUK6YPAACqIZfGza5du9S1a1d17dpVkhQZGamuXbtq9uzZkqS0tDRH6EhSq1at9PHHHys+Pl5dunTRggULtHz5cj4GDgAAHFx6z03//v1lWVaZ20v79uH+/ftrz549t3BWAACgJqtR99wAAABcD3EDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwirurJwAANU3qC51dPQWg2rlz9reunoIDV24AAIBRiBsAAGAU4gYAABiFuAEAAEapFnGzZMkStWzZUnXr1lWvXr20c+fOMseuXLlSNpvNaalbt24VzhYAAFRnLo+buLg4RUZGas6cOdq9e7e6dOmisLAwnT17tsx9vL29lZaW5lhOnjxZhTMGAADVmcvjZuHChZo6daomTpyou+66S0uXLlW9evUUGxtb5j42m03+/v6Oxc/PrwpnDAAAqjOXxk1BQYG+/vprhYaGOtbVqlVLoaGhSk5OLnO/3NxctWjRQoGBgXrwwQe1f//+Msfm5+crOzvbaQEAAOZyadycP39ehYWFJa68+Pn5KT09vdR92rdvr9jYWG3cuFHvvvuuioqK1KdPH/3www+ljo+OjpaPj49jCQwMrPTzAAAA1YfL35Yqr5CQEI0fP17BwcHq16+f/vGPf6hp06Z66623Sh0fFRWlrKwsx3Lq1KkqnjEAAKhKLv3zC02aNJGbm5syMjKc1mdkZMjf3/+GjlG7dm117dpVR44cKXW73W6X3W6/6bkCAICawaVXburUqaPu3bsrISHBsa6oqEgJCQkKCQm5oWMUFhbq22+/VUBAwK2aJgAAqEFc/oczIyMjFRERoXvvvVc9e/bU4sWLlZeXp4kTJ0qSxo8fr+bNmys6OlqS9MILL6h3794KCgpSZmamXn31VZ08eVJTpkxx5WkAAIBqwuVxM3r0aJ07d06zZ89Wenq6goODtXnzZsdNxqmpqapV6/8uMF24cEFTp05Venq6GjZsqO7du2v79u266667XHUKAACgGrFZlmW5ehJVKTs7Wz4+PsrKypK3t/cte57uM1fdsmMDNdXXr4539RQqReoLnV09BaDauXP2t7f0+OX597vGfVoKAADgWogbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGCUahE3S5YsUcuWLVW3bl316tVLO3fuvOb4tWvXqkOHDqpbt646d+6sTz75pIpmCgAAqjuXx01cXJwiIyM1Z84c7d69W126dFFYWJjOnj1b6vjt27drzJgxmjx5svbs2aNhw4Zp2LBh2rdvXxXPHAAAVEcuj5uFCxdq6tSpmjhxou666y4tXbpU9erVU2xsbKnjX3vtNQ0ZMkQzZ85Ux44d9eKLL6pbt27661//WsUzBwAA1ZFL46agoEBff/21QkNDHetq1aql0NBQJScnl7pPcnKy03hJCgsLK3M8AAC4vbi78snPnz+vwsJC+fn5Oa338/PTwYMHS90nPT291PHp6emljs/Pz1d+fr7jcVZWliQpOzv7ZqZ+XYX5l27p8YGa6Fa/7qpKzuVCV08BqHZu9eu7+PiWZV13rEvjpipER0dr7ty5JdYHBga6YDbA7c3n9T+4egoAbpVonyp5mpycHPn4XPu5XBo3TZo0kZubmzIyMpzWZ2RkyN/fv9R9/P39yzU+KipKkZGRjsdFRUX66aef1LhxY9lstps8A1R32dnZCgwM1KlTp+Tt7e3q6QCoRLy+by+WZSknJ0fNmjW77liXxk2dOnXUvXt3JSQkaNiwYZJ+jo+EhAQ9/vjjpe4TEhKihIQEzZgxw7EuPj5eISEhpY632+2y2+1O6xo0aFAZ00cN4u3tzS8/wFC8vm8f17tiU8zlb0tFRkYqIiJC9957r3r27KnFixcrLy9PEydOlCSNHz9ezZs3V3R0tCRp+vTp6tevnxYsWKDw8HCtWbNGu3bt0rJly1x5GgAAoJpwedyMHj1a586d0+zZs5Wenq7g4GBt3rzZcdNwamqqatX6vw919enTR++9957++Mc/6n/+53/Utm1bbdiwQXfffberTgEAAFQjNutGbjsGaqj8/HxFR0crKiqqxNuTAGo2Xt8oC3EDAACM4vJvKAYAAKhMxA0AADAKcQMAAIxC3AAAAKMQN3CpCRMmyGazOZbGjRtryJAh+uabbxxjfrn9l8uaNWskSYmJibLZbOrUqZMKC53/5k+DBg20cuVKx+OWLVtq8eLFTmP27Nmj0aNHKyAgQHa7XS1atND999+vjz76yPE3TE6cOCGbzSZfX1/l5OQ47R8cHKw//elPlfdDAWqg5ORkubm5KTw83Gl9eV87+/fv16hRo9S0aVPZ7Xa1a9dOs2fP1sWLF53GtWzZ0un3wC916tRJNpvN6bVfLDo6Wm5ubnr11VdLbFu5cmWJL3mdPHmyOnfurIKCAqf1n3zyierUqaPdu3eXOA5cj7iByw0ZMkRpaWlKS0tTQkKC3N3ddf/99zuNWbFihWNM8VL8rdbFjh07plWrVpXruTdu3KjevXsrNzdXb7/9tg4cOKDNmzfroYce0h//+EfHH1otlpOTo/nz51foPAGTxcTE6IknntDWrVt15syZEttv5LWzY8cO9erVSwUFBfr44491+PBhzZs3TytXrtSgQYNKBEZgYKBWrFhR4hjp6emqX79+qc8RGxurWbNmKTY29obOa9GiRcrJydGcOXMc6zIzMzV16lQ9//zz6tat2w0dB1WLuIHL2e12+fv7y9/fX8HBwXruued06tQpnTt3zjGmQYMGjjHFS926dZ2O88QTT2jOnDlOfwX+WvLy8jR58mSFh4fr448/1uDBg9W6dWt17NhRkydP1t69e0t81fcTTzyhhQsX6uzZszd/4oAhcnNzFRcXp0cffVTh4eGlXjG53mvHsixNnjxZHTt21D/+8Q/17NlTLVq00MiRI/XRRx8pOTlZixYtctpn7NixSkpK0qlTpxzrYmNjNXbsWLm7l/yO2qSkJF26dEkvvPCCsrOztX379uuem7e3t1asWKEFCxboyy+/lCTNmDFDzZs3V1RU1HX3h2sQN6hWcnNz9e677yooKEiNGzcu174zZszQ1atX9frrr9/Q+C1btujHH3/UrFmzyhzz6z+uOmbMGAUFBemFF14o19wAk73//vvq0KGD2rdvr3Hjxik2Nla//gq16712UlJS9N133ykyMtLpW+klqUuXLgoNDdXq1aud1vv5+SksLExvv/22JOnixYuKi4vTpEmTSn2OmJgYjRkzRrVr19aYMWMUExNzQ+c3YMAAPfbYY4qIiNDatWv1/vvva9WqVaUGFKoH4gYut2nTJnl6esrT01NeXl768MMPFRcX5/QLbsyYMY4xxUtqaqrTcerVq6c5c+YoOjq6xNtJpTl8+LAkqX379o51X331ldNzbNq0yWkfm82ml19+WcuWLdPRo0dv5rQBY8TExGjcuHGSfn6bOSsrS0lJSU5jrvfaKX49duzYsdTn6Nixo2PML02aNEkrV66UZVlat26d2rRpo+Dg4BLjsrOztW7dOsc8x40bp/fff1+5ubk3dI7Ff9/w4Ycf1ksvvaQOHTrc0H5wDeIGLjdgwAClpKQoJSVFO3fuVFhYmIYOHaqTJ086xixatMgxpngp7c/eT548WY0bN9Yrr7xSobncc889juPn5eXp6tWrJcaEhYXpP/7jP/T8889X6DkAkxw6dEg7d+7UmDFjJEnu7u4aPXp0qVdFbuS1U94vzQ8PD1dubq62bt2q2NjYMq/arF69Wm3atFGXLl0k/Xwzc4sWLRQXF3dDz+Ph4aFnnnlG9erV0/Tp08s1R1Q94gYuV79+fQUFBSkoKEg9evTQ8uXLlZeXp7/97W+OMf7+/o4xxUtpl4Td3d01b948vfbaa6Xe1PhLbdu2lfTzL+didrvdcfxrefnllxUXF6c9e/aU51QB48TExOjq1atq1qyZ3N3d5e7urjfffFMffPBBqVdQy3rttGvXTpJ04MCBUp/nwIEDjjG/5O7urv/6r//SnDlz9OWXX2rs2LFlznP//v2OObq7u+u777674RuLi5/Lzc2txNvVqH6IG1Q7NptNtWrV0qVLlyq0/8iRI9WpUyfNnTv3muMGDx6sRo0aVegqT8+ePTV8+HA999xzFZojYIKrV69q1apVWrBggdNV1b1796pZs2Yl7pGRyn7tBAcHq0OHDlq0aJGKioqctu3du1f/+te/HFeHfm3SpElKSkrSgw8+qIYNG5bY/u2332rXrl1KTEx0mmdiYqKSk5N18ODBm/gpoDribii4XH5+vtLT0yVJFy5c0F//+lfl5ubqgQcecIzJzMx0jCnm5eVV5sc9X375ZYWFhV3zeT09PbV8+XKNHj1a4eHhevLJJ9W2bVvl5uZq8+bNkiQ3N7cy9583b546derETYW4bW3atEkXLlzQ5MmTS3yycMSIEYqJidGQIUNK7Ffaa8dmsykmJkaDBg3SiBEjFBUVJX9/f3355Zd6+umnFRISohkzZpQ6j44dO+r8+fOqV69eqdtjYmLUs2dP9e3bt8S2Hj16KCYmxvG9N4WFhUpJSXEaY7fby7wXCNUTV27gcps3b1ZAQIACAgLUq1cvffXVV1q7dq369+/vGDNx4kTHmOLlWp+KGjhwoAYOHFjqPTO/9NBDD2n79u2qV6+exo8fr/bt22vgwIH67LPPtGbNmhLft/NL7dq106RJk3T58uVynzNggpiYGIWGhpYIG+nnuNm1a5eys7NLbCvrtdOnTx/t2LFDbm5uGjp0qIKCghQVFaWIiAjFx8fLbreXOZfGjRvLw8OjxPqCggK9++67GjFiRKn7jRgxQqtWrdKVK1ck/fyJza5duzotv/wfLdQMNqu8d28BAABUY1y5AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gaAcWw2mzZs2HBTx5gwYYKGDRtWKfMBULWIGwDVSmJiomw2mzIzM109FQA1FHEDAACMQtwAqFG++uorDRo0SE2aNJGPj4/69eun3bt3lxiXlpamoUOHysPDQ61bt9a6deuctp86dUqjRo1SgwYN1KhRIz344IM6ceJEFZ0FgFuJuAFQo+Tk5CgiIkJffPGFduzYobZt2+q+++5TTk6O07jnn39eI0aM0N69ezV27Fg9/PDDOnDggCTpypUrCgsLk5eXlz7//HNt27ZNnp6eGjJkiAoKClxxWgAqkfv1hwBA9TFw4ECnx8uWLVODBg2UlJTk9FfcR44cqSlTpkiSXnzxRcXHx+v111/XG2+8obi4OBUVFWn58uWy2WySpBUrVqhBgwZKTEzU4MGDq+6EAFQ6rtwAqFEyMjI0depUtW3bVj4+PvL29lZubq5SU1OdxoWEhJR4XHzlZu/evTpy5Ii8vLzk6ekpT09PNWrUSJcvX9bRo0er7FwA3BpcuQFQo0REROjHH3/Ua6+9phYtWshutyskJKRcbyfl5uaqe/fu+vvf/15iW9OmTStzugBcgLgBUKNs27ZNb7zxhu677z5JP98YfP78+RLjduzYofHjxzs97tq1qySpW7duiouLk6+vr7y9vatm4gCqDG9LAaiWvv32W6WkpDiWvXv3SpLatm2rd955RwcOHNCXX36psWPHysPDo8T+a9euVWxsrA4fPqw5c+Zo586devzxxyVJY8eOVZMmTfTggw/q888/1/Hjx5WYmKgnn3xSP/zwQ5WeJ4DKx5UbANVS3759nR67ubnp6tWriomJ0X//93+rW7duCgwM1EsvvaRnnnmmxP5z587VmjVr9NhjjykgIECrV6/WXXfdJUmqV6+etm7dqmeffVbDhw9XTk6Omjdvrt/97ndcyQEMYLMsy3L1JAAAACoLb0sBAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACM8v8Ap0Rg9kpidMMAAAAASUVORK5CYII=\n" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "CSV_HEADER = column_headers" ], "metadata": { "id": "4czqBnTL5FSf" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df = frame" ], "metadata": { "id": "qM0j2lyw582V" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "def Remove_Outlier_Indices(df):\n", " Q1 = df.quantile(0.02)\n", " Q3 = df.quantile(0.98)\n", " IQR = Q3 - Q1\n", " #trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR)))\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n", " return trueList\n", "\n", "nonOutlierList = Remove_Outlier_Indices(df)\n", "new_data = df[nonOutlierList]\n", "\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "b30d4e39-807c-4290-f5db-67c7fd37a58b", "id": "i_PXbZyt5-fR" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":2: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q1 = df.quantile(0.02)\n", ":3: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q3 = df.quantile(0.98)\n", ":6: FutureWarning: Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version. Do `left, right = left.align(right, axis=1, copy=False)` before e.g. `left == right`\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n" ] } ] }, { "cell_type": "code", "source": [ "df = new_data\n", "df = df.reset_index(drop=True)\n", "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 496 }, "outputId": "34a87d68-8be6-4deb-9143-5f8d21a93428", "id": "FSPL95xg5-fT" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Destination Port Flow Duration Total Fwd Packets \\\n", "0 54865 3 2 \n", "1 55054 109 1 \n", "2 55055 52 1 \n", "3 46236 34 1 \n", "4 54863 3 2 \n", "... ... ... ... \n", "2539413 53 32215 4 \n", "2539414 53 324 2 \n", "2539415 58030 82 2 \n", "2539416 53 1048635 6 \n", "2539417 53 94939 4 \n", "\n", " Total Backward Packets Total Length of Fwd Packets \\\n", "0 0 12 \n", "1 1 6 \n", "2 1 6 \n", "3 1 6 \n", "4 0 12 \n", "... ... ... \n", "2539413 2 112 \n", "2539414 2 84 \n", "2539415 1 31 \n", "2539416 2 192 \n", "2539417 2 188 \n", "\n", " Total Length of Bwd Packets Fwd Packet Length Max \\\n", "0 0 6 \n", "1 6 6 \n", "2 6 6 \n", "3 6 6 \n", "4 0 6 \n", "... ... ... \n", "2539413 152 28 \n", "2539414 362 42 \n", "2539415 6 31 \n", "2539416 256 32 \n", "2539417 226 47 \n", "\n", " Fwd Packet Length Min Fwd Packet Length Mean \\\n", "0 6 6.0 \n", "1 6 6.0 \n", "2 6 6.0 \n", "3 6 6.0 \n", "4 6 6.0 \n", "... ... ... \n", "2539413 28 28.0 \n", "2539414 42 42.0 \n", "2539415 0 15.5 \n", "2539416 32 32.0 \n", "2539417 47 47.0 \n", "\n", " Fwd Packet Length Std ... min_seg_size_forward Active Mean \\\n", "0 0.00000 ... 20 0.0 \n", "1 0.00000 ... 20 0.0 \n", "2 0.00000 ... 20 0.0 \n", "3 0.00000 ... 20 0.0 \n", "4 0.00000 ... 20 0.0 \n", "... ... ... ... ... \n", "2539413 0.00000 ... 20 0.0 \n", "2539414 0.00000 ... 20 0.0 \n", "2539415 21.92031 ... 32 0.0 \n", "2539416 0.00000 ... 20 0.0 \n", "2539417 0.00000 ... 20 0.0 \n", "\n", " Active Std Active Max Active Min Idle Mean Idle Std \\\n", "0 0.0 0 0 0.0 0.0 \n", "1 0.0 0 0 0.0 0.0 \n", "2 0.0 0 0 0.0 0.0 \n", "3 0.0 0 0 0.0 0.0 \n", "4 0.0 0 0 0.0 0.0 \n", "... ... ... ... ... ... \n", "2539413 0.0 0 0 0.0 0.0 \n", "2539414 0.0 0 0 0.0 0.0 \n", "2539415 0.0 0 0 0.0 0.0 \n", "2539416 0.0 0 0 0.0 0.0 \n", "2539417 0.0 0 0 0.0 0.0 \n", "\n", " Idle Max Idle Min Label \n", "0 0 0 BENIGN \n", "1 0 0 BENIGN \n", "2 0 0 BENIGN \n", "3 0 0 BENIGN \n", "4 0 0 BENIGN \n", "... ... ... ... \n", "2539413 0 0 BENIGN \n", "2539414 0 0 BENIGN \n", "2539415 0 0 BENIGN \n", "2539416 0 0 BENIGN \n", "2539417 0 0 BENIGN \n", "\n", "[2539418 rows x 79 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Destination PortFlow DurationTotal Fwd PacketsTotal Backward PacketsTotal Length of Fwd PacketsTotal Length of Bwd PacketsFwd Packet Length MaxFwd Packet Length MinFwd Packet Length MeanFwd Packet Length Std...min_seg_size_forwardActive MeanActive StdActive MaxActive MinIdle MeanIdle StdIdle MaxIdle MinLabel
054865320120666.00.00000...200.00.0000.00.000BENIGN
1550541091166666.00.00000...200.00.0000.00.000BENIGN
255055521166666.00.00000...200.00.0000.00.000BENIGN
346236341166666.00.00000...200.00.0000.00.000BENIGN
454863320120666.00.00000...200.00.0000.00.000BENIGN
..................................................................
2539413533221542112152282828.00.00000...200.00.0000.00.000BENIGN
2539414533242284362424242.00.00000...200.00.0000.00.000BENIGN
253941558030822131631015.521.92031...320.00.0000.00.000BENIGN
253941653104863562192256323232.00.00000...200.00.0000.00.000BENIGN
2539417539493942188226474747.00.00000...200.00.0000.00.000BENIGN
\n", "

2539418 rows × 79 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "from sklearn.feature_selection import SelectKBest\n", "from sklearn.feature_selection import f_regression\n", "\n", "selector = SelectKBest(f_regression, k=10)\n", "X = df.drop([' Label'], axis=1)\n", "Y = df[\" Label\"].astype('category').cat.codes\n", "X_new = selector.fit(X, Y)\n", "X.columns.values[selector.get_support()]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "2523add3-8fd4-4135-f882-ac825e80fad6", "id": "ck_o5gg35-fV" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array(['Bwd Packet Length Max', ' Bwd Packet Length Mean',\n", " ' Bwd Packet Length Std', ' Max Packet Length',\n", " ' Packet Length Mean', ' Packet Length Std',\n", " ' Packet Length Variance', ' Average Packet Size',\n", " ' Avg Bwd Segment Size', ' Idle Min'], dtype=object)" ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "code", "source": [ "columns = X.columns.values[selector.get_support()]" ], "metadata": { "id": "EeRnvhzX5-fV" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "columns = []\n", "for c in X.columns.values[selector.get_support()]:\n", " columns.append(str(c))" ], "metadata": { "id": "tLWiApuU5-fW" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "principalDf = pd.DataFrame(data = X\n", " , columns = X.columns.values[selector.get_support()])\n", "finalDf = pd.concat([principalDf, df[\" Label\"]], axis = 1)\n", "finalDf" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 441 }, "outputId": "5d9cbcdc-d1a2-4c38-da3c-8c6cda992c94", "id": "2_NJWPRf5-fW" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Bwd Packet Length Max Bwd Packet Length Mean \\\n", "0 0 0.0 \n", "1 6 6.0 \n", "2 6 6.0 \n", "3 6 6.0 \n", "4 0 0.0 \n", "... ... ... \n", "2539413 76 76.0 \n", "2539414 181 181.0 \n", "2539415 6 6.0 \n", "2539416 128 128.0 \n", "2539417 113 113.0 \n", "\n", " Bwd Packet Length Std Max Packet Length Packet Length Mean \\\n", "0 0.0 6 6.000000 \n", "1 0.0 6 6.000000 \n", "2 0.0 6 6.000000 \n", "3 0.0 6 6.000000 \n", "4 0.0 6 6.000000 \n", "... ... ... ... \n", "2539413 0.0 76 41.714286 \n", "2539414 0.0 181 97.600000 \n", "2539415 0.0 31 17.000000 \n", "2539416 0.0 128 53.333333 \n", "2539417 0.0 113 65.857143 \n", "\n", " Packet Length Std Packet Length Variance Average Packet Size \\\n", "0 0.000000 0.000000 9.000000 \n", "1 0.000000 0.000000 9.000000 \n", "2 0.000000 0.000000 9.000000 \n", "3 0.000000 0.000000 9.000000 \n", "4 0.000000 0.000000 9.000000 \n", "... ... ... ... \n", "2539413 23.421602 548.571429 48.666667 \n", "2539414 76.133435 5796.300000 122.000000 \n", "2539415 16.350331 267.333333 22.666667 \n", "2539416 42.332021 1792.000000 60.000000 \n", "2539417 32.204702 1037.142857 76.833333 \n", "\n", " Avg Bwd Segment Size Idle Min Label \n", "0 0.0 0 BENIGN \n", "1 6.0 0 BENIGN \n", "2 6.0 0 BENIGN \n", "3 6.0 0 BENIGN \n", "4 0.0 0 BENIGN \n", "... ... ... ... \n", "2539413 76.0 0 BENIGN \n", "2539414 181.0 0 BENIGN \n", "2539415 6.0 0 BENIGN \n", "2539416 128.0 0 BENIGN \n", "2539417 113.0 0 BENIGN \n", "\n", "[2539418 rows x 11 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Bwd Packet Length MaxBwd Packet Length MeanBwd Packet Length StdMax Packet LengthPacket Length MeanPacket Length StdPacket Length VarianceAverage Packet SizeAvg Bwd Segment SizeIdle MinLabel
000.00.066.0000000.0000000.0000009.0000000.00BENIGN
166.00.066.0000000.0000000.0000009.0000006.00BENIGN
266.00.066.0000000.0000000.0000009.0000006.00BENIGN
366.00.066.0000000.0000000.0000009.0000006.00BENIGN
400.00.066.0000000.0000000.0000009.0000000.00BENIGN
....................................
25394137676.00.07641.71428623.421602548.57142948.66666776.00BENIGN
2539414181181.00.018197.60000076.1334355796.300000122.000000181.00BENIGN
253941566.00.03117.00000016.350331267.33333322.6666676.00BENIGN
2539416128128.00.012853.33333342.3320211792.00000060.000000128.00BENIGN
2539417113113.00.011365.85714332.2047021037.14285776.833333113.00BENIGN
\n", "

2539418 rows × 11 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 13 } ] }, { "cell_type": "code", "source": [ "from sklearn.model_selection import train_test_split\n", "train_data, test_data = train_test_split(finalDf, test_size=0.25)\n", "train_data_file = \"train_data.csv\"\n", "test_data_file = \"test_data.csv\"\n", "\n", "train_data.to_csv(train_data_file, index=False, header=False)\n", "test_data.to_csv(test_data_file, index=False, header=False)" ], "metadata": { "id": "c85wOCBv5-fX" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "CSV_HEADER = []\n", "for x in columns:\n", " CSV_HEADER.append(x)\n", "CSV_HEADER.append(\" Label\")\n", "\n", "# A list of the numerical feature names.\n", "NUMERIC_FEATURE_NAMES = columns\n", "# A dictionary of the categorical features and their vocabulary.\n", "CATEGORICAL_FEATURES_WITH_VOCABULARY = {\n", "}\n", "# A list of the columns to ignore from the dataset.\n", "IGNORE_COLUMN_NAMES = []\n", "# A list of the categorical feature names.\n", "CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())\n", "# A list of all the input features.\n", "FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES\n", "# A list of column default values for each feature.\n", "COLUMN_DEFAULTS = [\n", " [0.0] if feature_name in NUMERIC_FEATURE_NAMES + IGNORE_COLUMN_NAMES else [\"NA\"]\n", " for feature_name in CSV_HEADER\n", "]\n", "# The name of the target feature.\n", "TARGET_FEATURE_NAME = \" Label\"\n", "# A list of the labels of the target features.\n", "TARGET_LABELS = [\"BENIGN\", \"ANOMALY\"]\n", "\n", "from tensorflow.keras.layers import StringLookup\n", "\n", "target_label_lookup = StringLookup(\n", " vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0\n", ")\n", "\n", "\n", "def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n", " dataset = tf.data.experimental.make_csv_dataset(\n", " csv_file_path,\n", " batch_size=batch_size,\n", " column_names=CSV_HEADER,\n", " column_defaults=COLUMN_DEFAULTS,\n", " label_name=TARGET_FEATURE_NAME,\n", " num_epochs=1,\n", " header=False,\n", " na_value=\"?\",\n", " shuffle=shuffle,\n", " ).map(lambda features, target: (features, target_label_lookup(target)))\n", " return dataset.cache()\n", "\n", "def create_model_inputs():\n", " inputs = {}\n", " for feature_name in FEATURE_NAMES:\n", " if feature_name in NUMERIC_FEATURE_NAMES:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.float32\n", " )\n", " else:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.string\n", " )\n", " return inputs\n", "\n", "def encode_inputs(inputs):\n", " encoded_features = []\n", " for feature_name in inputs:\n", " if feature_name in CATEGORICAL_FEATURE_NAMES:\n", " vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n", " #print(vocabulary)\n", " # Create a lookup to convert a string values to an integer indices.\n", " # Since we are not using a mask token, nor expecting any out of vocabulary\n", " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", " lookup = StringLookup(\n", " vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n", " )\n", " # Convert the string input values into integer indices.\n", " value_index = lookup(inputs[feature_name])\n", " embedding_dims = int(math.sqrt(lookup.vocabulary_size()))\n", " # Create an embedding layer with the specified dimensions.\n", " embedding = layers.Embedding(\n", " input_dim=lookup.vocabulary_size(), output_dim=embedding_dims\n", " )\n", " # Convert the index values to embedding representations.\n", " encoded_feature = embedding(value_index)\n", " else:\n", " # Use the numerical features as-is.\n", " encoded_feature = inputs[feature_name]\n", " if inputs[feature_name].shape[-1] is None:\n", " encoded_feature = tf.expand_dims(encoded_feature, -1)\n", "\n", " encoded_features.append(encoded_feature)\n", "\n", " encoded_features = layers.concatenate(encoded_features)\n", " return encoded_features\n", "\n", "class NeuralDecisionTree(keras.Model):\n", " def __init__(self, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.depth = depth\n", " self.num_leaves = 2 ** depth\n", " self.num_classes = num_classes\n", "\n", " # Create a mask for the randomly selected features.\n", " num_used_features = int(num_features * used_features_rate)\n", " one_hot = np.eye(num_features)\n", " sampled_feature_indicies = np.random.choice(\n", " np.arange(num_features), num_used_features, replace=False\n", " )\n", " self.used_features_mask = one_hot[sampled_feature_indicies]\n", "\n", " # Initialize the weights of the classes in leaves.\n", " self.pi = tf.Variable(\n", " initial_value=tf.random_normal_initializer()(\n", " shape=[self.num_leaves, self.num_classes]\n", " ),\n", " dtype=\"float32\",\n", " trainable=True,\n", " )\n", "\n", " # Initialize the stochastic routing layer.\n", " self.decision_fn = layers.Dense(\n", " units=self.num_leaves, activation=\"sigmoid\", name=\"decision\"\n", " )\n", "\n", " def call(self, features):\n", " batch_size = tf.shape(features)[0]\n", "\n", " # Apply the feature mask to the input features.\n", " features = tf.matmul(\n", " features, self.used_features_mask, transpose_b=True\n", " ) # [batch_size, num_used_features]\n", " # Compute the routing probabilities.\n", " decisions = tf.expand_dims(\n", " self.decision_fn(features), axis=2\n", " ) # [batch_size, num_leaves, 1]\n", " # Concatenate the routing probabilities with their complements.\n", " decisions = layers.concatenate(\n", " [decisions, 1 - decisions], axis=2\n", " ) # [batch_size, num_leaves, 2]\n", "\n", " mu = tf.ones([batch_size, 1, 1])\n", "\n", " begin_idx = 1\n", " end_idx = 2\n", " # Traverse the tree in breadth-first order.\n", " for level in range(self.depth):\n", " mu = tf.reshape(mu, [batch_size, -1, 1]) # [batch_size, 2 ** level, 1]\n", " mu = tf.tile(mu, (1, 1, 2)) # [batch_size, 2 ** level, 2]\n", " level_decisions = decisions[\n", " :, begin_idx:end_idx, :\n", " ] # [batch_size, 2 ** level, 2]\n", " mu = mu * level_decisions # [batch_size, 2**level, 2]\n", " begin_idx = end_idx\n", " end_idx = begin_idx + 2 ** (level + 1)\n", "\n", " mu = tf.reshape(mu, [batch_size, self.num_leaves]) # [batch_size, num_leaves]\n", " probabilities = keras.activations.softmax(self.pi) # [num_leaves, num_classes]\n", " outputs = tf.matmul(mu, probabilities) # [batch_size, num_classes]\n", " return outputs\n", "\n", "class NeuralDecisionForest(keras.Model):\n", " def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.ensemble = []\n", " # Initialize the ensemble by adding NeuralDecisionTree instances.\n", " # Each tree will have its own randomly selected input features to use.\n", " for _ in range(num_trees):\n", " self.ensemble.append(\n", " NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)\n", " )\n", "\n", " def call(self, inputs):\n", " # Initialize the outputs: a [batch_size, num_classes] matrix of zeros.\n", " batch_size = tf.shape(inputs)[0]\n", " outputs = tf.zeros([batch_size, num_classes])\n", "\n", " # Aggregate the outputs of trees in the ensemble.\n", " for tree in self.ensemble:\n", " outputs += tree(inputs)\n", " # Divide the outputs by the ensemble size to get the average.\n", " outputs /= len(self.ensemble)\n", " return outputs\n", "learning_rate = 0.01\n", "batch_size = 128\n", "num_epochs = 10\n", "\n", "\n", "def run_experiment(model):\n", "\n", " # model.compile(\n", " # optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " # loss=keras.losses.SparseCategoricalCrossentropy(),\n", " # metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", " # )\n", " model.compile(\n", " optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " loss=keras.losses.SparseCategoricalCrossentropy(),\n", " metrics=[metrics.SparseCategoricalAccuracy()],\n", " )\n", " print(\"Start training the model...\")\n", " train_dataset = get_dataset_from_csv(\n", " train_data_file, shuffle=True, batch_size=batch_size\n", " )\n", "\n", " model.fit(train_dataset, epochs=num_epochs)\n", " print(\"Model training finished\")\n", "\n", " print(\"Evaluating the model on the test data...\")\n", " test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "\n", " _, accuracy = model.evaluate(test_dataset)\n", " print(f\"Test accuracy: {round(accuracy * 100, 2)}%\")\n", " return model" ], "metadata": { "id": "8txIkhqk5-fX", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "89b0fe40-f00c-448b-c361-662d4091ef07" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/numpy/core/numeric.py:2463: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", " return bool(asarray(a1 == a2).all())\n" ] } ] }, { "cell_type": "code", "source": [ "num_trees = 25\n", "depth = 5\n", "used_features_rate = 0.5\n", "num_classes = len(TARGET_LABELS)\n", "\n", "\n", "def create_forest_model():\n", " inputs = create_model_inputs()\n", " features = encode_inputs(inputs)\n", " features = layers.BatchNormalization()(features)\n", " num_features = features.shape[1]\n", "\n", " forest_model = NeuralDecisionForest(\n", " num_trees, depth, num_features, used_features_rate, num_classes\n", " )\n", "\n", " outputs = forest_model(features)\n", " model = keras.Model(inputs=inputs, outputs=outputs)\n", " return model\n", "\n", "\n", "forest_model = create_forest_model()\n", "\n", "finalModel = run_experiment(forest_model)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "06b9c0c7-51bf-4366-caa9-b7223b279103", "id": "cEIPaJMi5-fa" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Start training the model...\n", "Epoch 1/10\n", "14880/14880 [==============================] - 408s 26ms/step - loss: 0.2511 - sparse_categorical_accuracy: 0.8851\n", "Epoch 2/10\n", "14880/14880 [==============================] - 318s 21ms/step - loss: 0.2333 - sparse_categorical_accuracy: 0.8858\n", "Epoch 3/10\n", "14880/14880 [==============================] - 319s 21ms/step - loss: 0.2295 - sparse_categorical_accuracy: 0.8862\n", "Epoch 4/10\n", "14880/14880 [==============================] - 321s 22ms/step - loss: 0.2271 - sparse_categorical_accuracy: 0.8895\n", "Epoch 5/10\n", "14880/14880 [==============================] - 320s 21ms/step - loss: 0.2263 - sparse_categorical_accuracy: 0.8895\n", "Epoch 6/10\n", "14880/14880 [==============================] - 315s 21ms/step - loss: 0.2255 - sparse_categorical_accuracy: 0.8892\n", "Epoch 7/10\n", "14880/14880 [==============================] - 325s 22ms/step - loss: 0.2238 - sparse_categorical_accuracy: 0.8902\n", "Epoch 8/10\n", "14880/14880 [==============================] - 321s 22ms/step - loss: 0.2226 - sparse_categorical_accuracy: 0.8912\n", "Epoch 9/10\n", "14880/14880 [==============================] - 318s 21ms/step - loss: 0.2223 - sparse_categorical_accuracy: 0.8906\n", "Epoch 10/10\n", "14880/14880 [==============================] - 318s 21ms/step - loss: 0.2210 - sparse_categorical_accuracy: 0.8925\n", "Model training finished\n", "Evaluating the model on the test data...\n", "4960/4960 [==============================] - 50s 10ms/step - loss: 0.2180 - sparse_categorical_accuracy: 0.8961\n", "Test accuracy: 89.61%\n" ] } ] }, { "cell_type": "code", "source": [ "test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "colnames=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'target']\n", "data = pd.read_csv(\"test_data.csv\", names=colnames, header=None)\n", "data['target'].replace('BENIGN', 0,inplace=True)\n", "data['target'].replace('ANOMALY', 1,inplace=True)\n", "y_test = data['target'].values # as a numpy array\n", "from sklearn.metrics import confusion_matrix\n", "y_prediction = finalModel.predict(test_dataset)\n", "y_prediction = np.argmax (y_prediction, axis = 1)\n", "result = confusion_matrix(y_test, y_prediction , normalize='pred')\n", "print(result)\n", "TP = result[0][0]\n", "FP = result[0][1]\n", "TN = result[1][1]\n", "FN = result[1][0]\n", "ACC = (TP+TN)/(TP+TN+FP+FN)\n", "PR = TP/(TP+FP) #precision\n", "TPR = TP/(TP+FN) #Recall or True positive rate\n", "FPR = FP/(FP+TN)\n", "F1Score = 2*(PR*TPR)/(PR+TPR)\n", "print(\"ACC: \" + str(ACC))\n", "print(\"PR: \" + str(PR))\n", "print(\"TPR: \" + str(TPR))\n", "print(\"FPR: \" + str(FPR))\n", "print(\"F1Score: \" + str(F1Score))\n", "import matplotlib.pyplot as plt\n", "import numpy\n", "from sklearn import metrics\n", "\n", "\n", "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = result, display_labels = [True, False])\n", "\n", "cm_display.plot()\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 588 }, "id": "-wqDH6JHYGIy", "outputId": "0b8b51e3-7f3c-4f21-f3f9-175c5882ff32" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "4960/4960 [==============================] - 49s 9ms/step\n", "[[0.88787502 0.02786123]\n", " [0.11212498 0.97213877]]\n", "ACC: 0.9300068949681092\n", "PR: 0.969575052799438\n", "TPR: 0.8878750167992865\n", "FPR: 0.027861226863068127\n", "F1Score: 0.9269282446751819\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "# UNSWB-NB15" ], "metadata": { "id": "WoKuwfkLuv1A" } }, { "cell_type": "code", "source": [ "import tensorflow as tf\n", "import numpy as np\n", "import pandas as pd\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "from keras import losses\n", "from keras import optimizers\n", "from keras import metrics\n", "import math\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ], "metadata": { "id": "Fdfw744ivFEM" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "a49e269c-cb9d-4f4a-9bc8-66dc6c943e22", "id": "QkqJfn8Uuv1B" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "CSV_HEADER = [\n", "\"srcip\",\n", "\"sport\",\n", "\"dstip\",\n", "\"dsport\",\n", "\"proto\",\n", "\"state\",\n", "\"dur\",\n", "\"sbytes\",\n", "\"dbytes\",\n", "\"sttl\",\n", "\"dttl\",\n", "\"sloss\",\n", "\"dloss\",\n", "\"service\",\n", "\"Sload\",\n", "\"Dload\",\n", "\"Spkts\",\n", "\"Dpkts\",\n", "\"swin\",\n", "\"dwin\",\n", "\"stcpb\",\n", "\"dtcpb\",\n", "\"smeansz\",\n", "\"dmeansz\",\n", "\"trans_depth\",\n", "\"res_bdy_len\",\n", "\"Sjit\",\n", "\"Djit\",\n", "\"Stime\",\n", "\"Ltime\",\n", "\"Sintpkt\",\n", "\"Dintpkt\",\n", "\"tcprtt\",\n", "\"synack\",\n", "\"ackdat\",\n", "\"is_sm_ips_ports\",\n", "\"ct_state_ttl\",\n", "\"ct_flw_http_mthd\",\n", "\"is_ftp_login\",\n", "\"ct_ftp_cmd\",\n", "\"ct_srv_src\",\n", "\"ct_srv_dst\",\n", "\"ct_dst_ltm\",\n", "\"ct_src_ ltm\",\n", "\"ct_src_dport_ltm\",\n", "\"ct_dst_sport_ltm\",\n", "\"ct_dst_src_ltm\",\n", "\"attack_cat\",\n", "\"Label\"\n", "]" ], "metadata": { "id": "bRkq-TJZvIyE" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "import glob\n", "import os\n", "\n", "path = r'C:\\DRO\\DCL_rawdata_files' # use your path\n", "all_files = glob.glob(os.path.join(path , \"/content/drive/MyDrive/datasets/UNSW-NB15/*.csv\"))\n", "\n", "li = []\n", "\n", "for filename in all_files:\n", " df = pd.read_csv(filename, index_col=None, header=None, names=CSV_HEADER,low_memory=False)\n", " li.append(df)\n", "\n", "frame = pd.concat(li, axis=0, ignore_index=True)\n", "pd.set_option('display.max_columns', None)\n", "frame" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 461 }, "outputId": "8f6511b3-5a3b-4624-8eec-f0a21c8aee9e", "id": "-CqwxbhMuv1D" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " srcip sport dstip dsport proto state dur \\\n", "0 59.166.0.9 7045 149.171.126.7 25 tcp FIN 0.201886 \n", "1 59.166.0.9 9685 149.171.126.2 80 tcp FIN 5.864748 \n", "2 59.166.0.2 1421 149.171.126.4 53 udp CON 0.001391 \n", "3 59.166.0.2 21553 149.171.126.2 25 tcp FIN 0.053948 \n", "4 59.166.0.8 45212 149.171.126.4 53 udp CON 0.000953 \n", "... ... ... ... ... ... ... ... \n", "2540042 59.166.0.8 12520 149.171.126.6 31010 tcp FIN 0.020383 \n", "2540043 59.166.0.0 18895 149.171.126.9 80 tcp FIN 1.402957 \n", "2540044 59.166.0.0 30103 149.171.126.5 5190 tcp FIN 0.007108 \n", "2540045 59.166.0.6 30388 149.171.126.5 111 udp CON 0.004435 \n", "2540046 59.166.0.0 6055 149.171.126.5 54145 tcp FIN 0.072974 \n", "\n", " sbytes dbytes sttl dttl sloss dloss service Sload \\\n", "0 37552 3380 31 29 18 8 smtp 1.459438e+06 \n", "1 19410 1087890 31 29 2 370 http 2.640454e+04 \n", "2 146 178 31 29 0 0 dns 4.198418e+05 \n", "3 37812 3380 31 29 19 8 smtp 5.503374e+06 \n", "4 146 178 31 29 0 0 dns 6.128017e+05 \n", "... ... ... ... ... ... ... ... ... \n", "2540042 320 1874 31 29 1 2 - 1.047932e+05 \n", "2540043 19410 1087890 31 29 2 370 http 1.103783e+05 \n", "2540044 2158 2464 31 29 6 6 - 2.328644e+06 \n", "2540045 568 304 31 29 0 0 - 7.684329e+05 \n", "2540046 4238 60788 31 29 7 30 - 4.582454e+05 \n", "\n", " Dload Spkts Dpkts swin dwin stcpb dtcpb \\\n", "0 1.307669e+05 52 42 255 255 1422136554 3572668484 \n", "1 1.481983e+06 364 746 255 255 389619597 394688654 \n", "2 5.118620e+05 2 2 0 0 0 0 \n", "3 4.893601e+05 54 42 255 255 4047523379 1903327524 \n", "4 7.471144e+05 2 2 0 0 0 0 \n", "... ... ... ... ... ... ... ... \n", "2540042 6.436736e+05 6 8 255 255 3208686479 3225486168 \n", "2540043 6.195098e+06 364 746 255 255 283296697 2429736754 \n", "2540044 2.658413e+06 24 24 255 255 703293844 2848960529 \n", "2540045 4.112740e+05 4 4 0 0 0 0 \n", "2540046 6.571546e+06 72 72 255 255 1003293149 1003585034 \n", "\n", " smeansz dmeansz trans_depth res_bdy_len Sjit Djit \\\n", "0 722 80 0 0 456.043567 15.530109 \n", "1 53 1458 1 0 1031.366423 690.219581 \n", "2 73 89 0 0 0.000000 0.000000 \n", "3 700 80 0 0 65.909688 3.155258 \n", "4 73 89 0 0 0.000000 0.000000 \n", "... ... ... ... ... ... ... \n", "2540042 53 234 0 0 212.810729 3.079195 \n", "2540043 53 1458 1 3924 203.808900 114.173588 \n", "2540044 90 103 0 0 17.627831 0.432619 \n", "2540045 142 76 0 0 1.638604 1.390643 \n", "2540046 59 844 0 0 62.045310 61.899776 \n", "\n", " Stime Ltime Sintpkt Dintpkt tcprtt synack \\\n", "0 1424250009 1424250009 3.943843 4.912488 0.000590 0.000473 \n", "1 1424250003 1424250009 16.155447 7.871279 0.000771 0.000638 \n", "2 1424250009 1424250009 0.009000 0.002000 0.000000 0.000000 \n", "3 1424250009 1424250009 1.011547 1.302561 0.000674 0.000540 \n", "4 1424250009 1424250009 0.009000 0.004000 0.000000 0.000000 \n", "... ... ... ... ... ... ... \n", "2540042 1421955842 1421955842 4.007400 2.027429 0.006386 0.006189 \n", "2540043 1421955841 1421955842 3.864028 1.882421 0.000712 0.000550 \n", "2540044 1421955842 1421955842 0.274261 0.285478 0.000657 0.000532 \n", "2540045 1421955842 1421955842 1.165667 0.987333 0.000000 0.000000 \n", "2540046 1421955842 1421955842 1.022690 0.997042 0.002317 0.002173 \n", "\n", " ackdat is_sm_ips_ports ct_state_ttl ct_flw_http_mthd \\\n", "0 0.000117 0 0 NaN \n", "1 0.000133 0 0 1.0 \n", "2 0.000000 0 0 NaN \n", "3 0.000134 0 0 NaN \n", "4 0.000000 0 0 NaN \n", "... ... ... ... ... \n", "2540042 0.000197 0 0 0.0 \n", "2540043 0.000162 0 0 4.0 \n", "2540044 0.000125 0 0 0.0 \n", "2540045 0.000000 0 0 0.0 \n", "2540046 0.000144 0 0 0.0 \n", "\n", " is_ftp_login ct_ftp_cmd ct_srv_src ct_srv_dst ct_dst_ltm \\\n", "0 NaN 2 2 7 \n", "1 NaN 3 1 4 \n", "2 NaN 3 5 2 \n", "3 NaN 1 1 4 \n", "4 NaN 2 5 2 \n", "... ... ... ... ... ... \n", "2540042 0.0 0 8 20 7 \n", "2540043 0.0 0 1 1 2 \n", "2540044 0.0 0 13 13 6 \n", "2540045 0.0 0 10 13 6 \n", "2540046 0.0 0 13 13 6 \n", "\n", " ct_src_ ltm ct_src_dport_ltm ct_dst_sport_ltm ct_dst_src_ltm \\\n", "0 4 1 1 3 \n", "1 4 1 1 1 \n", "2 7 1 1 4 \n", "3 7 1 1 3 \n", "4 1 1 1 2 \n", "... ... ... ... ... \n", "2540042 5 1 1 4 \n", "2540043 7 2 2 2 \n", "2540044 7 2 1 2 \n", "2540045 5 1 1 3 \n", "2540046 7 1 1 2 \n", "\n", " attack_cat Label \n", "0 NaN 0 \n", "1 NaN 0 \n", "2 NaN 0 \n", "3 NaN 0 \n", "4 NaN 0 \n", "... ... ... \n", "2540042 NaN 0 \n", "2540043 NaN 0 \n", "2540044 NaN 0 \n", "2540045 NaN 0 \n", "2540046 NaN 0 \n", "\n", "[2540047 rows x 49 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
srcipsportdstipdsportprotostatedursbytesdbytessttldttlslossdlossserviceSloadDloadSpktsDpktsswindwinstcpbdtcpbsmeanszdmeansztrans_depthres_bdy_lenSjitDjitStimeLtimeSintpktDintpkttcprttsynackackdatis_sm_ips_portsct_state_ttlct_flw_http_mthdis_ftp_loginct_ftp_cmdct_srv_srcct_srv_dstct_dst_ltmct_src_ ltmct_src_dport_ltmct_dst_sport_ltmct_dst_src_ltmattack_catLabel
059.166.0.97045149.171.126.725tcpFIN0.2018863755233803129188smtp1.459438e+061.307669e+055242255255142213655435726684847228000456.04356715.530109142425000914242500093.9438434.9124880.0005900.0004730.00011700NaNNaN2274113NaN0
159.166.0.99685149.171.126.280tcpFIN5.86474819410108789031292370http2.640454e+041.481983e+06364746255255389619597394688654531458101031.366423690.2195811424250003142425000916.1554477.8712790.0007710.0006380.000133001.0NaN3144111NaN0
259.166.0.21421149.171.126.453udpCON0.001391146178312900dns4.198418e+055.118620e+052200007389000.0000000.000000142425000914242500090.0090000.0020000.0000000.0000000.00000000NaNNaN3527114NaN0
359.166.0.221553149.171.126.225tcpFIN0.0539483781233803129198smtp5.503374e+064.893601e+05544225525540475233791903327524700800065.9096883.155258142425000914242500091.0115471.3025610.0006740.0005400.00013400NaNNaN1147113NaN0
459.166.0.845212149.171.126.453udpCON0.000953146178312900dns6.128017e+057.471144e+052200007389000.0000000.000000142425000914242500090.0090000.0040000.0000000.0000000.00000000NaNNaN2521112NaN0
......................................................................................................................................................
254004259.166.0.812520149.171.126.631010tcpFIN0.0203833201874312912-1.047932e+056.436736e+0568255255320868647932254861685323400212.8107293.079195142195584214219558424.0074002.0274290.0063860.0061890.000197000.00.0082075114NaN0
254004359.166.0.018895149.171.126.980tcpFIN1.40295719410108789031292370http1.103783e+056.195098e+06364746255255283296697242973675453145813924203.808900114.173588142195584114219558423.8640281.8824210.0007120.0005500.000162004.00.001127222NaN0
254004459.166.0.030103149.171.126.55190tcpFIN0.00710821582464312966-2.328644e+062.658413e+0624242552557032938442848960529901030017.6278310.432619142195584214219558420.2742610.2854780.0006570.0005320.000125000.00.00131367212NaN0
254004559.166.0.630388149.171.126.5111udpCON0.004435568304312900-7.684329e+054.112740e+0544000014276001.6386041.390643142195584214219558421.1656670.9873330.0000000.0000000.000000000.00.00101365113NaN0
254004659.166.0.06055149.171.126.554145tcpFIN0.0729744238607883129730-4.582454e+056.571546e+06727225525510032931491003585034598440062.04531061.899776142195584214219558421.0226900.9970420.0023170.0021730.000144000.00.00131367112NaN0
\n", "

2540047 rows × 49 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "source": [ "frame.srcip = frame.srcip.astype('category').cat.codes\n", "frame.dstip = frame.dstip.astype('category').cat.codes\n", "frame.proto = frame.proto.astype('category').cat.codes\n", "frame.state = frame.state.astype('category').cat.codes\n", "frame.service = frame.service.astype('category').cat.codes\n", "frame.ct_flw_http_mthd = frame.ct_flw_http_mthd.astype('category').cat.codes\n", "frame.is_ftp_login = frame.is_ftp_login.astype('category').cat.codes\n", "frame.ct_ftp_cmd = frame.ct_ftp_cmd.astype('category').cat.codes\n", "frame['Label'] = frame['Label'].astype(str)\n", "frame['Label'] = frame['Label'].str.replace(\"1\", \"anomaly\")\n", "frame['Label'] = frame['Label'].str.replace(\"0\", \"normal\")\n", "frame = frame.drop('attack_cat', axis=1)\n", "frame" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 461 }, "id": "vw4HjZE0vT5x", "outputId": "4690412b-8c79-4afa-eb79-2ff1404cfda6" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " srcip sport dstip dsport proto state dur sbytes dbytes \\\n", "0 42 7045 25 25 114 5 0.201886 37552 3380 \n", "1 42 9685 20 80 114 5 5.864748 19410 1087890 \n", "2 35 1421 22 53 120 2 0.001391 146 178 \n", "3 35 21553 20 25 114 5 0.053948 37812 3380 \n", "4 41 45212 22 53 120 2 0.000953 146 178 \n", "... ... ... ... ... ... ... ... ... ... \n", "2540042 41 12520 24 31010 114 5 0.020383 320 1874 \n", "2540043 33 18895 27 80 114 5 1.402957 19410 1087890 \n", "2540044 33 30103 23 5190 114 5 0.007108 2158 2464 \n", "2540045 39 30388 23 111 120 2 0.004435 568 304 \n", "2540046 33 6055 23 54145 114 5 0.072974 4238 60788 \n", "\n", " sttl dttl sloss dloss service Sload Dload Spkts \\\n", "0 31 29 18 8 9 1.459438e+06 1.307669e+05 52 \n", "1 31 29 2 370 5 2.640454e+04 1.481983e+06 364 \n", "2 31 29 0 0 2 4.198418e+05 5.118620e+05 2 \n", "3 31 29 19 8 9 5.503374e+06 4.893601e+05 54 \n", "4 31 29 0 0 2 6.128017e+05 7.471144e+05 2 \n", "... ... ... ... ... ... ... ... ... \n", "2540042 31 29 1 2 0 1.047932e+05 6.436736e+05 6 \n", "2540043 31 29 2 370 5 1.103783e+05 6.195098e+06 364 \n", "2540044 31 29 6 6 0 2.328644e+06 2.658413e+06 24 \n", "2540045 31 29 0 0 0 7.684329e+05 4.112740e+05 4 \n", "2540046 31 29 7 30 0 4.582454e+05 6.571546e+06 72 \n", "\n", " Dpkts swin dwin stcpb dtcpb smeansz dmeansz \\\n", "0 42 255 255 1422136554 3572668484 722 80 \n", "1 746 255 255 389619597 394688654 53 1458 \n", "2 2 0 0 0 0 73 89 \n", "3 42 255 255 4047523379 1903327524 700 80 \n", "4 2 0 0 0 0 73 89 \n", "... ... ... ... ... ... ... ... \n", "2540042 8 255 255 3208686479 3225486168 53 234 \n", "2540043 746 255 255 283296697 2429736754 53 1458 \n", "2540044 24 255 255 703293844 2848960529 90 103 \n", "2540045 4 0 0 0 0 142 76 \n", "2540046 72 255 255 1003293149 1003585034 59 844 \n", "\n", " trans_depth res_bdy_len Sjit Djit Stime \\\n", "0 0 0 456.043567 15.530109 1424250009 \n", "1 1 0 1031.366423 690.219581 1424250003 \n", "2 0 0 0.000000 0.000000 1424250009 \n", "3 0 0 65.909688 3.155258 1424250009 \n", "4 0 0 0.000000 0.000000 1424250009 \n", "... ... ... ... ... ... \n", "2540042 0 0 212.810729 3.079195 1421955842 \n", "2540043 1 3924 203.808900 114.173588 1421955841 \n", "2540044 0 0 17.627831 0.432619 1421955842 \n", "2540045 0 0 1.638604 1.390643 1421955842 \n", "2540046 0 0 62.045310 61.899776 1421955842 \n", "\n", " Ltime Sintpkt Dintpkt tcprtt synack ackdat \\\n", "0 1424250009 3.943843 4.912488 0.000590 0.000473 0.000117 \n", "1 1424250009 16.155447 7.871279 0.000771 0.000638 0.000133 \n", "2 1424250009 0.009000 0.002000 0.000000 0.000000 0.000000 \n", "3 1424250009 1.011547 1.302561 0.000674 0.000540 0.000134 \n", "4 1424250009 0.009000 0.004000 0.000000 0.000000 0.000000 \n", "... ... ... ... ... ... ... \n", "2540042 1421955842 4.007400 2.027429 0.006386 0.006189 0.000197 \n", "2540043 1421955842 3.864028 1.882421 0.000712 0.000550 0.000162 \n", "2540044 1421955842 0.274261 0.285478 0.000657 0.000532 0.000125 \n", "2540045 1421955842 1.165667 0.987333 0.000000 0.000000 0.000000 \n", "2540046 1421955842 1.022690 0.997042 0.002317 0.002173 0.000144 \n", "\n", " is_sm_ips_ports ct_state_ttl ct_flw_http_mthd is_ftp_login \\\n", "0 0 0 -1 -1 \n", "1 0 0 1 -1 \n", "2 0 0 -1 -1 \n", "3 0 0 -1 -1 \n", "4 0 0 -1 -1 \n", "... ... ... ... ... \n", "2540042 0 0 0 0 \n", "2540043 0 0 4 0 \n", "2540044 0 0 0 0 \n", "2540045 0 0 0 0 \n", "2540046 0 0 0 0 \n", "\n", " ct_ftp_cmd ct_srv_src ct_srv_dst ct_dst_ltm ct_src_ ltm \\\n", "0 8 2 2 7 4 \n", "1 8 3 1 4 4 \n", "2 8 3 5 2 7 \n", "3 8 1 1 4 7 \n", "4 8 2 5 2 1 \n", "... ... ... ... ... ... \n", "2540042 0 8 20 7 5 \n", "2540043 0 1 1 2 7 \n", "2540044 0 13 13 6 7 \n", "2540045 0 10 13 6 5 \n", "2540046 0 13 13 6 7 \n", "\n", " ct_src_dport_ltm ct_dst_sport_ltm ct_dst_src_ltm Label \n", "0 1 1 3 normal \n", "1 1 1 1 normal \n", "2 1 1 4 normal \n", "3 1 1 3 normal \n", "4 1 1 2 normal \n", "... ... ... ... ... \n", "2540042 1 1 4 normal \n", "2540043 2 2 2 normal \n", "2540044 2 1 2 normal \n", "2540045 1 1 3 normal \n", "2540046 1 1 2 normal \n", "\n", "[2540047 rows x 48 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
srcipsportdstipdsportprotostatedursbytesdbytessttldttlslossdlossserviceSloadDloadSpktsDpktsswindwinstcpbdtcpbsmeanszdmeansztrans_depthres_bdy_lenSjitDjitStimeLtimeSintpktDintpkttcprttsynackackdatis_sm_ips_portsct_state_ttlct_flw_http_mthdis_ftp_loginct_ftp_cmdct_srv_srcct_srv_dstct_dst_ltmct_src_ ltmct_src_dport_ltmct_dst_sport_ltmct_dst_src_ltmLabel
0427045252511450.201886375523380312918891.459438e+061.307669e+055242255255142213655435726684847228000456.04356715.530109142425000914242500093.9438434.9124880.0005900.0004730.00011700-1-182274113normal
1429685208011455.8647481941010878903129237052.640454e+041.481983e+06364746255255389619597394688654531458101031.366423690.2195811424250003142425000916.1554477.8712790.0007710.0006380.000133001-183144111normal
2351421225312020.00139114617831290024.198418e+055.118620e+052200007389000.0000000.000000142425000914242500090.0090000.0020000.0000000.0000000.00000000-1-183527114normal
33521553202511450.053948378123380312919895.503374e+064.893601e+05544225525540475233791903327524700800065.9096883.155258142425000914242500091.0115471.3025610.0006740.0005400.00013400-1-181147113normal
44145212225312020.00095314617831290026.128017e+057.471144e+052200007389000.0000000.000000142425000914242500090.0090000.0040000.0000000.0000000.00000000-1-182521112normal
...................................................................................................................................................
25400424112520243101011450.020383320187431291201.047932e+056.436736e+0568255255320868647932254861685323400212.8107293.079195142195584214219558424.0074002.0274290.0063860.0061890.0001970000082075114normal
25400433318895278011451.4029571941010878903129237051.103783e+056.195098e+06364746255255283296697242973675453145813924203.808900114.173588142195584114219558423.8640281.8824210.0007120.0005500.000162004001127222normal
2540044333010323519011450.0071082158246431296602.328644e+062.658413e+0624242552557032938442848960529901030017.6278310.432619142195584214219558420.2742610.2854780.0006570.0005320.00012500000131367212normal
254004539303882311112020.00443556830431290007.684329e+054.112740e+0544000014276001.6386041.390643142195584214219558421.1656670.9873330.0000000.0000000.00000000000101365113normal
2540046336055235414511450.072974423860788312973004.582454e+056.571546e+06727225525510032931491003585034598440062.04531061.899776142195584214219558421.0226900.9970420.0023170.0021730.00014400000131367112normal
\n", "

2540047 rows × 48 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "source": [ "column_headers = list(frame.columns.values)\n", "column_headers" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "16e21026-d139-4863-9359-05b13cfd0a89", "id": "Evwnc93Kuv1G" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['srcip',\n", " 'sport',\n", " 'dstip',\n", " 'dsport',\n", " 'proto',\n", " 'state',\n", " 'dur',\n", " 'sbytes',\n", " 'dbytes',\n", " 'sttl',\n", " 'dttl',\n", " 'sloss',\n", " 'dloss',\n", " 'service',\n", " 'Sload',\n", " 'Dload',\n", " 'Spkts',\n", " 'Dpkts',\n", " 'swin',\n", " 'dwin',\n", " 'stcpb',\n", " 'dtcpb',\n", " 'smeansz',\n", " 'dmeansz',\n", " 'trans_depth',\n", " 'res_bdy_len',\n", " 'Sjit',\n", " 'Djit',\n", " 'Stime',\n", " 'Ltime',\n", " 'Sintpkt',\n", " 'Dintpkt',\n", " 'tcprtt',\n", " 'synack',\n", " 'ackdat',\n", " 'is_sm_ips_ports',\n", " 'ct_state_ttl',\n", " 'ct_flw_http_mthd',\n", " 'is_ftp_login',\n", " 'ct_ftp_cmd',\n", " 'ct_srv_src',\n", " 'ct_srv_dst',\n", " 'ct_dst_ltm',\n", " 'ct_src_ ltm',\n", " 'ct_src_dport_ltm',\n", " 'ct_dst_sport_ltm',\n", " 'ct_dst_src_ltm',\n", " 'Label']" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "CSV_HEADER = column_headers\n", "df = frame\n", "def Remove_Outlier_Indices(df):\n", " Q1 = df.quantile(0.02)\n", " Q3 = df.quantile(0.98)\n", " IQR = Q3 - Q1\n", " #trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR)))\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n", " return trueList\n", "\n", "nonOutlierList = Remove_Outlier_Indices(df)\n", "new_data = df[nonOutlierList]\n", "\n", "df = new_data\n", "df = df.reset_index(drop=True)\n", "del new_data\n", "del nonOutlierList\n", "del li\n", "del frame\n", "import gc\n", "gc.collect()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "433bb956-2df9-4a09-b5e8-252d5f47ab73", "id": "QVxZ09Cwuv1H" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":4: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q1 = df.quantile(0.02)\n", ":5: FutureWarning: The default value of numeric_only in DataFrame.quantile is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " Q3 = df.quantile(0.98)\n", ":8: FutureWarning: Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version. Do `left, right = left.align(right, axis=1, copy=False)` before e.g. `left == right`\n", " trueList = ~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "49" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "code", "source": [ "from sklearn.feature_selection import SelectKBest\n", "from sklearn.feature_selection import f_regression\n", "\n", "selector = SelectKBest(f_regression, k=10)\n", "X = df.drop(['Label'], axis=1)\n", "Y = df[\"Label\"].astype('category').cat.codes\n", "X_new = selector.fit(X, Y)\n", "X.columns.values[selector.get_support()]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "d0993c78-0506-4f62-ca8e-dba2b2d11c16", "id": "oAqsvLnbuv1M" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array(['state', 'sttl', 'ct_state_ttl', 'ct_srv_src', 'ct_srv_dst',\n", " 'ct_dst_ltm', 'ct_src_ ltm', 'ct_src_dport_ltm',\n", " 'ct_dst_sport_ltm', 'ct_dst_src_ltm'], dtype=object)" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [ "columns = X.columns.values[selector.get_support()]" ], "metadata": { "id": "pDyqDClMuv1N" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "columns = []\n", "for c in X.columns.values[selector.get_support()]:\n", " columns.append(str(c))" ], "metadata": { "id": "uCoBUv3Iuv1O" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "principalDf = pd.DataFrame(data = X\n", " , columns = X.columns.values[selector.get_support()])\n", "finalDf = pd.concat([principalDf, df[\"Label\"]], axis = 1)\n", "finalDf" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "outputId": "d55db406-4fc0-4216-f20d-5f800751df3d", "id": "KcYVo5R7uv1O" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " state sttl ct_state_ttl ct_srv_src ct_srv_dst ct_dst_ltm \\\n", "0 5 31 0 2 2 7 \n", "1 5 31 0 3 1 4 \n", "2 2 31 0 3 5 2 \n", "3 5 31 0 1 1 4 \n", "4 2 31 0 2 5 2 \n", "... ... ... ... ... ... ... \n", "2438669 5 31 0 8 20 7 \n", "2438670 5 31 0 1 1 2 \n", "2438671 5 31 0 13 13 6 \n", "2438672 2 31 0 10 13 6 \n", "2438673 5 31 0 13 13 6 \n", "\n", " ct_src_ ltm ct_src_dport_ltm ct_dst_sport_ltm ct_dst_src_ltm \\\n", "0 4 1 1 3 \n", "1 4 1 1 1 \n", "2 7 1 1 4 \n", "3 7 1 1 3 \n", "4 1 1 1 2 \n", "... ... ... ... ... \n", "2438669 5 1 1 4 \n", "2438670 7 2 2 2 \n", "2438671 7 2 1 2 \n", "2438672 5 1 1 3 \n", "2438673 7 1 1 2 \n", "\n", " Label \n", "0 normal \n", "1 normal \n", "2 normal \n", "3 normal \n", "4 normal \n", "... ... \n", "2438669 normal \n", "2438670 normal \n", "2438671 normal \n", "2438672 normal \n", "2438673 normal \n", "\n", "[2438674 rows x 11 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statesttlct_state_ttlct_srv_srcct_srv_dstct_dst_ltmct_src_ ltmct_src_dport_ltmct_dst_sport_ltmct_dst_src_ltmLabel
053102274113normal
153103144111normal
223103527114normal
353101147113normal
423102521112normal
....................................
2438669531082075114normal
243867053101127222normal
24386715310131367212normal
24386722310101365113normal
24386735310131367112normal
\n", "

2438674 rows × 11 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 12 } ] }, { "cell_type": "code", "source": [ "from sklearn.model_selection import train_test_split\n", "train_data, test_data = train_test_split(finalDf, test_size=0.25)\n", "train_data_file = \"train_data.csv\"\n", "test_data_file = \"test_data.csv\"\n", "\n", "train_data.to_csv(train_data_file, index=False, header=False)\n", "test_data.to_csv(test_data_file, index=False, header=False)" ], "metadata": { "id": "AVvnjMP0uv1P" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "CSV_HEADER = []\n", "for x in columns:\n", " CSV_HEADER.append(x)\n", "CSV_HEADER.append(\"Label\")\n", "\n", "# A list of the numerical feature names.\n", "NUMERIC_FEATURE_NAMES = columns\n", "# A dictionary of the categorical features and their vocabulary.\n", "CATEGORICAL_FEATURES_WITH_VOCABULARY = {\n", "}\n", "# A list of the columns to ignore from the dataset.\n", "IGNORE_COLUMN_NAMES = []\n", "# A list of the categorical feature names.\n", "CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURES_WITH_VOCABULARY.keys())\n", "# A list of all the input features.\n", "FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES\n", "# A list of column default values for each feature.\n", "COLUMN_DEFAULTS = [\n", " [0.0] if feature_name in NUMERIC_FEATURE_NAMES + IGNORE_COLUMN_NAMES else [\"NA\"]\n", " for feature_name in CSV_HEADER\n", "]\n", "# The name of the target feature.\n", "TARGET_FEATURE_NAME = \"Label\"\n", "# A list of the labels of the target features.\n", "TARGET_LABELS = [\"normal\", \"anomaly\"]\n", "\n", "from tensorflow.keras.layers import StringLookup\n", "\n", "target_label_lookup = StringLookup(\n", " vocabulary=TARGET_LABELS, mask_token=None, num_oov_indices=0\n", ")\n", "\n", "\n", "def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n", " dataset = tf.data.experimental.make_csv_dataset(\n", " csv_file_path,\n", " batch_size=batch_size,\n", " column_names=CSV_HEADER,\n", " column_defaults=COLUMN_DEFAULTS,\n", " label_name=TARGET_FEATURE_NAME,\n", " num_epochs=1,\n", " header=False,\n", " na_value=\"?\",\n", " shuffle=shuffle,\n", " ).map(lambda features, target: (features, target_label_lookup(target)))\n", " return dataset.cache()\n", "\n", "def create_model_inputs():\n", " inputs = {}\n", " for feature_name in FEATURE_NAMES:\n", " if feature_name in NUMERIC_FEATURE_NAMES:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.float32\n", " )\n", " else:\n", " inputs[feature_name] = layers.Input(\n", " name=feature_name, shape=(), dtype=tf.string\n", " )\n", " return inputs\n", "\n", "def encode_inputs(inputs):\n", " encoded_features = []\n", " for feature_name in inputs:\n", " if feature_name in CATEGORICAL_FEATURE_NAMES:\n", " vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]\n", " #print(vocabulary)\n", " # Create a lookup to convert a string values to an integer indices.\n", " # Since we are not using a mask token, nor expecting any out of vocabulary\n", " # (oov) token, we set mask_token to None and num_oov_indices to 0.\n", " lookup = StringLookup(\n", " vocabulary=vocabulary, mask_token=None, num_oov_indices=0\n", " )\n", " # Convert the string input values into integer indices.\n", " value_index = lookup(inputs[feature_name])\n", " embedding_dims = int(math.sqrt(lookup.vocabulary_size()))\n", " # Create an embedding layer with the specified dimensions.\n", " embedding = layers.Embedding(\n", " input_dim=lookup.vocabulary_size(), output_dim=embedding_dims\n", " )\n", " # Convert the index values to embedding representations.\n", " encoded_feature = embedding(value_index)\n", " else:\n", " # Use the numerical features as-is.\n", " encoded_feature = inputs[feature_name]\n", " if inputs[feature_name].shape[-1] is None:\n", " encoded_feature = tf.expand_dims(encoded_feature, -1)\n", "\n", " encoded_features.append(encoded_feature)\n", "\n", " encoded_features = layers.concatenate(encoded_features)\n", " return encoded_features\n", "\n", "class NeuralDecisionTree(keras.Model):\n", " def __init__(self, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.depth = depth\n", " self.num_leaves = 2 ** depth\n", " self.num_classes = num_classes\n", "\n", " # Create a mask for the randomly selected features.\n", " num_used_features = int(num_features * used_features_rate)\n", " one_hot = np.eye(num_features)\n", " sampled_feature_indicies = np.random.choice(\n", " np.arange(num_features), num_used_features, replace=False\n", " )\n", " self.used_features_mask = one_hot[sampled_feature_indicies]\n", "\n", " # Initialize the weights of the classes in leaves.\n", " self.pi = tf.Variable(\n", " initial_value=tf.random_normal_initializer()(\n", " shape=[self.num_leaves, self.num_classes]\n", " ),\n", " dtype=\"float32\",\n", " trainable=True,\n", " )\n", "\n", " # Initialize the stochastic routing layer.\n", " self.decision_fn = layers.Dense(\n", " units=self.num_leaves, activation=\"sigmoid\", name=\"decision\"\n", " )\n", "\n", " def call(self, features):\n", " batch_size = tf.shape(features)[0]\n", "\n", " # Apply the feature mask to the input features.\n", " features = tf.matmul(\n", " features, self.used_features_mask, transpose_b=True\n", " ) # [batch_size, num_used_features]\n", " # Compute the routing probabilities.\n", " decisions = tf.expand_dims(\n", " self.decision_fn(features), axis=2\n", " ) # [batch_size, num_leaves, 1]\n", " # Concatenate the routing probabilities with their complements.\n", " decisions = layers.concatenate(\n", " [decisions, 1 - decisions], axis=2\n", " ) # [batch_size, num_leaves, 2]\n", "\n", " mu = tf.ones([batch_size, 1, 1])\n", "\n", " begin_idx = 1\n", " end_idx = 2\n", " # Traverse the tree in breadth-first order.\n", " for level in range(self.depth):\n", " mu = tf.reshape(mu, [batch_size, -1, 1]) # [batch_size, 2 ** level, 1]\n", " mu = tf.tile(mu, (1, 1, 2)) # [batch_size, 2 ** level, 2]\n", " level_decisions = decisions[\n", " :, begin_idx:end_idx, :\n", " ] # [batch_size, 2 ** level, 2]\n", " mu = mu * level_decisions # [batch_size, 2**level, 2]\n", " begin_idx = end_idx\n", " end_idx = begin_idx + 2 ** (level + 1)\n", "\n", " mu = tf.reshape(mu, [batch_size, self.num_leaves]) # [batch_size, num_leaves]\n", " probabilities = keras.activations.softmax(self.pi) # [num_leaves, num_classes]\n", " outputs = tf.matmul(mu, probabilities) # [batch_size, num_classes]\n", " return outputs\n", "\n", "class NeuralDecisionForest(keras.Model):\n", " def __init__(self, num_trees, depth, num_features, used_features_rate, num_classes):\n", " super().__init__()\n", " self.ensemble = []\n", " # Initialize the ensemble by adding NeuralDecisionTree instances.\n", " # Each tree will have its own randomly selected input features to use.\n", " for _ in range(num_trees):\n", " self.ensemble.append(\n", " NeuralDecisionTree(depth, num_features, used_features_rate, num_classes)\n", " )\n", "\n", " def call(self, inputs):\n", " # Initialize the outputs: a [batch_size, num_classes] matrix of zeros.\n", " batch_size = tf.shape(inputs)[0]\n", " outputs = tf.zeros([batch_size, num_classes])\n", "\n", " # Aggregate the outputs of trees in the ensemble.\n", " for tree in self.ensemble:\n", " outputs += tree(inputs)\n", " # Divide the outputs by the ensemble size to get the average.\n", " outputs /= len(self.ensemble)\n", " return outputs\n", "learning_rate = 0.01\n", "batch_size = 128\n", "num_epochs = 10\n", "\n", "\n", "def run_experiment(model):\n", "\n", " # model.compile(\n", " # optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " # loss=keras.losses.SparseCategoricalCrossentropy(),\n", " # metrics=[keras.metrics.SparseCategoricalAccuracy()],\n", " # )\n", " model.compile(\n", " optimizer=keras.optimizers.Adam(learning_rate=learning_rate),\n", " loss=keras.losses.SparseCategoricalCrossentropy(),\n", " metrics=[metrics.SparseCategoricalAccuracy()],\n", " )\n", " print(\"Start training the model...\")\n", " train_dataset = get_dataset_from_csv(\n", " train_data_file, shuffle=True, batch_size=batch_size\n", " )\n", "\n", " model.fit(train_dataset, epochs=num_epochs)\n", " print(\"Model training finished\")\n", "\n", " print(\"Evaluating the model on the test data...\")\n", " test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "\n", " _, accuracy = model.evaluate(test_dataset)\n", " print(f\"Test accuracy: {round(accuracy * 100, 2)}%\")\n", " return model" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "d0ee3730-614b-47ea-e65f-85ec9485d949", "id": "rwCv3SBwuv1P" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/numpy/core/numeric.py:2463: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", " return bool(asarray(a1 == a2).all())\n" ] } ] }, { "cell_type": "code", "source": [ "num_trees = 25\n", "depth = 5\n", "used_features_rate = 0.5\n", "num_classes = len(TARGET_LABELS)\n", "\n", "\n", "def create_forest_model():\n", " inputs = create_model_inputs()\n", " features = encode_inputs(inputs)\n", " features = layers.BatchNormalization()(features)\n", " num_features = features.shape[1]\n", "\n", " forest_model = NeuralDecisionForest(\n", " num_trees, depth, num_features, used_features_rate, num_classes\n", " )\n", "\n", " outputs = forest_model(features)\n", " model = keras.Model(inputs=inputs, outputs=outputs)\n", " return model\n", "\n", "\n", "forest_model = create_forest_model()\n", "\n", "finalModel = run_experiment(forest_model)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "624abaad-2a9d-4f6b-e3f0-d846d966418b", "id": "57cf8V6duv1R" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Start training the model...\n", "Epoch 1/10\n", "14290/14290 [==============================] - 390s 25ms/step - loss: 0.0363 - sparse_categorical_accuracy: 0.9887\n", "Epoch 2/10\n", "14290/14290 [==============================] - 307s 21ms/step - loss: 0.0309 - sparse_categorical_accuracy: 0.9902\n", "Epoch 3/10\n", "14290/14290 [==============================] - 301s 21ms/step - loss: 0.0306 - sparse_categorical_accuracy: 0.9903\n", "Epoch 4/10\n", "14290/14290 [==============================] - 277s 19ms/step - loss: 0.0303 - sparse_categorical_accuracy: 0.9903\n", "Epoch 5/10\n", "14290/14290 [==============================] - 307s 22ms/step - loss: 0.0301 - sparse_categorical_accuracy: 0.9904\n", "Epoch 6/10\n", "14290/14290 [==============================] - 297s 21ms/step - loss: 0.0298 - sparse_categorical_accuracy: 0.9904\n", "Epoch 7/10\n", "14290/14290 [==============================] - 303s 21ms/step - loss: 0.0296 - sparse_categorical_accuracy: 0.9904\n", "Epoch 8/10\n", "14290/14290 [==============================] - 303s 21ms/step - loss: 0.0294 - sparse_categorical_accuracy: 0.9904\n", "Epoch 9/10\n", "14290/14290 [==============================] - 307s 22ms/step - loss: 0.0293 - sparse_categorical_accuracy: 0.9905\n", "Epoch 10/10\n", "14290/14290 [==============================] - 317s 22ms/step - loss: 0.0292 - sparse_categorical_accuracy: 0.9904\n", "Model training finished\n", "Evaluating the model on the test data...\n", "4764/4764 [==============================] - 50s 10ms/step - loss: 0.0283 - sparse_categorical_accuracy: 0.9904\n", "Test accuracy: 99.04%\n" ] } ] }, { "cell_type": "code", "source": [ "test_dataset = get_dataset_from_csv(test_data_file, batch_size=batch_size)\n", "colnames=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'target']\n", "data = pd.read_csv(\"test_data.csv\", names=colnames, header=None)\n", "data['target'].replace('normal', 0,inplace=True)\n", "data['target'].replace('anomaly', 1,inplace=True)\n", "y_test = data['target'].values # as a numpy array\n", "from sklearn.metrics import confusion_matrix\n", "y_prediction = finalModel.predict(test_dataset)\n", "y_prediction = np.argmax (y_prediction, axis = 1)\n", "result = confusion_matrix(y_test, y_prediction , normalize='pred')\n", "print(result)\n", "TP = result[0][0]\n", "FP = result[0][1]\n", "TN = result[1][1]\n", "FN = result[1][0]\n", "ACC = (TP+TN)/(TP+TN+FP+FN)\n", "PR = TP/(TP+FP) #precision\n", "TPR = TP/(TP+FN) #Recall or True positive rate\n", "FPR = FP/(FP+TN)\n", "F1Score = 2*(PR*TPR)/(PR+TPR)\n", "print(\"ACC: \" + str(ACC))\n", "print(\"PR: \" + str(PR))\n", "print(\"TPR: \" + str(TPR))\n", "print(\"FPR: \" + str(FPR))\n", "print(\"F1Score: \" + str(F1Score))\n", "import matplotlib.pyplot as plt\n", "import numpy\n", "from sklearn import metrics\n", "\n", "\n", "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = result, display_labels = [True, False])\n", "\n", "cm_display.plot()\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 588 }, "outputId": "5d4d957a-9bac-4808-ea5c-78a5a7cecff4", "id": "ZWRkWlYzuv1R" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "4764/4764 [==============================] - 47s 9ms/step\n", "[[0.99675629 0.05519602]\n", " [0.00324371 0.94480398]]\n", "ACC: 0.9707801335476056\n", "PR: 0.9475299209852083\n", "TPR: 0.9967562878472034\n", "FPR: 0.0551960207519923\n", "F1Score: 0.9715199360823916\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] } ] }