{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "pycharm": {
     "name": "#%% Importing packages\n"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "from sklearn.preprocessing import OrdinalEncoder\n",
    "import matplotlib.pyplot as plt\n",
    "# NOTE(review): wildcard import kept on purpose -- cells further down the notebook may rely on\n",
    "# names it provides. Replace it with explicit names once every usage has been confirmed.\n",
    "from sklearn.metrics import *\n",
    "import seaborn as sns\n",
    "\n",
    "# Folder that holds the CSV files. Set the DOH_DATA_DIR environment variable to run the\n",
    "# notebook on another machine; the default is the location the notebook was written against.\n",
    "DATA_DIR = Path(os.environ.get(\n",
    "    'DOH_DATA_DIR',\n",
    "    'D://OneDrive - Higher Education Commission//Collaborations//Daud Khan//Total-CSVs',\n",
    "))\n",
    "\n",
    "# Positional row window taken from each class file: rows 1..14999, i.e. 14,999 rows per class.\n",
    "# NOTE(review): row 0 is dropped although header=0 already consumes the header line -- confirm\n",
    "# that this is intended before changing it (the stored outputs assume 29,998 rows in total).\n",
    "ROWS_PER_CLASS = slice(1, 15000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "pycharm": {
     "name": "#%% Reading benign dataset\n"
    }
   },
   "outputs": [],
   "source": [
    "df_benign = pd.read_csv(DATA_DIR / 'l2-benign.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "pycharm": {
     "name": "#%% Reading doh dataset\n"
    }
   },
   "outputs": [],
   "source": [
    "# df_doh = pd.read_csv(DATA_DIR / 'l1-doh.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "pycharm": {
     "name": "#%% Reading nondoh dataset\n"
    }
   },
   "outputs": [],
   "source": [
    "# df_nondoh = pd.read_csv(DATA_DIR / 'l1-nondoh.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "pycharm": {
     "name": "#%% Reading malicious dataset\n"
    }
   },
   "outputs": [],
   "source": [
    "df_malicious = pd.read_csv(DATA_DIR / 'l2-malicious.csv', header=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "pycharm": {
     "name": "#%% Concatenation of datasets\n"
    }
   },
   "outputs": [],
   "source": [
    "# Alternative kept for reference: all four frames, full files.\n",
    "# df = pd.concat([df_benign, df_doh, df_nondoh,df_malicious], ignore_index=True)\n",
    "# Active variant: benign + malicious only, same row window from each file.\n",
    "# .iloc makes the positional slicing explicit; ignore_index renumbers the rows from 0.\n",
    "df = pd.concat([df_benign.iloc[ROWS_PER_CLASS], df_malicious.iloc[ROWS_PER_CLASS]], ignore_index=True)\n",
    "# Alternative kept for reference: small four-frame sample.\n",
    "# df = pd.concat([df_benign[1:1000], df_doh[1:1000], df_nondoh[1:1000],df_malicious[1:1000]], ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "pycharm": {
     "name": "#%% Printing dataset summary\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": " SourceIP DestinationIP SourcePort DestinationPort \\\n0 192.168.20.191 176.103.130.131 50749 443 \n1 192.168.20.191 176.103.130.131 50749 443 \n2 192.168.20.191 176.103.130.131 50749 443 \n3 176.103.130.131 192.168.20.191 443 50749 \n4 192.168.20.191 176.103.130.131 52491 443 \n... ... ... ... ... \n29993 192.168.20.144 8.8.4.4 52562 443 \n29994 192.168.20.144 8.8.4.4 52566 443 \n29995 8.8.4.4 192.168.20.144 443 52560 \n29996 192.168.20.144 8.8.4.4 52564 443 \n29997 192.168.20.144 8.8.4.4 52568 443 \n\n TimeStamp Duration FlowBytesSent FlowSentRate \\\n0 2020-01-14 15:50:52 122.309318 93828 767.136973 \n1 2020-01-14 15:52:55 120.958413 38784 320.639127 \n2 2020-01-14 15:54:56 110.501080 61993 561.017141 \n3 2020-01-14 15:56:46 54.229891 83641 1542.341289 \n4 2020-01-14 15:57:40 145.460721 54084 371.811714 \n... ... ... ... ... \n29993 2020-03-19 06:25:10 120.832003 612 5.064883 \n29994 2020-03-19 06:25:12 105.760489 21611 204.339070 \n29995 2020-03-19 06:26:10 58.923443 526 8.926837 \n29996 2020-03-19 06:26:11 120.831985 612 5.064884 \n29997 2020-03-19 06:26:13 105.144769 21797 207.304654 \n\n FlowBytesReceived FlowReceivedRate ... \\\n0 101232 827.672018 ... \n1 38236 316.108645 ... \n2 69757 631.278898 ... \n3 76804 1416.266907 ... \n4 63843 438.901991 ... \n... ... ... ... \n29993 544 4.502119 ... \n29994 42521 402.049956 ... \n29995 364 6.177507 ... \n29996 544 4.502119 ... \n29997 41816 397.699290 ... \n\n PacketTimeCoefficientofVariation ResponseTimeTimeVariance \\\n0 0.509047 1.169641e-03 \n1 0.732636 7.851554e-04 \n2 0.646859 4.110453e-04 \n3 0.507334 7.907866e-02 \n4 0.736075 6.430451e-04 \n... ... ... 
\n29993 0.653848 2.348913e-06 \n29994 0.630948 8.580713e-05 \n29995 0.508427 4.263347e+01 \n29996 0.653859 7.949375e-10 \n29997 0.659386 7.659894e-03 \n\n ResponseTimeTimeStandardDeviation ResponseTimeTimeMean \\\n0 0.034200 0.024387 \n1 0.028021 0.029238 \n2 0.020274 0.019925 \n3 0.281209 0.025930 \n4 0.025358 0.025075 \n... ... ... \n29993 0.001533 0.025651 \n29994 0.009263 0.025111 \n29995 6.529431 11.309422 \n29996 0.000028 0.024681 \n29997 0.087521 0.037465 \n\n ResponseTimeTimeMedian ResponseTimeTimeMode \\\n0 0.021043 0.026981 \n1 0.026921 0.026855 \n2 0.019268 0.026918 \n3 0.000047 0.000021 \n4 0.026813 0.026903 \n... ... ... \n29993 0.024989 0.024923 \n29994 0.024712 0.024663 \n29995 15.079167 15.079167 \n29996 0.024687 0.024626 \n29997 0.025098 0.025067 \n\n ResponseTimeTimeSkewFromMedian ResponseTimeTimeSkewFromMode \\\n0 0.293297 -0.075845 \n1 0.248064 0.085061 \n2 0.097199 -0.344926 \n3 0.276133 0.092135 \n4 -0.205614 -0.072087 \n... ... ... \n29993 1.296557 0.475249 \n29994 0.129221 0.048363 \n29995 -1.732040 -0.577347 \n29996 -0.611819 1.941859 \n29997 0.423927 0.141663 \n\n ResponseTimeTimeCoefficientofVariation Label \n0 1.402382 Benign \n1 0.958348 Benign \n2 1.017535 Benign \n3 10.844829 Benign \n4 1.011300 Benign \n... ... ... \n29993 0.059748 Malicious \n29994 0.368891 Malicious \n29995 0.577344 Malicious \n29996 0.001142 Malicious \n29997 2.336039 Malicious \n\n[29998 rows x 35 columns]", "text/html": "
\n | SourceIP | \nDestinationIP | \nSourcePort | \nDestinationPort | \nTimeStamp | \nDuration | \nFlowBytesSent | \nFlowSentRate | \nFlowBytesReceived | \nFlowReceivedRate | \n... | \nPacketTimeCoefficientofVariation | \nResponseTimeTimeVariance | \nResponseTimeTimeStandardDeviation | \nResponseTimeTimeMean | \nResponseTimeTimeMedian | \nResponseTimeTimeMode | \nResponseTimeTimeSkewFromMedian | \nResponseTimeTimeSkewFromMode | \nResponseTimeTimeCoefficientofVariation | \nLabel | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n192.168.20.191 | \n176.103.130.131 | \n50749 | \n443 | \n2020-01-14 15:50:52 | \n122.309318 | \n93828 | \n767.136973 | \n101232 | \n827.672018 | \n... | \n0.509047 | \n1.169641e-03 | \n0.034200 | \n0.024387 | \n0.021043 | \n0.026981 | \n0.293297 | \n-0.075845 | \n1.402382 | \nBenign | \n
1 | \n192.168.20.191 | \n176.103.130.131 | \n50749 | \n443 | \n2020-01-14 15:52:55 | \n120.958413 | \n38784 | \n320.639127 | \n38236 | \n316.108645 | \n... | \n0.732636 | \n7.851554e-04 | \n0.028021 | \n0.029238 | \n0.026921 | \n0.026855 | \n0.248064 | \n0.085061 | \n0.958348 | \nBenign | \n
2 | \n192.168.20.191 | \n176.103.130.131 | \n50749 | \n443 | \n2020-01-14 15:54:56 | \n110.501080 | \n61993 | \n561.017141 | \n69757 | \n631.278898 | \n... | \n0.646859 | \n4.110453e-04 | \n0.020274 | \n0.019925 | \n0.019268 | \n0.026918 | \n0.097199 | \n-0.344926 | \n1.017535 | \nBenign | \n
3 | \n176.103.130.131 | \n192.168.20.191 | \n443 | \n50749 | \n2020-01-14 15:56:46 | \n54.229891 | \n83641 | \n1542.341289 | \n76804 | \n1416.266907 | \n... | \n0.507334 | \n7.907866e-02 | \n0.281209 | \n0.025930 | \n0.000047 | \n0.000021 | \n0.276133 | \n0.092135 | \n10.844829 | \nBenign | \n
4 | \n192.168.20.191 | \n176.103.130.131 | \n52491 | \n443 | \n2020-01-14 15:57:40 | \n145.460721 | \n54084 | \n371.811714 | \n63843 | \n438.901991 | \n... | \n0.736075 | \n6.430451e-04 | \n0.025358 | \n0.025075 | \n0.026813 | \n0.026903 | \n-0.205614 | \n-0.072087 | \n1.011300 | \nBenign | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
29993 | \n192.168.20.144 | \n8.8.4.4 | \n52562 | \n443 | \n2020-03-19 06:25:10 | \n120.832003 | \n612 | \n5.064883 | \n544 | \n4.502119 | \n... | \n0.653848 | \n2.348913e-06 | \n0.001533 | \n0.025651 | \n0.024989 | \n0.024923 | \n1.296557 | \n0.475249 | \n0.059748 | \nMalicious | \n
29994 | \n192.168.20.144 | \n8.8.4.4 | \n52566 | \n443 | \n2020-03-19 06:25:12 | \n105.760489 | \n21611 | \n204.339070 | \n42521 | \n402.049956 | \n... | \n0.630948 | \n8.580713e-05 | \n0.009263 | \n0.025111 | \n0.024712 | \n0.024663 | \n0.129221 | \n0.048363 | \n0.368891 | \nMalicious | \n
29995 | \n8.8.4.4 | \n192.168.20.144 | \n443 | \n52560 | \n2020-03-19 06:26:10 | \n58.923443 | \n526 | \n8.926837 | \n364 | \n6.177507 | \n... | \n0.508427 | \n4.263347e+01 | \n6.529431 | \n11.309422 | \n15.079167 | \n15.079167 | \n-1.732040 | \n-0.577347 | \n0.577344 | \nMalicious | \n
29996 | \n192.168.20.144 | \n8.8.4.4 | \n52564 | \n443 | \n2020-03-19 06:26:11 | \n120.831985 | \n612 | \n5.064884 | \n544 | \n4.502119 | \n... | \n0.653859 | \n7.949375e-10 | \n0.000028 | \n0.024681 | \n0.024687 | \n0.024626 | \n-0.611819 | \n1.941859 | \n0.001142 | \nMalicious | \n
29997 | \n192.168.20.144 | \n8.8.4.4 | \n52568 | \n443 | \n2020-03-19 06:26:13 | \n105.144769 | \n21797 | \n207.304654 | \n41816 | \n397.699290 | \n... | \n0.659386 | \n7.659894e-03 | \n0.087521 | \n0.037465 | \n0.025098 | \n0.025067 | \n0.423927 | \n0.141663 | \n2.336039 | \nMalicious | \n
29998 rows × 35 columns
\n\n | SourceIP | \nDestinationIP | \nSourcePort | \nDestinationPort | \nTimeStamp | \nDuration | \nFlowBytesSent | \nFlowSentRate | \nFlowBytesReceived | \nFlowReceivedRate | \n... | \nPacketTimeCoefficientofVariation | \nResponseTimeTimeVariance | \nResponseTimeTimeStandardDeviation | \nResponseTimeTimeMean | \nResponseTimeTimeMedian | \nResponseTimeTimeMode | \nResponseTimeTimeSkewFromMedian | \nResponseTimeTimeSkewFromMode | \nResponseTimeTimeCoefficientofVariation | \nLabel | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n192.168.20.191 | \n176.103.130.131 | \n50749 | \n443 | \n2020-01-14 15:50:52 | \n122.309318 | \n93828 | \n767.136973 | \n101232 | \n827.672018 | \n... | \n0.509047 | \n0.001170 | \n0.034200 | \n0.024387 | \n0.021043 | \n0.026981 | \n0.293297 | \n-0.075845 | \n1.402382 | \nBenign | \n
1 | \n192.168.20.191 | \n176.103.130.131 | \n50749 | \n443 | \n2020-01-14 15:52:55 | \n120.958413 | \n38784 | \n320.639127 | \n38236 | \n316.108645 | \n... | \n0.732636 | \n0.000785 | \n0.028021 | \n0.029238 | \n0.026921 | \n0.026855 | \n0.248064 | \n0.085061 | \n0.958348 | \nBenign | \n
2 | \n192.168.20.191 | \n176.103.130.131 | \n50749 | \n443 | \n2020-01-14 15:54:56 | \n110.501080 | \n61993 | \n561.017141 | \n69757 | \n631.278898 | \n... | \n0.646859 | \n0.000411 | \n0.020274 | \n0.019925 | \n0.019268 | \n0.026918 | \n0.097199 | \n-0.344926 | \n1.017535 | \nBenign | \n
3 | \n176.103.130.131 | \n192.168.20.191 | \n443 | \n50749 | \n2020-01-14 15:56:46 | \n54.229891 | \n83641 | \n1542.341289 | \n76804 | \n1416.266907 | \n... | \n0.507334 | \n0.079079 | \n0.281209 | \n0.025930 | \n0.000047 | \n0.000021 | \n0.276133 | \n0.092135 | \n10.844829 | \nBenign | \n
4 | \n192.168.20.191 | \n176.103.130.131 | \n52491 | \n443 | \n2020-01-14 15:57:40 | \n145.460721 | \n54084 | \n371.811714 | \n63843 | \n438.901991 | \n... | \n0.736075 | \n0.000643 | \n0.025358 | \n0.025075 | \n0.026813 | \n0.026903 | \n-0.205614 | \n-0.072087 | \n1.011300 | \nBenign | \n
5 rows × 35 columns
\n