{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from fastai import *\n", "from fastai.vision import *\n", "\n", "path = Path(\"../pap_smear\")\n", "classes = [\"normal\", \"abnormal\"]\n", " \n", "tfms = get_transforms(flip_vert=True, max_warp=0.0, max_zoom=0.5)\n", "data = (ImageList.from_folder(path=path)\n", " .filter_by_func(lambda fname: \"-d\" not in fname.name)\n", " .split_by_rand_pct(valid_pct=0.2)\n", " .label_from_func(lambda fname: classes.index(fname.parts[-2].split(\"_\")[0]))\n", " .transform(tfms, size=32)\n", " .databunch(bs=8)\n", " .normalize(imagenet_stats))\n", "data.show_batch(rows=2, figsize=(5, 5))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import joblib" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(734, 32, 32, 3) (734,)\n" ] } ], "source": [ "train_images = []\n", "train_labels = []\n", "\n", "for (x, y) in data.train_ds:\n", " x = x.data.permute(2, 1, 0).numpy()\n", " y = y.obj\n", " train_images.append(x)\n", " train_labels.append(y)\n", "\n", "train_images = np.array(train_images)\n", "train_labels = np.array(train_labels)\n", "print(train_images.shape, train_labels.shape)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(183, 32, 32, 3) (183,)\n" ] } ], "source": [ "val_images = []\n", "val_labels = []\n", "\n", "for (x, y) in data.valid_ds:\n", " x = x.data.permute(2, 1, 0).numpy()\n", " y = y.obj\n", " val_images.append(x)\n", " val_labels.append(y)\n", "\n", "val_images = np.array(val_images)\n", "val_labels = np.array(val_labels)\n", "print(val_images.shape, val_labels.shape)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from sklearn.base import BaseEstimator, TransformerMixin\n", "from skimage.feature 
import hog\n",
 "from skimage.color import rgb2gray\n",
 "from skimage.io import imread\n",
 "from skimage.transform import rescale\n",
 "\n",
 "\n",
 "class RGB2GrayTransformer(BaseEstimator, TransformerMixin):\n",
 "    \"\"\"\n",
 "    Convert an array of RGB images to grayscale\n",
 "    \"\"\"\n",
 "\n",
 "    def __init__(self):\n",
 "        pass\n",
 "\n",
 "    def fit(self, X, y=None):\n",
 "        \"\"\"Nothing is learned from X; returns itself.\"\"\"\n",
 "        return self\n",
 "\n",
 "    def transform(self, X, y=None):\n",
 "        \"\"\"Apply `rgb2gray` to every image in X and return the results as one array.\"\"\"\n",
 "        # rgb2gray is imported by name in this cell, so the class does not rely\n",
 "        # on `import skimage` having been executed somewhere else\n",
 "        return np.array([rgb2gray(img) for img in X])\n",
 "\n",
 "\n",
 "class HogTransformer(BaseEstimator, TransformerMixin):\n",
 "    \"\"\"\n",
 "    Expects an array of 2d arrays (1 channel images)\n",
 "    Calculates hog features for each img\n",
 "\n",
 "    The constructor arguments are stored unchanged; `orientations`,\n",
 "    `pixels_per_cell`, `cells_per_block` and `block_norm` are forwarded to\n",
 "    `hog` for every image, while `y` is stored but not used.\n",
 "    \"\"\"\n",
 "\n",
 "    def __init__(self, y=None, orientations=9,\n",
 "                 pixels_per_cell=(8, 8),\n",
 "                 cells_per_block=(3, 3), block_norm='L2-Hys'):\n",
 "        self.y = y\n",
 "        self.orientations = orientations\n",
 "        self.pixels_per_cell = pixels_per_cell\n",
 "        self.cells_per_block = cells_per_block\n",
 "        self.block_norm = block_norm\n",
 "\n",
 "    def fit(self, X, y=None):\n",
 "        \"\"\"Nothing is learned from X; returns itself.\"\"\"\n",
 "        return self\n",
 "\n",
 "    def transform(self, X, y=None):\n",
 "        \"\"\"Return an array holding the `hog` result of every image in X.\"\"\"\n",
 "\n",
 "        def local_hog(img):\n",
 "            return hog(img,\n",
 "                       orientations=self.orientations,\n",
 "                       pixels_per_cell=self.pixels_per_cell,\n",
 "                       cells_per_block=self.cells_per_block,\n",
 "                       block_norm=self.block_norm)\n",
 "\n",
 "        # images are processed one after another; an error raised by `hog`\n",
 "        # propagates to the caller instead of being caught and retried\n",
 "        return np.array([local_hog(img) for img in X])" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import SGDClassifier\n", "from sklearn.model_selection import cross_val_predict\n", "from sklearn.preprocessing import StandardScaler\n", "import skimage\n", " \n", "# create an instance of each transformer\n", "grayify = RGB2GrayTransformer()\n", "hogify = HogTransformer(\n", " pixels_per_cell=(8, 8),\n", " cells_per_block=(2,2),\n", " 
orientations=9,\n", " block_norm='L2-Hys'\n", ")\n",
 "scalify = StandardScaler()\n",
 "\n",
 "# fit every transformer on the training images, each stage consuming the previous stage's output\n",
 "X_train_gray = grayify.fit_transform(train_images)\n",
 "X_train_hog = hogify.fit_transform(X_train_gray)\n",
 "X_train_prepared = scalify.fit_transform(X_train_hog)" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(734, 324)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train_prepared.shape" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [
 "# the validation images only go through transform(): nothing is re-fitted on them\n",
 "X_test_gray = grayify.transform(val_images)\n",
 "X_test_hog = hogify.transform(X_test_gray)\n",
 "X_test_prepared = scalify.transform(X_test_hog)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## SGD Classifier" ] },
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SGDClassifier(alpha=0.0001, average=False, class_weight=None,\n", " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n", " l1_ratio=0.15, learning_rate='optimal', loss='hinge',\n", " max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',\n", " power_t=0.5, random_state=42, shuffle=True, tol=0.001,\n", " validation_fraction=0.1, verbose=0, warm_start=False)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from sklearn.linear_model import SGDClassifier\n",
 "\n",
 "sgd_params = dict(random_state=42, max_iter=1000, tol=1e-3)\n",
 "# fit() hands back the estimator itself, so the bare name below displays the fitted model\n",
 "sgd_clf = SGDClassifier(**sgd_params).fit(X_train_prepared, train_labels)\n",
 "sgd_clf" ] },
 { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[False True False False False True True False False True True False True False False False True False True\n", " True True False True True True]\n", "\n", "Percentage correct: 71.5846994535519\n" ] } ], "source": [ 
"y_pred = sgd_clf.predict(X_test_prepared)\n", "print(np.array(y_pred == val_labels)[:25])\n", "print('')\n", "print('Percentage correct: ', 100*np.sum(y_pred == val_labels)/len(val_labels))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from sklearn import metrics\n", "\n", "disp = metrics.plot_confusion_matrix(sgd_clf, X_test_prepared, val_labels)\n", "plt.show(disp)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['sgd_clf.pkl']" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "joblib.dump(sgd_clf, \"sgd_clf.pkl\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 71.58469945355192\n", "Precision : 78.37837837837837\n", "Recall : 85.29411764705883\n", "Specificity : 31.914893617021278\n", "H-mean : 46.44954618259477\n", "F1 score : 81.69014084507042\n" ] } ], "source": [ "preds = sgd_clf.predict(X_test_prepared)\n", "tn, fp, fn, tp = metrics.confusion_matrix(val_labels, preds).ravel()\n", "sens = (tp / (tp + fn)) * 100\n", "spec = (tn / (tn + fp)) * 100\n", "prec = (tp / (tp + fp)) * 100\n", "accuracy = ((tp + tn) / (tp + tn + fn + fp)) * 100\n", "h_mean = 2 * ((sens * spec) / (sens + spec))\n", "f1 = 2 * ((prec * sens) / (prec + sens))\n", "\n", "print(f\"Accuracy : {accuracy}\\nPrecision : {prec}\\nRecall : {sens}\\nSpecificity : {spec}\\nH-mean : {h_mean}\\nF1 score : {f1}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## SVM Classifier" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.svm import SVC\n", "svm_clf = SVC(kernel=\"rbf\")\n", 
"svm_clf.fit(X_train_prepared, train_labels)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ True True False True False True True False False True True True True False False True True False True\n", " True True False True True True]\n", "\n", "Percentage correct: 76.50273224043715\n" ] } ], "source": [ "y_pred = svm_clf.predict(X_test_prepared)\n", "print(np.array(y_pred == val_labels)[:25])\n", "print('')\n", "print('Percentage correct: ', 100*np.sum(y_pred == val_labels)/len(val_labels))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "disp = metrics.plot_confusion_matrix(svm_clf, X_test_prepared, val_labels)\n", "plt.show(disp)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['svm_clf.pkl']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "joblib.dump(svm_clf, \"svm_clf.pkl\")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 76.50273224043715\n", "Precision : 76.27118644067797\n", "Recall : 99.26470588235294\n", "Specificity : 10.638297872340425\n", "H-mean : 19.217081850533805\n", "F1 score : 86.26198083067092\n" ] } ], "source": [ "preds = svm_clf.predict(X_test_prepared)\n", "tn, fp, fn, tp = metrics.confusion_matrix(val_labels, preds).ravel()\n", "sens = (tp / (tp + fn)) * 100\n", "spec = (tn / (tn + fp)) * 100\n", "prec = (tp / (tp + fp)) * 100\n", "accuracy = ((tp + tn) / (tp + tn + fn + fp)) * 100\n", "h_mean = 2 * ((sens * spec) / (sens + spec))\n", "f1 = 2 * ((prec * sens) / (prec + sens))\n", "\n", "print(f\"Accuracy : {accuracy}\\nPrecision : {prec}\\nRecall : {sens}\\nSpecificity : {spec}\\nH-mean : {h_mean}\\nF1 score : {f1}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## KNN Classifier" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", " metric_params=None, n_jobs=None, n_neighbors=5, p=2,\n", " weights='uniform')" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import KNeighborsClassifier as knn\n", "\n", "knn_clf = knn()\n", "knn_clf.fit(X_train_prepared, train_labels)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ "[ True True False True False True True True False True True False True False False True True True True\n", " True True False True True False]\n", "\n", "Percentage correct: 78.14207650273224\n" ] } ], "source": [ "y_pred = knn_clf.predict(X_test_prepared)\n", "print(np.array(y_pred == val_labels)[:25])\n", "print('')\n", "print('Percentage correct: ', 100*np.sum(y_pred == val_labels)/len(val_labels))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAATgAAAEGCAYAAADxD4m3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAZn0lEQVR4nO3de7xVdZ3/8df7nAOoXAQkEQED74OaoojMWA2mPy9l6Thlmj8vk2aYMqmVl7Kf2WTp2GgaZjFp6i9HJcvRyZLUNNO8gVeUEMRECOUqIqBwzvnMH2thW4Rz1tpnb/bei/fz8ViPs9flrO9nc/LT97qWIgIzsyJqqnUAZmbV4gRnZoXlBGdmheUEZ2aF5QRnZoXVUusASnVv2iw2b+5d6zAsh/YtetQ6BMvh7VVLWbN6hbpyj0MO6BmLl7Rlunbqs+9MjohDu1JeV9RVgtu8uTd/3++fax2G5bBy3+1rHYLl8NRDV3X5HouXtPH45O0yXds8aOaALhfYBXWV4Mys/gXQTnutw8jECc7McgmCNZGtiVprTnBmlptrcGZWSEHQ1iBLPJ3gzCy3dpzgzKyAAmhzgjOzonINzswKKYA17oMzsyIKwk1UMyuogLbGyG9OcGaWT7KSoTE4wZlZTqKNLq3X32ic4Mwsl2SQoTESnJ8HZ2a5JPPglGnrjKTrJC2QNK3k2GWS/izpWUm3S+pbcu58SbMkzZB0SGf3d4Izs9zaQ5m2DK4H1n1e3D3A7hHxIeBF4HwASSOAY4Dd0t/5kaTmjm7uBGdmuVSyBhcRDwJL1jn2u4hoTXcfBYakn48AbomIdyLiZWAWMLqj+7sPzsxyCURb9rrRAElTSvYnRsTEHMV9Hrg1/TyYJOGtNTc9tkFOcGaWW8bmJ8CiiBhVThmSvgG0AjeV8/vgBGdmOQVidXTY9dVlkk4CDgcOjHh3Xdg8YGjJZUPSYxvkPjgzyyWZ6NuUaSuHpEOBc4BPRcTKklN3AsdI6iFpOLAT8HhH93INzsxyq9REX0k3A2NJ+urmAheSjJr2AO6RBPBoRIyLiOclTQJeIGm6nh7R8bPTneDMLJcI0RaVafxFxLHrOXxtB9dfDFyc9f5OcGaWW7uXaplZESWDDI2ROhojSjOrG2sHGRqBE5yZ5dbWIIvtneDMLJecKxlqygnOzHJrr9AoarU5wZlZLslieyc4MyugQKyp8lKtSnGCM7NcIqjYRN9qc4Izs5zkib5mVkyBa3BmVmAeZDCzQgoyv2+h5pzgzCyX5LWBjZE6GiNKM6sjfvGzmRVU4JUMZlZgrsGZWSFFyDU4MyumZJDBS7XMrJAq906GanOCM7NckkEG98GZWUF5JYOZFZJXMphZofmlM2ZWSBGwpt0JzswKKGmiNkaCa4wozayutKXrUTvbOiPpOkkLJE0rOdZ
f0j2SZqY/+6XHJekqSbMkPStp787u7xpchZ150XRG/+Mi3ljSnS8dtR8Ax58+mzEHLKS9XSxb0o3LvzmCJQt71DhSA+je0sqV595Ft25tNDe184epw7n+jn3ePT/+2D/x8Q+/yGGnn1S7IOtMhaeJXA9MAG4sOXYecF9EXCLpvHT/XOAwYKd02w+4Jv25QVWtwUk6VNKMNOOeV82y6sW9d27DN0/b6z3Hbrt+O07/9H6MP3o0jz84gM998eUaRWfrWt3azNnf/zinfOsoTrnoKEbvPpcR2y8AYJcPLqR3z9U1jrAeJU3ULFtnIuJBYMk6h48Abkg/3wAcWXL8xkg8CvSVNKij+1ctwUlqBq4mybojgGMljahWefVi2tR+LF/23orxqhV/299s8zaiQRYqbxrEqne6AdDS3E5LczsR0KR2xh39OD/+xegax1ef2tP3MnS2AQMkTSnZTs1w+4ERMT/9/BowMP08GHi15Lq56bENqmYTdTQwKyJmA0i6hSQDv1DFMuvWCeNf4sBPvsaKt1o47+SRtQ7HSjSpnYn/778ZvPWb3H7/CKa/vDX/fNA0Hn56O5Ys26LW4dWdZBQ181rURRExqvyyIiRFub9fzSZqpmwr6dS12X11+9tVDKe2bvzhDpx48P48cNdAPnns3FqHYyXao4lTLjqKz3z1WP5u+EI+tPN8xo56mdvv263WodWltRN9s2xlen1t0zP9uSA9Pg8YWnLdkPTYBtV8FDUiJkbEqIgY1b1ps1qHU3X337UN+x+0sNZh2Hq8taoHT/15ECN3nc/grd/kpu9N4pZLb6FH91Zu+u6kWodXV3I0UctxJ3Bi+vlE4I6S4yeko6ljgGUlTdn1qmYTNXe2Laptt1vJX+ckTZ0xByxk7stu9tSLLXutoq2tibdW9aB7t1ZGjZjHzb/dk6POPu7da3579fUc9/WjaxhlfankKKqkm4GxJH11c4ELgUuASZJOBl4B1v7j/wb4ODALWAn8S2f3r2aCewLYSdJwksR2DPC5KpZXF865dBofGvUGffqu4cZ7HubnPxrOvh9ZzOBhK4l2WDB/Myb82661DtNSW/VdyfknP0iT2mlqgvufGM4jz25X67DqXqUm+kbEsRs4deB6rg3g9Dz3r1qCi4hWSWcAk4Fm4LqIeL5a5dWLfz939/cd+93t29YgEsti9tyt+MJF/9ThNZ4D914RorVBVjJUdaJvRPyGpFppZgXip4mYWSH5gZdmVmhOcGZWSH7gpZkVWhfmuG1UTnBmlksEtPqBl2ZWVG6imlkhuQ/OzAotnODMrKg8yGBmhRThPjgzKyzR5lFUMysq98GZWSF5LaqZFVck/XCNwAnOzHLzKKqZFVJ4kMHMisxNVDMrLI+imlkhRTjBmVmBeZqImRWW++DMrJAC0e5RVDMrqgapwDnBmVlODTTI0Bj1TDOrL5Fx64SksyQ9L2mapJslbSZpuKTHJM2SdKuk7uWGucEEJ6lPR1u5BZpZ44tQpq0jkgYD/wqMiojdgWbgGOBS4IqI2BFYCpxcbpwdNVGfJ8nBpVGu3Q9gu3ILNbPGFUB7e8WaqC3A5pLWAFsA84GPAZ9Lz98AfAu4ptybr1dEDC3nhmZWcAFk74MbIGlKyf7EiJgIEBHzJH0fmAOsAn4HTAXeiIjW9Pq5wOByQ800yCDpGGD7iPiupCHAwIiYWm6hZtbYcsyDWxQRo9Z3QlI/4AhgOPAG8Avg0ErEt1angwySJgAHAMenh1YCP65kEGbWYCozyHAQ8HJELIyINcCvgP2BvpLWVr6GAPPKDTPLKOo/RMQXgbcBImIJUPaohpk1umwDDBmmkswBxkjaQpKAA4EXgPuBT6fXnAjcUW6kWRLcGklNpPlY0lZAe7kFmlkBVKAGFxGPAbcBTwLPkeSjicC5wNmSZgFbAdeWG2aWPrirgV8CH5B0EXA0cFG5BZpZgwuICo2iRsSFwIXrHJ4NjK7E/TtNcBFxo6SpJO1lgM9
ExLRKFG5mjaoxVjJkXarVDKwhqXR69YPZpq5BFqNmGUX9BnAzsC3JiMZ/STq/2oGZWR2r0FKtastSgzsBGBkRKwEkXQw8BXyvmoGZWZ3KN9G3prIkuPnrXNeSHjOzTVTDP/BS0hUkuXoJ8Lykyen+wcATGyc8M6tLlVuLWlUd1eDWjpQ+D9xVcvzR6oVjZo1AjV6Di4iyJ9eZWYHVyQBCFp32wUnaAbgYGAFstvZ4ROxcxbjMrG6pYQYZssxpux74GcnMvsOAScCtVYzJzOpdg0wTyZLgtoiIyQAR8VJEXECS6MxsU9WecauxLNNE3kkX278kaRzJo0t6VzcsM6tbBZsHdxbQk+TZ6RcDWwKfr2ZQZlbfGn4Uda30kSYAy/nbQy/NbFPW6AlO0u108DUi4qiqRGRmViEd1eAmbLQoUtHaRtuixRu7WOuCB669r9YhWA6jD1lUkfs0fBM1Ivy/XDN7v6AQS7XMzNav0WtwZmYb0ihN1MxP55XUo5qBmFkDKcpKBkmjJT0HzEz395T0w6pHZmb1qygJDrgKOBxYDBARz5C8CNrMNkGK7FutZemDa4qIV5L3sr6rrUrxmFkjKNAo6quSRgMhqRkYD7xY3bDMrJ7VQ+0siywJ7jSSZup2wOvAvekxM9tUFSXBRcQC4JiNEIuZNYI66V/LIssTff+T9eTriDi1KhGZWf2rUIKT1Bf4KbB7etfPAzNIHqo7DPgLcHRELC3n/llGUe8F7ku3h4GtgXfKKczMikHt2bYMrgTujohdgT2B6cB5wH0RsRNJ3jmv3DizNFHf83hySf8feKjcAs3MACRtCXwUOAkgIlYDqyUdAYxNL7sBeAA4t5wyMq9kKDEcGFhOYWZWENkn+g6QNKVkK+3aGg4sBH4m6SlJP5XUExgYEWtfLv8aXcg3WfrglvK3FncTyYugy64ymlmDyzfIsCgiRm3gXAuwNzA+Ih6TdCXr5JaICKn8IY0OE5yS2b17kryHAaA9Ihpk/MTMqqYyWWAuMLfkqeG3kSS41yUNioj5kgYBC8otoMMmaprMfhMRbenm5GZmFVmLGhGvkSwk2CU9dCDwAnAncGJ67ETgjnLDzDLR92lJIyPiqXILMbPiEJlHSLMYD9wkqTswG/gXkorXJEknA68AR5d7847eydASEa3ASOAJSS8BK0i+X0TE3uUWamYNrIITfSPiaWB9fXQHVuL+HdXgHifpAPxUJQoyswJpkM6qjhKcIHmb/UaKxcwaRQES3Acknb2hkxFxeRXiMbMGUIS1qM1AL9KanJnZuwqQ4OZHxLc3WiRm1hiioqOoVdVpH5yZ2fsUoAZXkWFaMyuehu+Di4glGzMQM2sgjZ7gzMzWq05eCZiFE5yZ5SIK0EQ1M9sQJzgzKy4nODMrLCc4MyukIr020MzsfZzgzKyoirBUy8xsvdxENbNi8kRfMys0JzgzKyKvZDCzQlN7Y2Q4Jzgzy8d9cGZWZG6imllxOcGZWVG5BmdmxdUgCa6p1gGYWYNJ36qVZctCUrOkpyT9Ot0fLukxSbMk3Sqpe7mhOsGZWS5r58Fl2TL6MjC9ZP9S4IqI2BFYCpxcbqxOcGaWX0S2rROShgCfAH6a7gv4GHBbeskNwJHlhuk+ODPLLUftbICkKSX7EyNiYsn+D4BzgN7p/lbAGxHRmu7PBQaXG6cTXBX17NPGWd9/lWG7vk0EXH72UKZP7VnrsArnP84aymP39qHvgFYm3j/jfef/dHcfbrxsEBI0twTjLprH7vut6FKZby5t5rvjhvH63O4MHLKab/zkL/Tu28bvf9WPSVdvTQRs3rOd8Ze8yg67vd2lsupOvom+iyJi1PpOSDocWBARUyWNrUxw71W1Jqqk6yQtkDStWmXUu9O+PY8pD/TmlI/uymkH7cycmZvVOqRCOvizS7j4ptkbPD/yI29xzb0zuObeGZx9+Ryu+OrQzPd+5k+9+P6Z273v+KQJWzPyw8v52cPTGfnh5dw6YWs
ABg59h8t+OYuf/H4Gx531Gleek72sRlKhQYb9gU9J+gtwC0nT9Eqgr6S1la8hwLxy46xmH9z1wKFVvH9d26J3G3uMWcHd/9UfgNY1Tax4s7nGURXTHmNW0Ltf2wbPb96zHSn5/PbKpnc/A/ziRx9g/GE7M+7AXbjxsm0yl/nI5C056Ojk3egHHb2ER+7eEoDd9l1J775JLLvuvZJF87vl/DaNoRIJLiLOj4ghETEMOAb4fUQcB9wPfDq97ETgjnLjrFoTNSIelDSsWvevd9tst5pli5v5yhWvsv1uq5j57BZc881teWeVk1wtPPzbLbnuu4N4Y3EL/3ZjUtub+kBv5r3cg6t+8yIRcOFJw3nu0Z7sMabz5uvSRd3YamDSTdR/61aWLnp/Irv75v7se8Dyyn6RehBkGkDognOBWyR9B3gKuLbcG9W8D07SqcCpAJuxRY2jqZzm5mDHPVZx9QWDmfFUT8Z9ex6fPWMBN142qNahbZL2P2wZ+x+2jOce7ckN/z6ISye9xNQ/9ObJP/ThS/9nFwBWrWxi3uwe7DFmBf/6iZ1Y804Tq1Y2sfyNZk47KLnm5Av+yqix701aEmidXvenH+7F5Ju34vL/nrlxvuBGVumVDBHxAPBA+nk2MLoS9615gktHVCYC9FH/Bpkf3blF87uxcH43ZjyVDCo89OstOfqMBTWOyvYYs4LX5nRn2eJmAvjs+Nf5xPGL33fdVXcliemZP/Xinkn9+eoP5rznfL8Ba1j8egtbDWxl8est9N2q9d1zs1/YjB98dSjf+fls+vTfcNO5oTXIf6meB1clSxd2Y9FfuzNkh2QEba+PvOVBhhqZ93L3d1tUM5/dnDWrRZ/+bYz6x+VMvqU/q1Yk/xksmt+NNxZl+//8MQe/yb2Tkv7Veyf15+8PWQbAgrnd+PYpw/naVa8wZId3Kv9l6kAVJvpWTc1rcEV29QWDOXfCHFq6Ba/N6c5/nFXMEbVa+95pH+TZR3qxbEkLx+0zguO/8hqtrclIwuEnLOahu/py7239aGmBHpu38/VrXkGCfcYuZ86sHpz5yZ2AZDDinB++Qt8BnZf52TNe5+Jxw7j7lq3YenAyTQTgpiu2YfnSZiacn/ytm1uCCXe/WJXvXTMRDfPAS0WVOgsl3QyMBQYArwMXRkSHnYV91D/204FViceqY/Jfn651CJbD6ENeZcozb6vzKzesd98hMfKjX8507R//55ypG5oHtzFUcxT12Grd28xqqx6an1m4iWpm+QTQIE1UJzgzy68x8psTnJnl5yaqmRVWo4yiOsGZWT5+baCZFVUy0bcxMpwTnJnll/F9C7XmBGdmubkGZ2bF5D44MyuuxlmL6gRnZvm5iWpmhRTZX+pca05wZpafa3BmVliNkd+c4MwsP7U3RhvVCc7M8gk80dfMikmEJ/qaWYE5wZlZYTnBmVkhuQ/OzIqsUUZR/eJnM8spkiZqlq0DkoZKul/SC5Kel/Tl9Hh/SfdImpn+7FdupE5wZpZPUJEEB7QCX4mIEcAY4HRJI4DzgPsiYifgvnS/LE5wZpZfe8atAxExPyKeTD8vB6YDg4EjgBvSy24Ajiw3TPfBmVluOebBDZA0pWR/YkRMfN/9pGHASOAxYGBEzE9PvQYMLDdOJzgzyy97glsUEaM6ukBSL+CXwJkR8aakkmIipPJfUugEZ2b5REBbZUZRJXUjSW43RcSv0sOvSxoUEfMlDQIWlHt/98GZWX6VGUUVcC0wPSIuLzl1J3Bi+vlE4I5yw3QNzszyq8xKhv2B44HnJD2dHvs6cAkwSdLJwCvA0eUW4ARnZvkEUIF3MkTEQySvWV2fA7tcAE5wZpZbQDTGSgYnODPLJ6jYIEO1OcGZWX5+moiZFZYTnJkVU6Z1pnXBCc7M8gmgQR6X5ARnZvm5BmdmxVS5pVrV5gRnZvkEhOfBmVlhVWAlw8bgBGdm+bkPzswKKcKjqGZWYK7BmVkxBdHWVusgMnGCM7N8KvS
4pI3BCc7M8vM0ETMrogDCNTgzK6TwAy/NrMAaZZBBUUfDvZIWkrxkomgGAItqHYTlUtS/2Qcj4gNduYGku0n+fbJYFBGHdqW8rqirBFdUkqZ09vJbqy/+mxWD34tqZoXlBGdmheUEt3FMrHUAlpv/ZgXgPjgzKyzX4MyssJzgzKywnOCqSNKhkmZImiXpvFrHY52TdJ2kBZKm1ToW6zonuCqR1AxcDRwGjACOlTSitlFZBtcDNZuYapXlBFc9o4FZETE7IlYDtwBH1Dgm60REPAgsqXUcVhlOcNUzGHi1ZH9ueszMNhInODMrLCe46pkHDC3ZH5IeM7ONxAmuep4AdpI0XFJ34BjgzhrHZLZJcYKrkohoBc4AJgPTgUkR8Xxto7LOSLoZeATYRdJcSSfXOiYrn5dqmVlhuQZnZoXlBGdmheUEZ2aF5QRnZoXlBGdmheUE10AktUl6WtI0Sb+QtEUX7jVW0q/Tz5/q6GknkvpK+lIZZXxL0lezHl/nmuslfTpHWcP8BBBblxNcY1kVEXtFxO7AamBc6Uklcv9NI+LOiLikg0v6ArkTnFmtOcE1rj8CO6Y1lxmSbgSmAUMlHSzpEUlPpjW9XvDu8+n+LOlJ4Ki1N5J0kqQJ6eeBkm6X9Ey6/QNwCbBDWnu8LL3ua5KekPSspItK7vUNSS9KegjYpbMvIekL6X2ekfTLdWqlB0makt7v8PT6ZkmXlZT9xa7+Q1pxOcE1IEktJM+Zey49tBPwo4jYDVgBXAAcFBF7A1OAsyVtBvwn8ElgH2CbDdz+KuAPEbEnsDfwPHAe8FJae/yapIPTMkcDewH7SPqopH1IlqTtBXwc2DfD1/lVROybljcdKF05MCwt4xPAj9PvcDKwLCL2Te//BUnDM5Rjm6CWWgdguWwu6en08x+Ba4FtgVci4tH0+BiSB2w+LAmgO8nSo12BlyNiJoCknwOnrqeMjwEnAEREG7BMUr91rjk43Z5K93uRJLzewO0RsTItI8va290lfYekGdyLZGnbWpMioh2YKWl2+h0OBj5U0j+3ZVr2ixnKsk2ME1xjWRURe5UeSJPYitJDwD0Rcew6173n97pIwPci4ifrlHFmGfe6HjgyIp6RdBIwtuTcuusIIy17fESUJkIkDSujbCs4N1GL51Fgf0k7AkjqKWln4M/AMEk7pNcdu4Hfvw84Lf3dZklbAstJamdrTQY+X9K3N1jS1sCDwJGSNpfUm6Q53JnewHxJ3YDj1jn3GUlNaczbAzPSsk9Lr0fSzpJ6ZijHNkGuwRVMRCxMa0I3S+qRHr4gIl6UdCpwl6SVJE3c3uu5xZeBielTNNqA0yLiEUkPp9Mwfpv2w/0d8Ehag3wL+L8R8aSkW4FngAUkj4zqzDeBx4CF6c/SmOYAjwN9gHER8bakn5L0zT2ppPCFwJHZ/nVsU+OniZhZYbmJamaF5QRnZoXlBGdmheUEZ2aF5QRnZoXlBGdmheUEZ2aF9b9IJVmmk3cd0AAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "disp = metrics.plot_confusion_matrix(knn_clf, X_test_prepared, val_labels)\n", "plt.show(disp)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['knn_clf.pkl']" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "joblib.dump(knn_clf, \"knn_clf.pkl\")" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 78.14207650273224\n", "Precision : 79.26829268292683\n", "Recall : 95.58823529411765\n", "Specificity : 27.659574468085108\n", "H-mean : 42.90429042904291\n", "F1 score : 86.66666666666666\n" ] } ], "source": [ "preds = knn_clf.predict(X_test_prepared)\n", "tn, fp, fn, tp = metrics.confusion_matrix(val_labels, preds).ravel()\n", "sens = (tp / (tp + fn)) * 100\n", "spec = (tn / (tn + fp)) * 100\n", "prec = (tp / (tp + fp)) * 100\n", "accuracy = ((tp + tn) / (tp + tn + fn + fp)) * 100\n", "h_mean = 2 * ((sens * spec) / (sens + spec))\n", "f1 = 2 * ((prec * sens) / (prec + sens))\n", "\n", "print(f\"Accuracy : {accuracy}\\nPrecision : {prec}\\nRecall : {sens}\\nSpecificity : {spec}\\nH-mean : {h_mean}\\nF1 score : {f1}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Random Forest Classier" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n", " criterion='gini', max_depth=None, max_features='auto',\n", " max_leaf_nodes=None, max_samples=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=100,\n", " n_jobs=None, oob_score=False, random_state=None,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 32, "metadata": 
{}, "output_type": "execute_result" } ], "source": [
 "from sklearn.ensemble import RandomForestClassifier\n",
 "\n",
 "# fixed seed, as for the SGD classifier, so that refitting gives the same forest\n",
 "rf_clf = RandomForestClassifier(random_state=42)\n",
 "rf_clf" ] },
 { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n", " criterion='gini', max_depth=None, max_features='auto',\n", " max_leaf_nodes=None, max_samples=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=100,\n", " n_jobs=None, oob_score=False, random_state=None,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rf_clf.fit(X_train_prepared, train_labels)" ] },
 { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7486338797814208" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn import metrics\n", "preds = rf_clf.predict(X_test_prepared)\n", "metrics.accuracy_score(val_labels, preds)" ] },
 { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAATgAAAEGCAYAAADxD4m3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAZK0lEQVR4nO3de7xVdZ3/8df7nMNFuQooIZAwaiiZJSJSlqGoeSudfmo6jmlZDpZmOmZqzjA2mTY2mmZaTF5wclRSGy1TfmoXU/GCKF5DEAMh7igiKHDO+cwfa0FbhHP22mdv9t6L9/PxWA/2upzv97M58vF7Wd+1FBGYmeVRQ7UDMDOrFCc4M8stJzgzyy0nODPLLSc4M8utpmoHUKizukRXulU7DMugpa9/X/VkzdvLaX53lTpSxmcO6BbLlrcUde3Tz62ZHBGHdqS+jqipBNeVbuzbcFC1w7AM3vjs6GqHYBm8/OsrO1zGsuUtPDn5g0Vd2zhgZr8OV9gBNZXgzKz2BdBKa7XDKIoTnJllEgTrorguarU5wZlZZm7BmVkuBUFLnSzxdIIzs8xacYIzsxwKoMUJzszyyi04M8ulANZ5DM7M8igId1HNLKcCWuojvznBmVk2yUqG+uAEZ2YZiRY6tF5/i3GCM7NMkkkGJzgzy6HkPjgnODPLqVa34Mwsj9yCM7PcCkRLnbztoD6iNLOa0hoqamuPpBskLZb0QsGxyyX9WdJzkn4lqXfBuQskzZI0Q9Jn2ivfCc7MMgnE2mgsaivCTcDG72x4ANgjIvYEXgEuAJA0HDge+HD6M9dKarMSJzgzyyS50behqK3dsiIeBpZvdOz/R0Rzuvs4MCj9fBRwW0SsiYjXgFnAqLbK9xicmWWWYZKhn6SpBfsTImJChqq+DNyefh5IkvDWm5ce2ywnODPLJEK0RNGdv6URMbKUeiR9B2gGbinl58EJzsxK0Frh20QknQIcCYyN2PBspvnA4ILLBqXHNssJzswySSYZKpc6JB0KnAd8OiJWF5y6B/gfSVcAOwK7Ak+2VZYTnJllsn6SoRwk3QqMIRmrmweMJ5k17QI8IAng8YgYFxEvSpoEvETSdf16RNvvL3SCM7PMWsq0VCsiTtjE4evbuP4S4JJiy3eCM7NM6mklgxOcmWXWWvwsalU5wZlZJslieyc4M8uhQKwrbhlW1TnBmVkmEWS50beqnODMLCNV/EbfcnGCM7NMArfgzCzHPMlgZrkUFPcwy1rgBGdmmSSvDayP1FEfUZpZDfGLn80spwKvZDCzHHMLzsxyKUJuwZlZPiWTDF6qZWa5lOmdDFXlBGdmmSSTDB6DM7Oc8koGM8slr2Qws1wr10tnKs0JzswyiYB1rU5wZpZDSRfVCc7McsorGQyAhobgx/e9wrKFnfjXk/+u2uHYZjSolYln3MmSt7pxzsTDuejzf2D3QUuAYO7S3nz3jgN4Z22naodZE+rpNpGKtjMlHSpphqRZks6vZF216uivLOH1mV2qHYa14/j9nucvi7fbsH/lvZ/gxKuP5cSrj2PRiu4c+/EXqhhdrUm6qMVs7ZYk3SBpsaQXCo71kfSApJnpn9ulxyXp6jSfPCdpRHvlVyzBSWoEfgIcBgwHTpA0vFL11aJ+A9Yyauxb3Hdr32qHYm3Yoefb7DdsLnc/tfuGY6vWdE4/BV2ampNmi23Qmr6Xob2tCDcBh2507HzgoYjYFXgo3Yckl+yabqcB17VXeCVbcKOAWRExOyLWArcBR1Wwvpoz7uL5/Px7OxKt1Y7E2nL2kY/x4/tG07pREvuX//d77rvwZnba/k1un7JHdYKrQcksamNRW/tlxcPA8o0OHwVMTD9PBI4uOH5zJB4Heksa0Fb5lUxwA4HXC/bnpcfeQ9JpkqZKmrqONRUMZ8va96AVvLm0iVnPb1vtUKwNn9xtDm+s6sqf/7r9+879+50HcMSlJ/GXJb05eM9XqxBdbVp/o28
xW4n6R8SC9PNCoH/6uaicUqjqkwwRMQGYANBTfXLTERg+chWjD3mLfQ58kc5dgm17tHDe1XP4j2/sVO3QrMCeOy3kU7vP4RPDfkGXpha6dVnHxcc9xPhJY4HkwY4PTN+Fkz79LL95ercqR1s7Mrw2sJ+kqQX7E9J/80WJiJBUcl6oZIKbDwwu2B+UHtsq3HjZjtx42Y4A7PnxlRwzbomTWw26dvK+XDt5XwBGDJ3PP+4/nfGTDmRQ3xXMW9YLCD61+5z3TEBs7TLOoi6NiJEZq1gkaUBELEi7oIvT45lzSiUT3FPArpKGpkEcD/xDBeszKwsJxh/zO7p1XYcIZi7syw/+d/9qh1VTKnyj7z3AycBl6Z93Fxw/Q9JtwL7AioKu7CZVLMFFRLOkM4DJQCNwQ0S8WKn6atlzU3rw3JQe1Q7D2jHttYFMey0Z0vnqz/6+ytHUrgjRXKYEJ+lWYAxJV3YeMJ4ksU2SdCowBzguvfy3wOHALGA18KX2yq/oGFxE/DYNysxypFw3+kbECZs5NXYT1wbw9SzlV32SwczqSz2tZHCCM7PMnODMLJf8wEszy7UM98FVlROcmWUSAc1+4KWZ5ZW7qGaWSx6DM7NcCyc4M8srTzKYWS5FeAzOzHJLtHgW1czyymNwZpZLXotqZvkVyThcPXCCM7PMPItqZrkUnmQwszxzF9XMcsuzqGaWSxFOcGaWY75NxMxyy2NwZpZLgWj1LKqZ5VWdNOCc4MwsI08ymFmu1UkTbrMJTlLPtn4wIt4qfzhmVg/K1YKTdDbwFZKU+TzwJWAAcBvQF3gaOCki1pZSflstuBfTSgu/yfr9AD5YSoVmVt8CaG3teIKTNBD4BjA8It6RNAk4HjgcuDIibpP0U+BU4LpS6thsgouIwaUUaGY5F0D5xuCagG0krQO2BRYABwL/kJ6fCPwbJSa4ouZ6JR0v6cL08yBJe5dSmZnlQ0RxG9BP0tSC7bS/lRHzgR8Cc0kS2wqSLumbEdGcXjYPGFhqnO1OMki6BugE7A98H1gN/BTYp9RKzazOFT/JsDQiRm7qhKTtgKOAocCbwC+BQ8sR3nrFzKJ+IiJGSHoGICKWS+pcziDMrJ6oXJMMBwGvRcQSAEl3AfsBvSU1pa24QcD8Uisopou6TlIDac6W1BdoLbVCM8uBKHJr21xgtKRtJQkYC7wE/B44Jr3mZODuUsMsJsH9BLgT2F7SxcAjwA9KrdDM6lxAtKqorc1iIp4A7gCmkdwi0gBMAL4NnCNpFsmtIteXGmq7XdSIuFnS0yTNSYBjI+KFUis0szwozyxqRIwHxm90eDYwqhzlF7uSoRFYR9LorI9VtmZWOXWykqHdZCXpO8CtwI4kA37/I+mCSgdmZjWsPGNwFVdMC+6LwF4RsRpA0iXAM8CllQzMzGpUeW/0rahiEtyCja5rSo+Z2Vaq7h94KelKkly9HHhR0uR0/xDgqS0TnpnVpDKsRd0S2mrBrZ8pfRG4t+D445ULx8zqgeq9BRcRJd97YmY5ViMTCMUoZi3qzsAlwHCg6/rjEfGhCsZlZjVLdTPJUMw9bTcBN5Lc2XcYMAm4vYIxmVmtq5PbRIpJcNtGxGSAiHg1Ii4iSXRmtrVqLXKrsmJuE1mTLrZ/VdI4kpX9PSoblpnVrJzdB3c20I3k0cKXAL2AL1cyKDOrbXU/i7peuuIfYCVwUmXDMbO6UO8JTtKvaONrRMTnKxKRmVmZtNWCu2aLRVGoXtaAGABPXlrSu0CsSkZNW1KWcuq+ixoRD23JQMysTgS5WKplZrZp9d6CMzPbnHrpohb9dF5JXSoZiJnVkbysZJA0StLzwMx0/6OSflzxyMysduUlwQFXA0cCywAiYjpwQCWDMrPapSh+q7ZixuAaImJO8trCDVoqFI+Z1YMczaK+LmkUEJIagTOBVyoblpnVslponRWjmAR3Okk39YPAIuDB9JiZba3ykuAiYjFw/BaIxczqQRnH1yT1Bn4O7JG
UzJeBGSTPnBwC/AU4LiLeKKX8Yp7o+19sIl9HxGmlVGhmOVC+FtxVwP0RcYykzsC2wIXAQxFxmaTzgfOBb5dSeDFd1AcLPncF/h54vZTKzCwfVIaHWUrqBewPnAIQEWuBtZKOAsakl00E/kClElxEvOfx5JL+G3iklMrMbKvTT9LUgv0JETEh/TwUWALcKOmjwNPAWUD/iFj/7uWFQP9SKy9lqdbQjlRoZjlQfBd1aUSM3My5JmAEcGZEPCHpKpLu6N+qiQip9BG/Ysbg3uBvX6eB5EXQ52/+J8ws18o3yTAPmFfwUN07SHLLIkkDImKBpAHA4lIraDPBKbm796Mk72EAaI3wA9vMtnplyAIRsVDS65KGRcQMYCzwUrqdDFyW/nl3qXW0meDS5uFvI2KPUiswsxwqXzPnTOCWdAZ1NvAlkp7iJEmnAnOA40otvJgxuGcl7RURz5RaiZnlhyjPLCpARDwLbGqMbmw5ym/rnQxNEdEM7AU8JelVYBXJ94uIGFGOAMysztTIQvpitNWCe5JkhuNzWygWM6sXOUhwguRt9lsoFjOrFzlIcNtLOmdzJyPiigrEY2Z1IA9d1EagO2lLzsxsgxwkuAUR8d0tFomZ1Yco3yxqpbU7Bmdm9j45aMGV5T4UM8ufuh+Di4jlWzIQM6sj9Z7gzMw2qUZeCVgMJzgzy0TkoItqZrY5TnBmll9OcGaWW05wZpZLOXmaiJnZpjnBmVle5WGplpnZJrmLamb55Bt9zSzXnODMLI+8ksHMck2t9ZHhnODMLBuPwZlZnrmLamb5VScJrqHaAZhZ/VEUtxVVltQo6RlJv0n3h0p6QtIsSbdL6lxqnE5wZpZdFLkV5yzg5YL9HwBXRsQuwBvAqaWG6QRnZtmkb9UqZmuPpEHAEcDP030BBwJ3pJdMBI4uNVSPwZlZJhnvg+snaWrB/oSImFCw/yPgPKBHut8XeDMimtP9ecDAUmN1gjOz7KLoDLc0IkZu6oSkI4HFEfG0pDHlCq2QE5yZZVam20T2Az4n6XCgK9ATuAroLakpbcUNAuaXWoETXAV16tLKf941i06dg8am4E/39ua/f/iBaoeVO/959mCeeLAnvfs1M+H3M953/rH7e3Lz5QOQoLEpGHfxfPbYd1WH6nzrjUa+P24Ii+Z1pv+gtXznZ3+hR+8WfnfXdkz6yQ5EwDbdWjnzstfZ+cPvdqiumlOmG30j4gLgAoC0BXduRJwo6ZfAMcBtwMnA3aXWUbFJBkk3SFos6YVK1VHr1q0R5x27M6cfPIzTDx7GyDEr2W1Ex/5h2fsd8oXlXHLL7M2e3+tTb3PdgzO47sEZnHPFXK48d3DRZU9/rDs//OYH33d80jU7sNcnV3Ljoy+z1ydXcvs1OwDQf/AaLr9zFj/73QxOPHshV51XfF31pFyTDJvxbeAcSbNIxuSuL7WgSs6i3gQcWsHy64B4d3UjAE2dgsZOkWHowor1kdGr6LFdy2bPb9OtFSn5/O7qhg2fAX557facediHGDd2GDdfXnzresrkXhx0XPJu9IOOW86U+3sB8OF9VtOjdxLLbiNWs3RBp4zfpj6UO8FFxB8i4sj08+yIGBURu0TEsRGxptQ4K9ZFjYiHJQ2pVPn1oqEhuGbyK+w4ZC2/vqkvM57pVu2QtkqP3teLG74/gDeXNfHvNyetvaf/0IP5r3Xh6t++QgSMP2Uozz/ejY+Mbr+V/cbSTvTtn0z09dmhmTeWvj+R3X9rH/Y5YGV5v0gtCLJMMlRV1cfgJJ0GnAbQlW2rHE35tbaKrx08jG49Wxh//WvsNOwd5szYptphbXX2O2wF+x22gucf78bE/xjADya9ytN/7MG0P/bkawcPA+Cd1Q3Mn92Fj4xexTeO2JV1axp4Z3UDK99s5PSDkmtOveivjBzz3qQlgTYadX/20e5MvrUvV/zvzC3zBbcwr0UtUnpPzASAnupTJ39t2a16q5Hpj3VnnwNWOsFV0UdGr2Lh3M6
sWNZIAF84cxFHnLTsfdddfW+SmKY/1p0HJvXh3B/Nfc/57fqtY9miJvr2b2bZoiZ6923ecG72S1350bmD+d4vZtOzz+a7znWtTv6leiVDBfXq00y3nsl/4J27tjJi/7d5fVbXKke19Zn/WucNPaqZz23DurWiZ58WRn56JZNv68M7q5J/BksXdOLNpcX9P3/0IW/x4KQ+ADw4qQ8f/8wKABbP68R3vzKUb109h0E7lzx0VNPW3+hbrrWolVT1Flye9em/jnOvmktDAzQ0wMO/7sUTD/asdli5c+npO/HclO6sWN7EiXsP56R/XkhzczKTcOQXl/HIvb158I7taGqCLtu0cuF1c5Bg7zErmTurC9/87K5AMhlx3o/n0Ltf+3V+4YxFXDJuCPff1pcdBia3iQDccuUHWPlGI9dckMyeNjYF19z/SkW+d9VE1M0DLxUVGiyUdCswBugHLALGR0Sb07091Sf21diKxGOVMfmvz1Y7BMtg1GdeZ+r0d9X+lZvXo/eg2Gv/s4q69k+/Pu/pza1k2BIqOYt6QqXKNrPqqoXuZzHcRTWzbAKoky6qE5yZZVcf+c0JzsyycxfVzHKrXmZRneDMLBu/NtDM8iq50bc+MpwTnJllV/qjkLYoJzgzy8wtODPLJ4/BmVl+1c9aVCc4M8vOXVQzy6Xo0PsWtignODPLzi04M8ut+shvTnBmlp1a66OP6gRnZtkEvtHXzPJJRN3c6OuXzphZdhHFbW2QNFjS7yW9JOlFSWelx/tIekDSzPTP7UoN0wnOzLIrQ4IDmoF/jojhwGjg65KGA+cDD0XErsBD6X5JnODMLJv1Y3DFbG0VE7EgIqaln1cCLwMDgaOAiellE4GjSw3VY3BmllmGWdR+kqYW7E9IX/b+3vKkIcBewBNA/4hYkJ5aCPQvNU4nODPLqKju53pL23ttoKTuwJ3ANyPiLelvbzWMiJBKf0C6u6hmlk1QrjE4JHUiSW63RMRd6eFFkgak5wcAi0sN1QnOzLIrwxickqba9cDLEXFFwal7gJPTzycDd5capruoZpZZme6D2w84CXhe0rPpsQuBy4BJkk4F5gDHlVqBE5yZZVeGBBcRj5C84mFTxna4ApzgzCyrCGipj7VaTnBmll2dLNVygjOz7JzgzCyXAvA7GcwsnwLCY3BmlkeBJxnMLMc8BmdmueUEZ2b5lGmxfVU5wZlZNgH4pTNmlltuwZlZPnmplpnlVUD4Pjgzyy2vZDCz3PIYnJnlUoRnUc0sx9yCM7N8CqKlpdpBFMUJzsyy8eOSzCzXfJuImeVRAOEWnJnlUviBl2aWY/UyyaCooeleSUtIXvSaN/2ApdUOwjLJ6+9sp4jYviMFSLqf5O+nGEsj4tCO1NcRNZXg8krS1IgYWe04rHj+neVDQ7UDMDOrFCc4M8stJ7gtY0K1A7DM/DvLAY/BmVluuQVnZrnlBGdmueUEV0GSDpU0Q9IsSedXOx5rn6QbJC2W9EK1Y7GOc4KrEEmNwE+Aw4DhwAmShlc3KivCTUDVbky18nKCq5xRwKyImB0Ra4HbgKOqHJO1IyIeBpZXOw4rDye4yhkIvF6wPy89ZmZbiBOcmeWWE1zlzAcGF+wPSo+Z2RbiBFc5TwG7ShoqqTNwPHBPlWMy26o4wVVIRDQDZwCTgZeBSRHxYnWjsvZIuhWYAgyTNE/SqdWOyUrnpVpmlltuwZlZbjnBmVluOcGZWW45wZlZbjnBmVluOcHVEUktkp6V9IKkX0ratgNljZH0m/Tz59p62omk3pK+VkId/ybp3GKPb3TNTZKOyVDXED8BxDbmBFdf3omIj0XEHsBaYFzhSSUy/04j4p6IuKyNS3oDmROcWbU5wdWvPwG7pC2XGZJuBl4ABks6RNIUSdPSll532PB8uj9LmgZ8fn1Bkk6RdE36ub+kX0manm6fAC4Ddk5bj5en131L0lOSnpN0cUFZ35H0iqRHgGHtfQlJX03LmS7pzo1apQdJmpq
Wd2R6faOkywvq/qeO/kVafjnB1SFJTSTPmXs+PbQrcG1EfBhYBVwEHBQRI4CpwDmSugL/BXwW2Bv4wGaKvxr4Y0R8FBgBvAicD7yath6/JemQtM5RwMeAvSXtL2lvkiVpHwMOB/Yp4uvcFRH7pPW9DBSuHBiS1nEE8NP0O5wKrIiIfdLyvyppaBH12FaoqdoBWCbbSHo2/fwn4HpgR2BORDyeHh9N8oDNRyUBdCZZerQb8FpEzASQ9AvgtE3UcSDwRYCIaAFWSNpuo2sOSbdn0v3uJAmvB/CriFid1lHM2ts9JH2PpBvcnWRp23qTIqIVmClpdvodDgH2LBif65XW/UoRddlWxgmuvrwTER8rPJAmsVWFh4AHIuKEja57z891kIBLI+JnG9XxzRLKugk4OiKmSzoFGFNwbuN1hJHWfWZEFCZCJA0poW7LOXdR8+dxYD9JuwBI6ibpQ8CfgSGSdk6vO2EzP/8QcHr6s42SegErSVpn600GvlwwtjdQ0g7Aw8DRkraR1IOkO9yeHsACSZ2AEzc6d6ykhjTmvwNmpHWfnl6PpA9J6lZEPbYVcgsuZyJiSdoSulVSl/TwRRHxiqTTgHslrSbp4vbYRBFnARPSp2i0AKdHxBRJj6a3YdyXjsPtDkxJW5BvA/8YEdMk3Q5MBxaTPDKqPf8CPAEsSf8sjGku8CTQExgXEe9K+jnJ2Nw0JZUvAY4u7m/HtjZ+moiZ5Za7qGaWW05wZpZbTnBmlltOcGaWW05wZpZbTnBmlltOcGaWW/8HSrMQHT9XvekAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "metrics.plot_confusion_matrix(rf_clf, X_test_prepared, val_labels)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['rf_clf.pkl']" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "joblib.dump(rf_clf, \"rf_clf.pkl\")" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 74.86338797814209\n", "Precision : 75.56818181818183\n", "Recall : 97.79411764705883\n", "Specificity : 8.51063829787234\n", "H-mean : 15.658572479764532\n", "F1 score : 85.25641025641026\n" ] } ], "source": [ "preds = rf_clf.predict(X_test_prepared)\n", "tn, fp, fn, tp = metrics.confusion_matrix(val_labels, preds).ravel()\n", "sens = (tp / (tp + fn)) * 100\n", "spec = (tn / (tn + fp)) * 100\n", "prec = (tp / (tp + fp)) * 100\n", "accuracy = ((tp + tn) / (tp + tn + fn + fp)) * 100\n", "h_mean = 2 * ((sens * spec) / (sens + spec))\n", "f1 = 2 * ((prec * sens) / (prec + sens))\n", "\n", "print(f\"Accuracy : {accuracy}\\nPrecision : {prec}\\nRecall : {sens}\\nSpecificity : {spec}\\nH-mean : {h_mean}\\nF1 score : {f1}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }