{ "cells": [ { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "import numpy as np \n", "import pandas as pd\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import metrics\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "import matplotlib.image as mpimg\n", "from sklearn import tree\n", "from sklearn import preprocessing\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.neural_network import MLPClassifier\n", "%matplotlib inline " ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(520, 17)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df=pd.read_csv(\"D:/Datasets/diabetes_data_upload.csv\", delimiter=\",\")\n", "df.shape" ] }, 
{ "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# Label-encode the listed columns of df in place; 'Age' is not in the list and is left as loaded.\n", "label_enco = preprocessing.LabelEncoder()\n", "encoded_columns = [\n", "    'Gender', 'Polyuria', 'Polydipsia', 'suddenweightloss', 'weakness',\n", "    'Polyphagia', 'Genitalthrush', 'visualblurring', 'Itching', 'Irritability',\n", "    'delayedhealing', 'partialparesis', 'musclestiffness', 'Alopecia', 'Obesity',\n", "    'Target',  # kept last so label_enco finishes fitted on the target column\n", "]\n", "for column in encoded_columns:\n", "    # fit_transform refits the shared encoder on each column before encoding it\n", "    df[column] = label_enco.fit_transform(df[column])" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeGenderPolyuriaPolydipsiasuddenweightlossweaknessPolyphagiaGenitalthrushvisualblurringItchingIrritabilitydelayedhealingpartialparesismusclestiffnessAlopeciaObesityTarget
0401010100010101111
1581000100100010101
2411100110010101101
\n", "
" ], "text/plain": [ " Age Gender Polyuria Polydipsia suddenweightloss weakness Polyphagia \\\n", "0 40 1 0 1 0 1 0 \n", "1 58 1 0 0 0 1 0 \n", "2 41 1 1 0 0 1 1 \n", "\n", " Genitalthrush visualblurring Itching Irritability delayedhealing \\\n", "0 0 0 1 0 1 \n", "1 0 1 0 0 0 \n", "2 0 0 1 0 1 \n", "\n", " partialparesis musclestiffness Alopecia Obesity Target \n", "0 0 1 1 1 1 \n", "1 1 0 1 0 1 \n", "2 0 1 1 0 1 " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(3)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "X=df[['Age','Gender','Polyuria','Polydipsia','suddenweightloss','weakness','Polyphagia','Genitalthrush','visualblurring','Itching','Irritability','delayedhealing','partialparesis','musclestiffness','Alopecia','Obesity']]" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "Y=df['Target']" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(520, 16)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, Y, test_size=0.3, random_state=3)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(364, 16) n (156, 16)\n" ] } ], "source": [ "print(X_trainset.shape,\"n\",X_testset.shape)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(364,) (156,)\n" ] } ], "source": [ "print(y_trainset.shape,\" 
\",y_testset.shape)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(criterion='entropy', max_depth=5)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drugTree = DecisionTreeClassifier(criterion=\"entropy\", max_depth = 5)\n", "drugTree" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(criterion='entropy', max_depth=5)" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drugTree.fit(X_trainset,y_trainset)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "predTree = drugTree.predict(X_testset)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9423076923076923\n" ] } ], "source": [ "print(\"Accuracy: \", metrics.accuracy_score(y_testset, predTree))" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9347826086956521\n", "0.9423076923076923\n", "0.9498327759197324\n" ] } ], "source": [ "print(precision_score(y_testset, predTree, average='macro'))\n", "print(precision_score(y_testset, predTree, average='micro'))\n", "print(precision_score(y_testset, predTree, average='weighted'))" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9423076923076923\n" ] } ], "source": [ "print(recall_score(y_testset, predTree, average='micro'))" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.87 1.00 0.93 60\n", " 1 1.00 0.91 0.95 96\n", "\n", " accuracy 0.94 156\n", " macro avg 0.93 
0.95 0.94 156\n", "weighted avg 0.95 0.94 0.94 156\n", "\n" ] } ], "source": [ "from sklearn.metrics import classification_report\n", "print(classification_report(y_testset,predTree))" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, Y, test_size=0.3, random_state=3)\n", "gnb = GaussianNB()\n", "nfit=gnb.fit(X_trainset,y_trainset)\n", "npred=nfit.predict(X_testset)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.90 0.88 0.89 60\n", " 1 0.93 0.94 0.93 96\n", "\n", " accuracy 0.92 156\n", " macro avg 0.91 0.91 0.91 156\n", "weighted avg 0.92 0.92 0.92 156\n", "\n" ] } ], "source": [ "print(classification_report(y_testset,npred))" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9166666666666666\n" ] } ], "source": [ "print(\"Accuracy: \", metrics.accuracy_score(y_testset,npred))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, Y, test_size=0.3, random_state=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9743589743589743\n", "Confusion 
Matrix:\n", "[[60 2]\n", " [ 2 92]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.97 0.97 0.97 62\n", " 1 0.98 0.98 0.98 94\n", "\n", " accuracy 0.97 156\n", " macro avg 0.97 0.97 0.97 156\n", "weighted avg 0.97 0.97 0.97 156\n", "\n", "RandomForestClassifier(n_estimators=30)\n" ] } ], "source": [ "from sklearn.metrics import confusion_matrix\n", "from sklearn.ensemble import RandomForestClassifier\n", "\n", "#Create a Random Forest classifier\n", "clf=RandomForestClassifier(n_estimators=30)\n", "\n", "#Train the model using the training set\n", "clf.fit(X_trainset,y_trainset)\n", "\n", "yhat4=clf.predict(X_testset)\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat4))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, yhat4))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, yhat4))\n", "print(clf)" ] }, { "cell_type": "code", "execution_count": 154, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LogisticRegression(C=0.01)" ] }, "execution_count": 154, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import confusion_matrix\n", "LR = LogisticRegression(C=0.01, solver='lbfgs').fit(X_trainset,y_trainset)\n", "LR" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,\n", " 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,\n", " 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0,\n", " 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,\n", " 1, 1])" ] 
}, "execution_count": 155, "metadata": {}, "output_type": "execute_result" } ], "source": [ "yhat = LR.predict(X_testset)\n", "yhat" ] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [], "source": [ "yhat_prob = LR.predict_proba(X_testset)" ] }, { "cell_type": "code", "execution_count": 157, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.95 0.58 0.72 60\n", " 1 0.79 0.98 0.87 96\n", "\n", " accuracy 0.83 156\n", " macro avg 0.87 0.78 0.80 156\n", "weighted avg 0.85 0.83 0.82 156\n", "\n" ] } ], "source": [ "print (classification_report(y_testset, yhat))" ] }, { "cell_type": "code", "execution_count": 158, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.8269230769230769\n" ] } ], "source": [ "print(\"Accuracy: \", metrics.accuracy_score(y_testset, yhat))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Learning rate: 0.05\n", "Accuracy score (training): 0.964\n", "Accuracy score (validation): 0.942\n", "Learning rate: 0.075\n", "Accuracy score (training): 0.959\n", "Accuracy score (validation): 0.955\n", "Learning rate: 0.1\n", "Accuracy score (training): 0.959\n", "Accuracy score (validation): 0.955\n", "Learning rate: 0.25\n", "Accuracy score (training): 0.992\n", "Accuracy score (validation): 0.981\n", "Learning rate: 0.5\n", "Accuracy score (training): 0.997\n", "Accuracy score (validation): 0.987\n", "Learning rate: 0.75\n", "Accuracy score (training): 1.000\n", "Accuracy score (validation): 0.968\n", "Learning rate: 1\n", "Accuracy score (training): 1.000\n", "Accuracy score (validation): 0.987\n" ] } ], "source": [ "from 
sklearn.ensemble import GradientBoostingClassifier\n", "lr_list = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]\n", "\n", "for learning_rate in lr_list:\n", " gb_clf = GradientBoostingClassifier(n_estimators=19, learning_rate=learning_rate, max_features=2, max_depth=4, random_state=0)\n", " gb_clf.fit(X_trainset,y_trainset)\n", "\n", " print(\"Learning rate: \", learning_rate)\n", " print(\"Accuracy score (training): {0:.3f}\".format(gb_clf.score(X_trainset,y_trainset)))\n", " print(\"Accuracy score (validation): {0:.3f}\".format(gb_clf.score(X_testset,y_testset)))" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Confusion Matrix:\n", "[[60 2]\n", " [ 4 90]]\n", "Accuracy: 0.9615384615384616\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.94 0.97 0.95 62\n", " 1 0.98 0.96 0.97 94\n", "\n", " accuracy 0.96 156\n", " macro avg 0.96 0.96 0.96 156\n", "weighted avg 0.96 0.96 0.96 156\n", "\n" ] }, { "data": { "text/plain": [ "GradientBoostingClassifier(learning_rate=0.3, max_depth=4, max_features=2,\n", " n_estimators=11, random_state=0)" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gb_clf1 = GradientBoostingClassifier(n_estimators=11, learning_rate=0.3, max_features=2, max_depth=4, random_state=0)\n", "gb_clf1.fit(X_trainset,y_trainset)\n", "predictions = gb_clf1.predict(X_testset)\n", "\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, predictions))\n", "print(\"Accuracy: \", metrics.accuracy_score(y_testset, predictions))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, predictions))\n", "gb_clf1" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9487179487179487\n" ] } ], "source": [ "from sklearn.ensemble import AdaBoostClassifier\n", "abc = 
AdaBoostClassifier(n_estimators=20,\n", " learning_rate=0.5)\n", "# Train Adaboost Classifer\n", "model = abc.fit(X_trainset,y_trainset)\n", "\n", "#Predict the response for test dataset\n", "y_pred = model.predict(X_testset)\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, y_pred))\n" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.6474358974358975\n", "Confusion Matrix:\n", "[[14 46]\n", " [ 9 87]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.61 0.23 0.34 60\n", " 1 0.65 0.91 0.76 96\n", "\n", " accuracy 0.65 156\n", " macro avg 0.63 0.57 0.55 156\n", "weighted avg 0.64 0.65 0.60 156\n", "\n" ] } ], "source": [ "from sklearn.ensemble import AdaBoostClassifier\n", "\n", "# Import Support Vector Classifier\n", "from sklearn.svm import SVC\n", "#Import scikit-learn metrics module for accuracy calculation\n", "from sklearn import metrics\n", "svc=SVC(probability=True, kernel='rbf')\n", "#LR = LogisticRegression(C=0.001, solver='liblinear')\n", "\n", "# Create adaboost classifer object\n", "abc =AdaBoostClassifier(n_estimators=20, base_estimator=svc,learning_rate=0.5)\n", "\n", "# Train Adaboost Classifer\n", "model = abc.fit(X_trainset,y_trainset)\n", "\n", "#Predict the response for test dataset\n", "y_pred = model.predict(X_testset)\n", "\n", "\n", "# Model Accuracy, how often is the classifier correct?\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, y_pred))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, y_pred))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, y_pred))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.8205128205128205\n", "Confusion 
Matrix:\n", "[[57 3]\n", " [25 71]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.70 0.95 0.80 60\n", " 1 0.96 0.74 0.84 96\n", "\n", " accuracy 0.82 156\n", " macro avg 0.83 0.84 0.82 156\n", "weighted avg 0.86 0.82 0.82 156\n", "\n" ] } ], "source": [ "from sklearn.datasets import make_classification\n", "from sklearn.ensemble import VotingClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "# define the base models\n", "models = list()\n", "models.append(('knn1', KNeighborsClassifier(n_neighbors=1)))\n", "models.append(('knn3', KNeighborsClassifier(n_neighbors=3)))\n", "models.append(('knn5', KNeighborsClassifier(n_neighbors=5)))\n", "models.append(('knn7', KNeighborsClassifier(n_neighbors=7)))\n", "models.append(('knn9', KNeighborsClassifier(n_neighbors=9)))\n", "models.append(('knn11', KNeighborsClassifier(n_neighbors=11)))\n", "# define the hard voting ensemble\n", "ensemble = VotingClassifier(estimators=models, voting='hard')\n", "# fit the ensemble on the training split\n", "ensemble.fit(X_trainset,y_trainset)\n", "# predict labels for the test split\n", "yhat = ensemble.predict(X_testset)\n", "# Model Accuracy, how often is the classifier correct?\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, yhat))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, yhat))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.8910256410256411\n", "Confusion Matrix:\n", "[[57 3]\n", " [14 82]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.80 0.95 0.87 60\n", " 1 0.96 0.85 0.91 96\n", "\n", " accuracy 0.89 156\n", " macro avg 0.88 0.90 0.89 156\n", "weighted avg 0.90 0.89 
0.89 156\n", "\n" ] } ], "source": [ "from sklearn.datasets import make_classification\n", "from sklearn.ensemble import VotingClassifier\n", "from sklearn.svm import SVC\n", "# Uses the existing X_trainset/X_testset split; no synthetic dataset is generated here,\n", "# so the notebook-level feature frame X is not overwritten.\n", "# define the base models\n", "models = list()\n", "models.append(('svm1', SVC(probability=True, kernel='poly', degree=1)))\n", "models.append(('svm2', SVC(probability=True, kernel='poly', degree=2)))\n", "models.append(('svm3', SVC(probability=True, kernel='poly', degree=3)))\n", "models.append(('svm4', SVC(probability=True, kernel='poly', degree=4)))\n", "models.append(('svm5', SVC(probability=True, kernel='poly', degree=5)))\n", "# define the soft voting ensemble\n", "ensemble = VotingClassifier(estimators=models, voting='soft')\n", "# fit the ensemble on the training split\n", "ensemble.fit(X_trainset,y_trainset)\n", "# predict labels for the test split\n", "yhat = ensemble.predict(X_testset)\n", "# Model Accuracy, how often is the classifier correct?\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, yhat))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, yhat))" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9423076923076923\n", "Confusion Matrix:\n", "[[58 2]\n", " [ 7 89]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.89 0.97 0.93 60\n", " 1 0.98 0.93 0.95 96\n", "\n", " accuracy 0.94 156\n", " macro avg 0.94 0.95 0.94 156\n", "weighted avg 0.95 0.94 0.94 156\n", "\n" ] } ], "source": [ "from sklearn.ensemble import BaggingClassifier\n", "# define the bagging ensemble\n", "model = BaggingClassifier(n_estimators=10)\n", "# fit the model on the training split\n", "model.fit(X_trainset,y_trainset)\n", "yhat2 = 
model.predict(X_testset)\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat2))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, yhat2))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, yhat2))" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9551282051282052\n", "Confusion Matrix:\n", "[[58 2]\n", " [ 5 91]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.92 0.97 0.94 60\n", " 1 0.98 0.95 0.96 96\n", "\n", " accuracy 0.96 156\n", " macro avg 0.95 0.96 0.95 156\n", "weighted avg 0.96 0.96 0.96 156\n", "\n", "RandomForestClassifier(n_estimators=50)\n" ] } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "#Create a Random Forest classifier\n", "clf=RandomForestClassifier(n_estimators=50)\n", "\n", "#Train the model using the training set\n", "clf.fit(X_trainset,y_trainset)\n", "\n", "yhat4=clf.predict(X_testset)\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat4))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, yhat4))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, yhat4))\n", "print(clf)" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.6153846153846154\n", "Confusion Matrix:\n", "[[ 0 60]\n", " [ 0 96]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.00 0.00 0.00 60\n", " 1 0.62 1.00 0.76 96\n", "\n", " accuracy 0.62 156\n", " macro avg 0.31 0.50 0.38 156\n", "weighted avg 0.38 0.62 0.47 156\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\waqas\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1245: 
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "C:\\Users\\waqas\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "C:\\Users\\waqas\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] } ], "source": [ "from sklearn import svm\n", "\n", "#Create an SVM classifier\n", "clf = svm.SVC(kernel='rbf') # RBF kernel\n", "\n", "#Train the model using the training sets\n", "clf.fit(X_trainset,y_trainset)\n", "\n", "#Predict the response for test dataset\n", "y_pred = clf.predict(X_testset)\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, y_pred))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, y_pred))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, y_pred))" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9038461538461539\n", "Confusion Matrix:\n", "[[51 9]\n", " [ 6 90]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.89 0.85 0.87 60\n", " 1 0.91 0.94 0.92 96\n", "\n", " accuracy 0.90 156\n", " macro avg 0.90 0.89 0.90 156\n", "weighted avg 0.90 0.90 0.90 156\n", "\n" ] } ], "source": [ "from 
sklearn.neighbors import KNeighborsClassifier\n", "\n", "modeln = KNeighborsClassifier(n_neighbors=1)\n", "\n", "#Train the model using the training sets\n", "modeln.fit(X_trainset,y_trainset)\n", "\n", "#Predict the response for test dataset\n", "y_pred1 = modeln.predict(X_testset)\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, y_pred1))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, y_pred1))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, y_pred1))" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.9358974358974359\n", "Confusion Matrix:\n", "[[54 6]\n", " [ 4 92]]\n", "Classification Report\n", " precision recall f1-score support\n", "\n", " 0 0.93 0.90 0.92 60\n", " 1 0.94 0.96 0.95 96\n", "\n", " accuracy 0.94 156\n", " macro avg 0.93 0.93 0.93 156\n", "weighted avg 0.94 0.94 0.94 156\n", "\n" ] } ], "source": [ "import xgboost as xgb\n", "from sklearn.metrics import mean_squared_error\n", "# Gradient-boosted tree classifier; 'binary:logistic' is the binary classification objective\n", "# (the previous 'reg:logistic' is the regression variant of the same logistic loss).\n", "xg_reg = xgb.XGBClassifier(objective ='binary:logistic', learning_rate = 0.6,\n", " max_depth = 5, alpha = 4, n_estimators = 14)\n", "xg_reg.fit(X_trainset,y_trainset)\n", "\n", "preds = xg_reg.predict(X_testset)\n", "print(\"Accuracy:\",metrics.accuracy_score(y_testset, preds))\n", "print(\"Confusion Matrix:\")\n", "print(confusion_matrix(y_testset, preds))\n", "\n", "print(\"Classification Report\")\n", "print(classification_report(y_testset, preds))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'tensorflow'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent 
call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m#import all libraries\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mloadtxt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mSequential\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'" ] } ], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }