{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import pandas as pd\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn import preprocessing\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import precision_score\n",
    "from sklearn.metrics import recall_score\n",
    "import matplotlib.image as mpimg\n",
    "from sklearn import tree\n",
    "from sklearn import preprocessing\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.neural_network import MLPClassifier\n",
    "%matplotlib inline "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(520, 17)"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=pd.read_csv(\"D:/Datasets/diabetes_data_upload.csv\", delimiter=\",\")\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_enco = preprocessing.LabelEncoder() #Label encoding\n",
    "df['Gender']=label_enco.fit_transform(df['Gender'])\n",
    "df['Polyuria']=label_enco.fit_transform(df['Polyuria'])\n",
    "df['Polydipsia']=label_enco.fit_transform(df['Polydipsia'])\n",
    "df['suddenweightloss']=label_enco.fit_transform(df['suddenweightloss'])\n",
    "df['weakness']=label_enco.fit_transform(df['weakness'])\n",
    "df['Polyphagia']=label_enco.fit_transform(df['Polyphagia'])\n",
    "df['Genitalthrush']=label_enco.fit_transform(df['Genitalthrush'])\n",
    "df['visualblurring']=label_enco.fit_transform(df['visualblurring'])\n",
    "df['Itching']=label_enco.fit_transform(df['Itching'])\n",
    "df['Irritability']=label_enco.fit_transform(df['Irritability'])\n",
    "df['delayedhealing']=label_enco.fit_transform(df['delayedhealing'])\n",
    "df['partialparesis']=label_enco.fit_transform(df['partialparesis'])\n",
    "df['musclestiffness']=label_enco.fit_transform(df['musclestiffness'])\n",
    "df['Alopecia']=label_enco.fit_transform(df['Alopecia'])\n",
    "df['Obesity']=label_enco.fit_transform(df['Obesity'])\n",
    "df['Target']=label_enco.fit_transform(df['Target'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Polyuria</th>\n",
       "      <th>Polydipsia</th>\n",
       "      <th>suddenweightloss</th>\n",
       "      <th>weakness</th>\n",
       "      <th>Polyphagia</th>\n",
       "      <th>Genitalthrush</th>\n",
       "      <th>visualblurring</th>\n",
       "      <th>Itching</th>\n",
       "      <th>Irritability</th>\n",
       "      <th>delayedhealing</th>\n",
       "      <th>partialparesis</th>\n",
       "      <th>musclestiffness</th>\n",
       "      <th>Alopecia</th>\n",
       "      <th>Obesity</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>40</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>58</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>41</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Age  Gender  Polyuria  Polydipsia  suddenweightloss  weakness  Polyphagia  \\\n",
       "0   40       1         0           1                 0         1           0   \n",
       "1   58       1         0           0                 0         1           0   \n",
       "2   41       1         1           0                 0         1           1   \n",
       "\n",
       "   Genitalthrush  visualblurring  Itching  Irritability  delayedhealing  \\\n",
       "0              0               0        1             0               1   \n",
       "1              0               1        0             0               0   \n",
       "2              0               0        1             0               1   \n",
       "\n",
       "   partialparesis  musclestiffness  Alopecia  Obesity  Target  \n",
       "0               0                1         1        1       1  \n",
       "1               1                0         1        0       1  \n",
       "2               0                1         1        0       1  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "X=df[['Age','Gender','Polyuria','Polydipsia','suddenweightloss','weakness','Polyphagia','Genitalthrush','visualblurring','Itching','Irritability','delayedhealing','partialparesis','musclestiffness','Alopecia','Obesity']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y=df['Target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(520, 16)"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, Y, test_size=0.3, random_state=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(364, 16) n (156, 16)\n"
     ]
    }
   ],
   "source": [
    "print(X_trainset.shape,\"n\",X_testset.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(364,)    (156,)\n"
     ]
    }
   ],
   "source": [
    "print(y_trainset.shape,\"  \",y_testset.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(criterion='entropy', max_depth=5)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drugTree = DecisionTreeClassifier(criterion=\"entropy\", max_depth = 5)\n",
    "drugTree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(criterion='entropy', max_depth=5)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drugTree.fit(X_trainset,y_trainset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "predTree = drugTree.predict(X_testset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy:  0.9423076923076923\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy: \", metrics.accuracy_score(y_testset, predTree))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9347826086956521\n",
      "0.9423076923076923\n",
      "0.9498327759197324\n"
     ]
    }
   ],
   "source": [
    "print(precision_score(y_testset,  predTree, average='macro'))\n",
    "print(precision_score(y_testset,  predTree, average='micro'))\n",
    "print(precision_score(y_testset,  predTree, average='weighted'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9423076923076923\n"
     ]
    }
   ],
   "source": [
    "print(recall_score(y_testset, predTree, average='micro'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.87      1.00      0.93        60\n",
      "           1       1.00      0.91      0.95        96\n",
      "\n",
      "    accuracy                           0.94       156\n",
      "   macro avg       0.93      0.95      0.94       156\n",
      "weighted avg       0.95      0.94      0.94       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "print(classification_report(y_testset,predTree))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, Y, test_size=0.3, random_state=3)\n",
    "gnb = GaussianNB()\n",
    "nfit=gnb.fit(X_trainset,y_trainset)\n",
    "npred=nfit.predict(X_testset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.90      0.88      0.89        60\n",
      "           1       0.93      0.94      0.93        96\n",
      "\n",
      "    accuracy                           0.92       156\n",
      "   macro avg       0.91      0.91      0.91       156\n",
      "weighted avg       0.92      0.92      0.92       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(classification_report(y_testset,npred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy:  0.9166666666666666\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy: \", metrics.accuracy_score(y_testset,npred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, Y, test_size=0.3, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9743589743589743\n",
      "Confusion Matrix:\n",
      "[[60  2]\n",
      " [ 2 92]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.97      0.97      0.97        62\n",
      "           1       0.98      0.98      0.98        94\n",
      "\n",
      "    accuracy                           0.97       156\n",
      "   macro avg       0.97      0.97      0.97       156\n",
      "weighted avg       0.97      0.97      0.97       156\n",
      "\n",
      "RandomForestClassifier(n_estimators=30)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "#Create a Gaussian Classifier\n",
    "clf=RandomForestClassifier(n_estimators=30)\n",
    "\n",
    "#Train the model using the training sets y_pred=clf.predict(X_test)\n",
    "clf.fit(X_trainset,y_trainset)\n",
    "\n",
    "yhat4=clf.predict(X_testset)\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat4))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(confusion_matrix(y_testset, yhat4))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_testset, yhat4))\n",
    "print(clf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=0.01)"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import confusion_matrix\n",
    "LR = LogisticRegression(C=0.01, solver='lbfgs').fit(X_trainset,y_trainset)\n",
    "LR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,\n",
       "       1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,\n",
       "       1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0,\n",
       "       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,\n",
       "       1, 1])"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "yhat = LR.predict(X_testset)\n",
    "yhat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "yhat_prob = LR.predict_proba(X_testset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.95      0.58      0.72        60\n",
      "           1       0.79      0.98      0.87        96\n",
      "\n",
      "    accuracy                           0.83       156\n",
      "   macro avg       0.87      0.78      0.80       156\n",
      "weighted avg       0.85      0.83      0.82       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print (classification_report(y_testset, yhat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy:  0.8269230769230769\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy: \", metrics.accuracy_score(y_testset, yhat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learning rate:  0.05\n",
      "Accuracy score (training): 0.964\n",
      "Accuracy score (validation): 0.942\n",
      "Learning rate:  0.075\n",
      "Accuracy score (training): 0.959\n",
      "Accuracy score (validation): 0.955\n",
      "Learning rate:  0.1\n",
      "Accuracy score (training): 0.959\n",
      "Accuracy score (validation): 0.955\n",
      "Learning rate:  0.25\n",
      "Accuracy score (training): 0.992\n",
      "Accuracy score (validation): 0.981\n",
      "Learning rate:  0.5\n",
      "Accuracy score (training): 0.997\n",
      "Accuracy score (validation): 0.987\n",
      "Learning rate:  0.75\n",
      "Accuracy score (training): 1.000\n",
      "Accuracy score (validation): 0.968\n",
      "Learning rate:  1\n",
      "Accuracy score (training): 1.000\n",
      "Accuracy score (validation): 0.987\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "lr_list = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]\n",
    "\n",
    "for learning_rate in lr_list:\n",
    "    gb_clf = GradientBoostingClassifier(n_estimators=19, learning_rate=learning_rate, max_features=2, max_depth=4, random_state=0)\n",
    "    gb_clf.fit(X_trainset,y_trainset)\n",
    "\n",
    "    print(\"Learning rate: \", learning_rate)\n",
    "    print(\"Accuracy score (training): {0:.3f}\".format(gb_clf.score(X_trainset,y_trainset)))\n",
    "    print(\"Accuracy score (validation): {0:.3f}\".format(gb_clf.score(X_testset,y_testset)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Confusion Matrix:\n",
      "[[60  2]\n",
      " [ 4 90]]\n",
      "Accuracy:  0.9615384615384616\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.94      0.97      0.95        62\n",
      "           1       0.98      0.96      0.97        94\n",
      "\n",
      "    accuracy                           0.96       156\n",
      "   macro avg       0.96      0.96      0.96       156\n",
      "weighted avg       0.96      0.96      0.96       156\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GradientBoostingClassifier(learning_rate=0.3, max_depth=4, max_features=2,\n",
       "                           n_estimators=11, random_state=0)"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gb_clf1 = GradientBoostingClassifier(n_estimators=11, learning_rate=0.3, max_features=2, max_depth=4, random_state=0)\n",
    "gb_clf1.fit(X_trainset,y_trainset)\n",
    "predictions = gb_clf1.predict(X_testset)\n",
    "\n",
    "print(\"Confusion Matrix:\")\n",
    "print(confusion_matrix(y_testset, predictions))\n",
    "print(\"Accuracy: \", metrics.accuracy_score(y_testset, predictions))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_testset, predictions))\n",
    "gb_clf1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9487179487179487\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "abc = AdaBoostClassifier(n_estimators=20,\n",
    "                         learning_rate=0.5)\n",
    "# Train Adaboost Classifer\n",
    "model = abc.fit(X_trainset,y_trainset)\n",
    "\n",
    "#Predict the response for test dataset\n",
    "y_pred = model.predict(X_testset)\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_testset, y_pred))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.6474358974358975\n",
      "Confusion Matrix:\n",
      "[[14 46]\n",
      " [ 9 87]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.61      0.23      0.34        60\n",
      "           1       0.65      0.91      0.76        96\n",
      "\n",
      "    accuracy                           0.65       156\n",
      "   macro avg       0.63      0.57      0.55       156\n",
      "weighted avg       0.64      0.65      0.60       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "\n",
    "# Import Support Vector Classifier\n",
    "from sklearn.svm import SVC\n",
    "#Import scikit-learn metrics module for accuracy calculation\n",
    "from sklearn import metrics\n",
    "svc=SVC(probability=True, kernel='rbf')\n",
    "#LR = LogisticRegression(C=0.001, solver='liblinear')\n",
    "\n",
    "# Create adaboost classifer object\n",
    "abc =AdaBoostClassifier(n_estimators=20, base_estimator=svc,learning_rate=0.5)\n",
    "\n",
    "# Train Adaboost Classifer\n",
    "model = abc.fit(X_trainset,y_trainset)\n",
    "\n",
    "#Predict the response for test dataset\n",
    "y_pred = model.predict(X_testset)\n",
    "\n",
    "\n",
    "# Model Accuracy, how often is the classifier correct?\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_testset, y_pred))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(confusion_matrix(y_testset, y_pred))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_testset, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.8205128205128205\n",
      "Confusion Matrix:\n",
      "[[57  3]\n",
      " [25 71]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.70      0.95      0.80        60\n",
      "           1       0.96      0.74      0.84        96\n",
      "\n",
      "    accuracy                           0.82       156\n",
      "   macro avg       0.83      0.84      0.82       156\n",
      "weighted avg       0.86      0.82      0.82       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.datasets import make_classification\n",
    "from sklearn.ensemble import VotingClassifier\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "# define the base models\n",
    "models = list()\n",
    "models.append(('knn1', KNeighborsClassifier(n_neighbors=1)))\n",
    "models.append(('knn3', KNeighborsClassifier(n_neighbors=3)))\n",
    "models.append(('knn5', KNeighborsClassifier(n_neighbors=5)))\n",
    "models.append(('knn7', KNeighborsClassifier(n_neighbors=7)))\n",
    "models.append(('knn9', KNeighborsClassifier(n_neighbors=9)))\n",
    "models.append(('knn11', KNeighborsClassifier(n_neighbors=11)))\n",
    "# define the hard voting ensemble\n",
    "ensemble = VotingClassifier(estimators=models, voting='hard')\n",
    "# fit the model on all available data\n",
    "ensemble.fit(X_trainset,y_trainset)\n",
    "# make a prediction for one example\n",
    "yhat = ensemble.predict(X_testset)\n",
    "# Model Accuracy, how often is the classifier correct?\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(confusion_matrix(y_testset, yhat))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_testset, yhat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.8910256410256411\n",
      "Confusion Matrix:\n",
      "[[57  3]\n",
      " [14 82]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.80      0.95      0.87        60\n",
      "           1       0.96      0.85      0.91        96\n",
      "\n",
      "    accuracy                           0.89       156\n",
      "   macro avg       0.88      0.90      0.89       156\n",
      "weighted avg       0.90      0.89      0.89       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.datasets import make_classification\n",
    "from sklearn.ensemble import VotingClassifier\n",
    "from sklearn.svm import SVC\n",
    "# define dataset\n",
    "X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=2)\n",
    "# define the base models\n",
    "models = list()\n",
    "models.append(('svm1', SVC(probability=True, kernel='poly', degree=1)))\n",
    "models.append(('svm2', SVC(probability=True, kernel='poly', degree=2)))\n",
    "models.append(('svm3', SVC(probability=True, kernel='poly', degree=3)))\n",
    "models.append(('svm4', SVC(probability=True, kernel='poly', degree=4)))\n",
    "models.append(('svm5', SVC(probability=True, kernel='poly', degree=5)))\n",
    "# define the soft voting ensemble\n",
    "ensemble = VotingClassifier(estimators=models, voting='soft')\n",
    "# fit the model on all available data\n",
    "ensemble.fit(X_trainset,y_trainset)\n",
    "# make a prediction for one example\n",
    "yhat = ensemble.predict(X_testset)\n",
    "# Model Accuracy, how often is the classifier correct?\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(confusion_matrix(y_testset, yhat))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_testset, yhat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9423076923076923\n",
      "Confusion Matrix:\n",
      "[[58  2]\n",
      " [ 7 89]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.89      0.97      0.93        60\n",
      "           1       0.98      0.93      0.95        96\n",
      "\n",
      "    accuracy                           0.94       156\n",
      "   macro avg       0.94      0.95      0.94       156\n",
      "weighted avg       0.95      0.94      0.94       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import BaggingClassifier\n",
    "# define dataset\n",
    "model = BaggingClassifier(n_estimators=10)\n",
    "# fit the model on the whole dataset\n",
    "model.fit(X_trainset,y_trainset)\n",
    "yhat2 = model.predict(X_testset)\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat2))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(confusion_matrix(y_testset, yhat2))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_testset, yhat2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9551282051282052\n",
      "Confusion Matrix:\n",
      "[[58  2]\n",
      " [ 5 91]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.92      0.97      0.94        60\n",
      "           1       0.98      0.95      0.96        96\n",
      "\n",
      "    accuracy                           0.96       156\n",
      "   macro avg       0.95      0.96      0.95       156\n",
      "weighted avg       0.96      0.96      0.96       156\n",
      "\n",
      "RandomForestClassifier(n_estimators=50)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "#Create a Gaussian Classifier\n",
    "clf=RandomForestClassifier(n_estimators=50)\n",
    "\n",
    "#Train the model using the training sets y_pred=clf.predict(X_test)\n",
    "clf.fit(X_trainset,y_trainset)\n",
    "\n",
    "yhat4=clf.predict(X_testset)\n",
    "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat4))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(confusion_matrix(y_testset, yhat4))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_testset, yhat4))\n",
    "print(clf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.6153846153846154\n",
      "Confusion Matrix:\n",
      "[[ 0 60]\n",
      " [ 0 96]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.00      0.00      0.00        60\n",
      "           1       0.62      1.00      0.76        96\n",
      "\n",
      "    accuracy                           0.62       156\n",
      "   macro avg       0.31      0.50      0.38       156\n",
      "weighted avg       0.38      0.62      0.47       156\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\waqas\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, msg_start, len(result))\n",
      "C:\\Users\\waqas\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, msg_start, len(result))\n",
      "C:\\Users\\waqas\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, msg_start, len(result))\n"
     ]
    }
   ],
   "source": [
    "from sklearn import svm\n",
    "from sklearn.pipeline import make_pipeline\n",
    "\n",
    "# RBF-kernel SVM. SVMs are not scale-invariant, so the features are\n",
    "# standardised inside a pipeline (the scaler is fitted on the training\n",
    "# split only) before they reach the kernel; otherwise a wide-range\n",
    "# column can dominate the distance computation.\n",
    "# NOTE(review): assumes X_trainset was not already scaled upstream; if it\n",
    "# was, the StandardScaler step is largely redundant -- confirm.\n",
    "clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(kernel='rbf'))\n",
    "\n",
    "# Fit on the training split, then predict the held-out test split.\n",
    "clf.fit(X_trainset, y_trainset)\n",
    "y_pred = clf.predict(X_testset)\n",
    "\n",
    "# All metric helpers are reached through the `metrics` module that the\n",
    "# import cell at the top of the notebook provides.\n",
    "print(\"Accuracy:\", metrics.accuracy_score(y_testset, y_pred))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(metrics.confusion_matrix(y_testset, y_pred))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(metrics.classification_report(y_testset, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9038461538461539\n",
      "Confusion Matrix:\n",
      "[[51  9]\n",
      " [ 6 90]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.89      0.85      0.87        60\n",
      "           1       0.91      0.94      0.92        96\n",
      "\n",
      "    accuracy                           0.90       156\n",
      "   macro avg       0.90      0.89      0.90       156\n",
      "weighted avg       0.90      0.90      0.90       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "\n",
    "# 1-nearest-neighbour classifier. fit() returns the estimator itself,\n",
    "# so construction and training are chained into one statement.\n",
    "modeln = KNeighborsClassifier(n_neighbors=1).fit(X_trainset, y_trainset)\n",
    "\n",
    "# Predict the held-out test split and report the usual metrics.\n",
    "y_pred1 = modeln.predict(X_testset)\n",
    "print(\"Accuracy:\", metrics.accuracy_score(y_testset, y_pred1))\n",
    "print(\"Confusion Matrix:\", confusion_matrix(y_testset, y_pred1), sep=\"\\n\")\n",
    "print(\"Classification Report\", classification_report(y_testset, y_pred1), sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9358974358974359\n",
      "Confusion Matrix:\n",
      "[[54  6]\n",
      " [ 4 92]]\n",
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.93      0.90      0.92        60\n",
      "           1       0.94      0.96      0.95        96\n",
      "\n",
      "    accuracy                           0.94       156\n",
      "   macro avg       0.93      0.93      0.93       156\n",
      "weighted avg       0.94      0.94      0.94       156\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import xgboost as xgb\n",
    "from sklearn.metrics import mean_squared_error  # NOTE(review): not used in this cell\n",
    "\n",
    "# Gradient-boosted trees for the two-class target. `binary:logistic` is\n",
    "# XGBoost's objective for binary classification, and the L1 penalty is\n",
    "# passed under the scikit-learn wrapper's own `reg_alpha` name rather than\n",
    "# the native alias `alpha`.\n",
    "xg_reg = xgb.XGBClassifier(objective='binary:logistic', learning_rate=0.6,\n",
    "                           max_depth=5, reg_alpha=4, n_estimators=14)\n",
    "xg_reg.fit(X_trainset, y_trainset)\n",
    "\n",
    "# Predict the held-out test split; metric helpers are reached through the\n",
    "# `metrics` module that the import cell at the top of the notebook provides.\n",
    "preds = xg_reg.predict(X_testset)\n",
    "print(\"Accuracy:\", metrics.accuracy_score(y_testset, preds))\n",
    "print(\"Confusion Matrix:\")\n",
    "print(metrics.confusion_matrix(y_testset, preds))\n",
    "\n",
    "print(\"Classification Report\")\n",
    "print(metrics.classification_report(y_testset, preds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'tensorflow'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-38-348e9a7b59ae>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m#import all libraries\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mloadtxt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mSequential\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}