{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "import numpy as np \n", "import pandas as pd\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import metrics\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "import matplotlib.image as mpimg\n", "from sklearn import tree\n", "from sklearn import preprocessing\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn import svm\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", "
" ], "text/plain": [
    " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
    "0 6 148 72 35 0 33.6 \n",
    "1 1 85 66 29 0 26.6 \n",
    "2 8 183 64 0 0 23.3 \n",
    "3 1 89 66 23 94 28.1 \n",
    "4 0 137 40 35 168 43.1 \n",
    "\n",
    " DiabetesPedigreeFunction Age Outcome \n",
    "0 0.627 50 1 \n",
    "1 0.351 31 0 \n",
    "2 0.672 32 1 \n",
    "3 0.167 21 0 \n",
    "4 2.288 33 1 "
   ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "# Location of the diabetes CSV. Keeping it in one constant means only this line\n",
   "# has to change when the notebook runs on another machine.\n",
   "DATA_PATH = \"E:/Datasets/pimadiabetes.csv\"\n",
   "\n",
   "df = pd.read_csv(DATA_PATH, delimiter=\",\")\n",
   "df.head(5)"
  ] },
 { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [],
  "source": [
   "# Predictor columns: every column of the frame except the 'Outcome' label.\n",
   "FEATURE_COLUMNS = [\n",
   "    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',\n",
   "    'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age',\n",
   "]\n",
   "X = df[FEATURE_COLUMNS]\n",
   "A = X.values  # NumPy array of the features; no later cell reads it"
  ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [],
  "source": [
   "# Binary target taken from the 'Outcome' column.\n",
   "Y = df['Outcome']\n",
   "B = Y.values  # NumPy array of the labels; no later cell reads it\n",
   "\n",
   "# Summarise the class balance instead of echoing every single label.\n",
   "Y.value_counts()"
  ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
 { "cell_type": "code", "execution_count": 30, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]\n" ] } ],
  "source": [
   "# `scores` is not assigned by any other cell, so on a fresh kernel this cell\n",
   "# failed with NameError. Initialise it here (the recorded output was an empty\n",
   "# list) until a cross-validation step is filled in.\n",
   "scores = []\n",
   "print(scores)\n",
   "#cross_val_predict(best_svr, X, y, cv=10)"
  ] },
 { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ],
  "source": [
   "import matplotlib.pyplot as plt\n",
   "import numpy as np\n",
   "from sklearn.linear_model import LassoCV\n",
   "\n",
   "# Rank the features by the absolute value of their cross-validated Lasso coefficients.\n",
   "# X is still unscaled here, so the magnitudes also reflect each column's units.\n",
   "lasso = LassoCV().fit(X, Y)\n",
   "importance = np.abs(lasso.coef_)\n",
   "feature_names = np.array(X.columns)\n",
   "\n",
   "plt.bar(height=importance, x=feature_names)\n",
   "plt.title(\"Feature importances via coefficients\")\n",
   "plt.show()"
  ] },
 { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [],
  "source": [
   "from sklearn.preprocessing import StandardScaler\n",
   "\n",
   "# Standardise all features before running the sequential feature selection.\n",
   "# NOTE(review): the scaler is fit on every row, i.e. before any train/test split.\n",
   "scaler = StandardScaler().fit(X)\n",
   "normalizeX = scaler.transform(X)"
  ] },
 { "cell_type": "code", "execution_count": 55, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "Features selected by forward sequential selection: ['Pregnancies' 'Glucose' 'BloodPressure' 'BMI' 'DiabetesPedigreeFunction'\n",
    " 'Age']\n",
    "Done in 26.758s\n",
    "Features selected by backward sequential selection: ['Pregnancies' 'Glucose' 'BloodPressure' 'BMI' 'DiabetesPedigreeFunction'\n",
    " 'Age']\n",
    "Done in 12.736s\n"
   ] } ],
  "source": [
   "from sklearn.feature_selection import SequentialFeatureSelector\n",
   "from sklearn.ensemble import RandomForestClassifier\n",
   "from time import time\n",
   "\n",
   "# Random forest used to score candidate feature subsets. The fixed seed makes the\n",
   "# forward and backward selections repeatable from one run to the next.\n",
   "clf2 = RandomForestClassifier(n_estimators=50, random_state=0)\n",
   "\n",
   "# Forward selection: start empty and add features until 6 are chosen.\n",
   "tic_fwd = time()\n",
   "sfs_forward = SequentialFeatureSelector(clf2, n_features_to_select=6,\n",
   "                                        direction='forward').fit(normalizeX, Y)\n",
   "toc_fwd = time()\n",
   "\n",
   "# Backward selection: start with all features and drop them until 6 remain.\n",
   "tic_bwd = time()\n",
   "sfs_backward = SequentialFeatureSelector(clf2, n_features_to_select=6,\n",
   "                                         direction='backward').fit(normalizeX, Y)\n",
   "toc_bwd = time()\n",
   "\n",
   "print(\"Features selected by forward sequential selection: \"f\"{feature_names[sfs_forward.get_support()]}\")\n",
   "print(f\"Done in {toc_fwd - tic_fwd:.3f}s\")\n",
   "print(\"Features selected by backward sequential selection: \"\n", " 
f\"{feature_names[sfs_backward.get_support()]}\")\n",
   "print(f\"Done in {toc_bwd - tic_bwd:.3f}s\")\n",
   "\n",
   "# Column names chosen by each direction, and the matching slices of the raw frame.\n",
   "a = feature_names[sfs_forward.get_support()]\n",
   "b = feature_names[sfs_backward.get_support()]\n",
   "x1 = df[a]\n",
   "x2 = df[b]"
  ] },
 { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [],
  "source": [
   "from sklearn.preprocessing import StandardScaler\n",
   "\n",
   "# Re-fit the scaler on the backward-selected columns only; this replaces the\n",
   "# earlier all-feature `scaler` / `normalizeX`.\n",
   "scaler = StandardScaler().fit(x2)\n",
   "normalizeX = scaler.transform(x2)"
  ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
 { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [],
  "source": [
   "# Hold out 30% of the rows for testing; the seed keeps the split fixed.\n",
   "X_trainset, X_testset, y_trainset, y_testset = train_test_split(\n",
   "    normalizeX, Y, test_size=0.3, random_state=3\n",
   ")"
  ] },
 { "cell_type": "code", "execution_count": 58, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.7402597402597403\n" ] } ],
  "source": [
   "# Entropy-based decision tree limited to depth 5. random_state pins the choice\n",
   "# between equally good candidate splits so the accuracy is repeatable.\n",
   "drugTree = DecisionTreeClassifier(criterion=\"entropy\", max_depth=5, random_state=0)\n",
   "drugTree.fit(X_trainset, y_trainset)\n",
   "predTree = drugTree.predict(X_testset)\n",
   "print(\"Accuracy: \", metrics.accuracy_score(y_testset, predTree))\n"
  ] },
 { "cell_type": "code", "execution_count": 59, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    " precision recall f1-score support\n",
    "\n",
    " 0 0.74 0.89 0.81 133\n",
    " 1 0.79 0.57 0.66 98\n",
    "\n",
    " accuracy 0.75 231\n",
    " macro avg 0.76 0.73 0.73 231\n",
    "weighted avg 0.76 0.75 0.74 231\n",
    "\n"
   ] } ],
  "source": [
   "from sklearn.linear_model import LogisticRegression\n",
   "from sklearn.metrics import confusion_matrix\n",
   "from sklearn.metrics import classification_report\n",
   "\n",
   "# Logistic-regression baseline (liblinear solver) on the same split.\n",
   "LR = LogisticRegression(solver='liblinear').fit(X_trainset, y_trainset)\n",
   "yhat = LR.predict(X_testset)\n",
   "print (classification_report(y_testset, yhat))"
  ] },
 { "cell_type": "code", "execution_count": 
60, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "Accuracy: 0.7489177489177489\n",
    "Confusion Matrix:\n",
    "[[118 15]\n",
    " [ 43 55]]\n",
    "Classification Report\n",
    " precision recall f1-score support\n",
    "\n",
    " 0 0.73 0.89 0.80 133\n",
    " 1 0.79 0.56 0.65 98\n",
    "\n",
    " accuracy 0.75 231\n",
    " macro avg 0.76 0.72 0.73 231\n",
    "weighted avg 0.76 0.75 0.74 231\n",
    "\n"
   ] } ],
  "source": [
   "from sklearn import svm\n",
   "\n",
   "# Support-vector classifier with an RBF kernel, trained on the training split.\n",
   "clf = svm.SVC(kernel='rbf').fit(X_trainset, y_trainset)\n",
   "\n",
   "# Score the held-out rows and collect the three reports before printing them.\n",
   "y_pred = clf.predict(X_testset)\n",
   "svm_accuracy = metrics.accuracy_score(y_testset, y_pred)\n",
   "svm_confusion = confusion_matrix(y_testset, y_pred)\n",
   "svm_report = classification_report(y_testset, y_pred)\n",
   "\n",
   "print(\"Accuracy:\", svm_accuracy)\n",
   "print(\"Confusion Matrix:\")\n",
   "print(svm_confusion)\n",
   "\n",
   "print(\"Classification Report\")\n",
   "print(svm_report)"
  ] },
 { "cell_type": "code", "execution_count": 61, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "Accuracy: 0.6666666666666666\n",
    "Confusion Matrix:\n",
    "[[118 15]\n",
    " [ 62 36]]\n",
    "Classification Report\n",
    " precision recall f1-score support\n",
    "\n",
    " 0 0.66 0.89 0.75 133\n",
    " 1 0.71 0.37 0.48 98\n",
    "\n",
    " accuracy 0.67 231\n",
    " macro avg 0.68 0.63 0.62 231\n",
    "weighted avg 0.68 0.67 0.64 231\n",
    "\n"
   ] } ],
  "source": [
   "from sklearn.neighbors import KNeighborsClassifier\n",
   "\n",
   "# k-nearest-neighbours classifier voting over the 4 closest training rows.\n",
   "modeln = KNeighborsClassifier(n_neighbors=4).fit(X_trainset, y_trainset)\n",
   "\n",
   "# Score the held-out rows and collect the three reports before printing them.\n",
   "y_pred1 = modeln.predict(X_testset)\n",
   "knn_accuracy = metrics.accuracy_score(y_testset, y_pred1)\n",
   "knn_confusion = confusion_matrix(y_testset, y_pred1)\n",
   "knn_report = classification_report(y_testset, y_pred1)\n",
   "\n",
   "print(\"Accuracy:\", knn_accuracy)\n",
   "print(\"Confusion Matrix:\")\n",
   "print(knn_confusion)\n",
   "\n",
   "print(\"Classification Report\")\n",
   "print(knn_report)"
  ] },
 { "cell_type": 
"code", "execution_count": null, "metadata": {}, "outputs": [],
  "source": [
   "from sklearn.ensemble import AdaBoostClassifier\n",
   "\n",
   "# Ensemble sizes to compare. The loop used to iterate over `listt`, a name that\n",
   "# no cell defines, so it stopped with NameError before fitting anything.\n",
   "n_estimators_grid = [10, 20, 50, 100]\n",
   "\n",
   "for i in n_estimators_grid:\n",
   "    # Seeded so each ensemble size gives the same accuracy on every run.\n",
   "    abc = AdaBoostClassifier(n_estimators=i, learning_rate=0.5, random_state=0)\n",
   "\n",
   "    # Train the AdaBoost classifier\n",
   "    model = abc.fit(X_trainset, y_trainset)\n",
   "\n",
   "    # Predict the response for the test dataset\n",
   "    y_pred = model.predict(X_testset)\n",
   "    print(\"n_estimators:\", i, \"Accuracy:\", metrics.accuracy_score(y_testset, y_pred))"
  ] },
 { "cell_type": "code", "execution_count": 63, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "Accuracy: 0.7359307359307359\n",
    "Confusion Matrix:\n",
    "[[117 16]\n",
    " [ 45 53]]\n",
    "Classification Report\n",
    " precision recall f1-score support\n",
    "\n",
    " 0 0.72 0.88 0.79 133\n",
    " 1 0.77 0.54 0.63 98\n",
    "\n",
    " accuracy 0.74 231\n",
    " macro avg 0.75 0.71 0.71 231\n",
    "weighted avg 0.74 0.74 0.73 231\n",
    "\n"
   ] } ],
  "source": [ "from sklearn.ensemble 
import AdaBoostClassifier\n",
   "\n",
   "# scikit-learn metrics module for the accuracy calculation\n",
   "from sklearn import metrics\n",
   "\n",
   "# Logistic regression is the learner that AdaBoost boosts in this cell.\n",
   "LR = LogisticRegression(solver='liblinear')\n",
   "\n",
   "# Create the AdaBoost classifier. The base learner is passed positionally because\n",
   "# its keyword is `base_estimator` in older scikit-learn releases and `estimator`\n",
   "# in newer ones; the first positional argument works with both.\n",
   "abc = AdaBoostClassifier(LR, n_estimators=20, learning_rate=0.5)\n",
   "\n",
   "# Train the AdaBoost classifier\n",
   "model = abc.fit(X_trainset, y_trainset)\n",
   "\n",
   "# Predict the response for the test dataset\n",
   "y_pred = model.predict(X_testset)\n",
   "\n",
   "# Model accuracy: how often is the classifier correct?\n",
   "print(\"Accuracy:\",metrics.accuracy_score(y_testset, y_pred))\n",
   "print(\"Confusion Matrix:\")\n",
   "print(confusion_matrix(y_testset, y_pred))\n",
   "\n",
   "print(\"Classification Report\")\n",
   "print(classification_report(y_testset, y_pred))"
  ] },
 { "cell_type": "code", "execution_count": 64, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "Accuracy: 0.7489177489177489\n",
    "Confusion Matrix:\n",
    "[[111 22]\n",
    " [ 36 62]]\n",
    "Classification Report\n",
    " precision recall f1-score support\n",
    "\n",
    " 0 0.76 0.83 0.79 133\n",
    " 1 0.74 0.63 0.68 98\n",
    "\n",
    " accuracy 0.75 231\n",
    " macro avg 0.75 0.73 0.74 231\n",
    "weighted avg 0.75 0.75 0.75 231\n",
    "\n",
    "RandomForestClassifier(n_estimators=50)\n"
   ] } ],
  "source": [
   "from sklearn.ensemble import RandomForestClassifier\n",
   "\n",
   "# Random forest of 50 trees.\n",
   "# NOTE(review): no random_state is set, so these metrics vary between runs.\n",
   "clf = RandomForestClassifier(n_estimators=50)\n",
   "\n",
   "# Train the model on the training split\n",
   "clf.fit(X_trainset, y_trainset)\n",
   "\n",
   "# Predict the response for the test dataset\n",
   "yhat4 = clf.predict(X_testset)\n",
   "print(\"Accuracy:\",metrics.accuracy_score(y_testset, yhat4))\n",
   "print(\"Confusion Matrix:\")\n",
   "print(confusion_matrix(y_testset, yhat4))\n",
   "\n",
   "print(\"Classification Report\")\n",
"print(classification_report(y_testset, yhat4))\n", "print(clf)" ] },
 { "cell_type": "code", "execution_count": 65, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "Learning rate: 0.001\n",
    "Accuracy score (training): 0.683\n",
    "Accuracy score (validation): 0.576\n",
    "Learning rate: 0.002\n",
    "Accuracy score (training): 0.683\n",
    "Accuracy score (validation): 0.576\n",
    "Learning rate: 0.05\n",
    "Accuracy score (training): 0.842\n",
    "Accuracy score (validation): 0.671\n",
    "Learning rate: 0.075\n",
    "Accuracy score (training): 0.862\n",
    "Accuracy score (validation): 0.719\n",
    "Learning rate: 0.1\n",
    "Accuracy score (training): 0.877\n",
    "Accuracy score (validation): 0.714\n",
    "Learning rate: 0.25\n",
    "Accuracy score (training): 0.914\n",
    "Accuracy score (validation): 0.732\n",
    "Learning rate: 0.5\n",
    "Accuracy score (training): 0.974\n",
    "Accuracy score (validation): 0.706\n",
    "Learning rate: 0.75\n",
    "Accuracy score (training): 0.989\n",
    "Accuracy score (validation): 0.740\n",
    "Learning rate: 1\n",
    "Accuracy score (training): 0.994\n",
    "Accuracy score (validation): 0.688\n"
   ] } ],
  "source": [
   "from sklearn.ensemble import GradientBoostingClassifier\n",
   "\n",
   "# Learning rates to sweep, smallest to largest.\n",
   "lr_list = [0.001,0.002,0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]\n",
   "\n",
   "for learning_rate in lr_list:\n",
   "    # Same small, seeded ensemble for every rate; only learning_rate changes.\n",
   "    gb_clf = GradientBoostingClassifier(\n",
   "        n_estimators=19,\n",
   "        learning_rate=learning_rate,\n",
   "        max_features=2,\n",
   "        max_depth=4,\n",
   "        random_state=0,\n",
   "    )\n",
   "    gb_clf.fit(X_trainset, y_trainset)\n",
   "\n",
   "    # The \"validation\" figure is measured on the test split made earlier.\n",
   "    train_accuracy = gb_clf.score(X_trainset, y_trainset)\n",
   "    test_accuracy = gb_clf.score(X_testset, y_testset)\n",
   "\n",
   "    print(\"Learning rate: \", learning_rate)\n",
   "    print(\"Accuracy score (training): {0:.3f}\".format(train_accuracy))\n",
   "    print(\"Accuracy score (validation): {0:.3f}\".format(test_accuracy))"
  ] },
 { "cell_type": "code", "execution_count": 66, "metadata": {},
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Confusion Matrix:\n", "[[106 27]\n", " [ 41 57]]\n", "Accuracy: 0.7056277056277056\n", "Classification Report\n", " precision recall 
f1-score support\n",
    "\n",
    " 0 0.72 0.80 0.76 133\n",
    " 1 0.68 0.58 0.63 98\n",
    "\n",
    " accuracy 0.71 231\n",
    " macro avg 0.70 0.69 0.69 231\n",
    "weighted avg 0.70 0.71 0.70 231\n",
    "\n"
   ] },
   { "data": { "text/plain": [
     "GradientBoostingClassifier(learning_rate=0.5, max_depth=4, max_features=2,\n",
     " n_estimators=19, random_state=0)"
    ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ],
  "source": [
   "# Fit gradient boosting once at learning_rate=0.5 and report test-set metrics.\n",
   "gb_clf1 = GradientBoostingClassifier(\n",
   "    n_estimators=19,\n",
   "    learning_rate=0.5,\n",
   "    max_features=2,\n",
   "    max_depth=4,\n",
   "    random_state=0,\n",
   ")\n",
   "gb_clf1.fit(X_trainset, y_trainset)\n",
   "predictions = gb_clf1.predict(X_testset)\n",
   "\n",
   "# Compute the three reports first, then print them in the original order.\n",
   "gb_confusion = confusion_matrix(y_testset, predictions)\n",
   "gb_accuracy = metrics.accuracy_score(y_testset, predictions)\n",
   "gb_report = classification_report(y_testset, predictions)\n",
   "\n",
   "print(\"Confusion Matrix:\")\n",
   "print(gb_confusion)\n",
   "print(\"Accuracy: \", gb_accuracy)\n",
   "\n",
   "print(\"Classification Report\")\n",
   "print(gb_report)\n",
   "\n",
   "# Last expression: echo the fitted estimator and its parameters.\n",
   "gb_clf1"
  ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
 ],
 "metadata": {
  "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
  "language_info": {
   "codemirror_mode": { "name": "ipython", "version": 3 },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2 }