{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\pc\\anaconda3\\lib\\site-packages\\tpot\\builtins\\__init__.py:36: UserWarning: Warning: optional dependency `torch` is not available. - skipping import of NN models.\n", " warnings.warn(\"Warning: optional dependency `torch` is not available. - skipping import of NN models.\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "(2665, 339)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7507e4ced9764e249fb8b22ca41e8ab4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=320.0, style=ProgressStyle(de…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tpot import TPOTClassifier\n", "from sklearn.model_selection import train_test_split\n", "import pandas as pd\n", "import numpy as np\n", "dataframe=pd.read_excel(r'C:\\Users\\pc\\Documents\\DF features\\features\\full\\shuffle13.xlsx')\n", "X=dataframe.iloc[:,:-1]\n", "print(X.shape)\n", "Y=dataframe.iloc[:,-1]\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,shuffle=False)\n", "\n", "tpot = TPOTClassifier(generations=3, population_size=80, verbosity=2)\n", "tpot.fit(X_train, Y_train)\n", "print(tpot.score(X_test, Y_test))\n", "tpot.export('tpot_digits_pipeline.py')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import tpot" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(2665, 339)\n", "0.8442776735459663\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn import metrics\n", "from sklearn.svm import SVC\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.ensemble import ExtraTreesClassifier\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "dataframe=pd.read_excel(r'C:\\Users\\pc\\Documents\\DF features\\features\\full\\shuffle13.xlsx')\n", "X=dataframe.iloc[:,:-1]\n", "print(X.shape)\n", "Y=dataframe.iloc[:,-1]\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,shuffle=False)\n", "clf = ExtraTreesClassifier(n_estimators=15, random_state=0) \n", "clf.fit(X_train,Y_train) \n", "result=clf.score(X_test,Y_test)\n", "print(result)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.7485928705440901\n", "[[39 0 0 0 0 0 0 0 0 0 0 0 0]\n", " [ 0 29 2 1 0 1 0 0 0 0 1 1 0]\n", " [ 0 0 41 2 2 0 1 0 0 1 2 1 1]\n", " [ 0 2 4 22 0 7 4 0 0 2 1 0 0]\n", " [ 0 0 0 0 46 0 0 0 0 0 0 0 0]\n", " [ 0 1 0 0 1 28 2 0 3 2 1 1 0]\n", " [ 0 1 0 0 0 0 27 0 0 6 0 1 0]\n", " [ 4 0 0 0 1 0 0 20 0 0 0 0 0]\n", " [ 0 0 0 0 0 2 0 0 35 0 0 0 0]\n", " [ 0 3 0 3 0 1 12 0 0 20 1 0 2]\n", " [ 0 6 5 0 1 4 1 0 3 4 20 2 1]\n", " [ 1 0 0 0 1 0 0 1 0 0 3 39 0]\n", " [ 1 1 2 2 2 0 6 0 0 3 0 0 33]]\n", " precision recall f1-score support\n", "\n", " 0 0.87 1.00 0.93 39\n", " 1 0.67 0.83 0.74 35\n", " 2 0.76 0.80 0.78 51\n", " 3 0.73 0.52 0.61 42\n", " 4 0.85 1.00 0.92 46\n", " 5 0.65 0.72 0.68 39\n", " 6 0.51 0.77 0.61 35\n", " 7 0.95 0.80 0.87 25\n", " 8 0.85 0.95 0.90 37\n", " 9 0.53 0.48 0.50 42\n", " 10 0.69 0.43 0.53 47\n", " 11 0.87 0.87 0.87 45\n", " 12 0.89 0.66 0.76 50\n", "\n", " accuracy 0.75 533\n", " macro 
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    { "name": "stdout", "output_type": "stream", "text": [ "0.7485928705440901\n", "[[39 0 0 0 0 0 0 0 0 0 0 0 0]\n", " [ 0 29 2 1 0 1 0 0 0 0 1 1 0]\n", " [ 0 0 41 2 2 0 1 0 0 1 2 1 1]\n", " [ 0 2 4 22 0 7 4 0 0 2 1 0 0]\n", " [ 0 0 0 0 46 0 0 0 0 0 0 0 0]\n", " [ 0 1 0 0 1 28 2 0 3 2 1 1 0]\n", " [ 0 1 0 0 0 0 27 0 0 6 0 1 0]\n", " [ 4 0 0 0 1 0 0 20 0 0 0 0 0]\n", " [ 0 0 0 0 0 2 0 0 35 0 0 0 0]\n", " [ 0 3 0 3 0 1 12 0 0 20 1 0 2]\n", " [ 0 6 5 0 1 4 1 0 3 4 20 2 1]\n", " [ 1 0 0 0 1 0 0 1 0 0 3 39 0]\n", " [ 1 1 2 2 2 0 6 0 0 3 0 0 33]]\n", " precision recall f1-score support\n", "\n", " 0 0.87 1.00 0.93 39\n", " 1 0.67 0.83 0.74 35\n", " 2 0.76 0.80 0.78 51\n", " 3 0.73 0.52 0.61 42\n", " 4 0.85 1.00 0.92 46\n", " 5 0.65 0.72 0.68 39\n", " 6 0.51 0.77 0.61 35\n", " 7 0.95 0.80 0.87 25\n", " 8 0.85 0.95 0.90 37\n", " 9 0.53 0.48 0.50 42\n", " 10 0.69 0.43 0.53 47\n", " 11 0.87 0.87 0.87 45\n", " 12 0.89 0.66 0.76 50\n", "\n", " accuracy 0.75 533\n", " macro avg 0.76 0.76 0.75 533\n", "weighted avg 0.76 0.75 0.74 533\n", "\n" ] }
   ],
   "source": [
    "# Chi-square feature selection: evaluate the pre-computed chi-square-selected feature subset\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "sdf = pd.read_excel(r'C:\\Users\\pc\\Documents\\chisquare\\feature\\full\\shuffle13.xlsx')\n",
    "X = sdf.iloc[:, :-1]\n",
    "Y = sdf.iloc[:, -1]\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)\n",
    "\n",
    "clf = ExtraTreesClassifier(n_estimators=150, random_state=0)\n",
    "clf.fit(X_train, Y_train)\n",
    "result = clf.score(X_test, Y_test)\n",
    "print(result)\n",
    "\n",
    "# Detailed per-class evaluation on the held-out test set\n",
    "predictions = clf.predict(X_test)\n",
    "print(confusion_matrix(Y_test, predictions))\n",
    "print(classification_report(Y_test, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    { "name": "stdout", "output_type": "stream", "text": [ "0.8273921200750469\n" ] }
   ],
   "source": [
    "# Information-gain (mutual information) feature selection: evaluate the pre-computed IG-selected subset\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "\n",
    "dfs = pd.read_excel(r'C:\\Users\\pc\\Documents\\IGfeatures\\full\\shuffle13.xlsx')\n",
    "X = dfs.iloc[:, :-1]\n",
    "Y = dfs.iloc[:, -1]\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)\n",
    "\n",
    "clf = ExtraTreesClassifier(n_estimators=150, random_state=0)\n",
    "clf.fit(X_train, Y_train)\n",
    "result = clf.score(X_test, Y_test)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
  "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}