{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\pc\\anaconda3\\lib\\site-packages\\tpot\\builtins\\__init__.py:36: UserWarning: Warning: optional dependency `torch` is not available. - skipping import of NN models.\n", " warnings.warn(\"Warning: optional dependency `torch` is not available. - skipping import of NN models.\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "(2665, 339)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7507e4ced9764e249fb8b22ca41e8ab4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=320.0, style=ProgressStyle(de…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tpot import TPOTClassifier\n", "from sklearn.model_selection import train_test_split\n", "import pandas as pd\n", "import numpy as np\n", "dataframe=pd.read_excel(r'C:\\Users\\pc\\Documents\\DF features\\features\\full\\shuffle13.xlsx')\n", "X=dataframe.iloc[:,:-1]\n", "print(X.shape)\n", "Y=dataframe.iloc[:,-1]\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,shuffle=False)\n", "\n", "tpot = TPOTClassifier(generations=3, population_size=80, verbosity=2)\n", "tpot.fit(X_train, Y_train)\n", "print(tpot.score(X_test, Y_test))\n", "tpot.export('tpot_digits_pipeline.py')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import tpot" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(2665, 339)\n", "0.8442776735459663\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn import metrics\n", "from sklearn.svm import SVC\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.ensemble import ExtraTreesClassifier\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "dataframe=pd.read_excel(r'C:\\Users\\pc\\Documents\\DF features\\features\\full\\shuffle13.xlsx')\n", "X=dataframe.iloc[:,:-1]\n", "print(X.shape)\n", "Y=dataframe.iloc[:,-1]\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,shuffle=False)\n", "clf = ExtraTreesClassifier(n_estimators=15, random_state=0) \n", "clf.fit(X_train,Y_train) \n", "result=clf.score(X_test,Y_test)\n", "print(result)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.7485928705440901\n", "[[39 0 0 0 0 0 0 0 0 0 0 0 0]\n", " [ 0 29 2 1 0 1 0 0 0 0 1 1 0]\n", " [ 0 0 41 2 2 0 1 0 0 1 2 1 1]\n", " [ 0 2 4 22 0 7 4 0 0 2 1 0 0]\n", " [ 0 0 0 0 46 0 0 0 0 0 0 0 0]\n", " [ 0 1 0 0 1 28 2 0 3 2 1 1 0]\n", " [ 0 1 0 0 0 0 27 0 0 6 0 1 0]\n", " [ 4 0 0 0 1 0 0 20 0 0 0 0 0]\n", " [ 0 0 0 0 0 2 0 0 35 0 0 0 0]\n", " [ 0 3 0 3 0 1 12 0 0 20 1 0 2]\n", " [ 0 6 5 0 1 4 1 0 3 4 20 2 1]\n", " [ 1 0 0 0 1 0 0 1 0 0 3 39 0]\n", " [ 1 1 2 2 2 0 6 0 0 3 0 0 33]]\n", " precision recall f1-score support\n", "\n", " 0 0.87 1.00 0.93 39\n", " 1 0.67 0.83 0.74 35\n", " 2 0.76 0.80 0.78 51\n", " 3 0.73 0.52 0.61 42\n", " 4 0.85 1.00 0.92 46\n", " 5 0.65 0.72 0.68 39\n", " 6 0.51 0.77 0.61 35\n", " 7 0.95 0.80 0.87 25\n", " 8 0.85 0.95 0.90 37\n", " 9 0.53 0.48 0.50 42\n", " 10 0.69 0.43 0.53 47\n", " 11 0.87 0.87 0.87 45\n", " 12 0.89 0.66 0.76 50\n", "\n", " accuracy 0.75 533\n", " macro 
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    { "name": "stdout", "output_type": "stream", "text": [ "0.7485928705440901\n", "[[39 0 0 0 0 0 0 0 0 0 0 0 0]\n", " [ 0 29 2 1 0 1 0 0 0 0 1 1 0]\n", " [ 0 0 41 2 2 0 1 0 0 1 2 1 1]\n", " [ 0 2 4 22 0 7 4 0 0 2 1 0 0]\n", " [ 0 0 0 0 46 0 0 0 0 0 0 0 0]\n", " [ 0 1 0 0 1 28 2 0 3 2 1 1 0]\n", " [ 0 1 0 0 0 0 27 0 0 6 0 1 0]\n", " [ 4 0 0 0 1 0 0 20 0 0 0 0 0]\n", " [ 0 0 0 0 0 2 0 0 35 0 0 0 0]\n", " [ 0 3 0 3 0 1 12 0 0 20 1 0 2]\n", " [ 0 6 5 0 1 4 1 0 3 4 20 2 1]\n", " [ 1 0 0 0 1 0 0 1 0 0 3 39 0]\n", " [ 1 1 2 2 2 0 6 0 0 3 0 0 33]]\n", " precision recall f1-score support\n", "\n", " 0 0.87 1.00 0.93 39\n", " 1 0.67 0.83 0.74 35\n", " 2 0.76 0.80 0.78 51\n", " 3 0.73 0.52 0.61 42\n", " 4 0.85 1.00 0.92 46\n", " 5 0.65 0.72 0.68 39\n", " 6 0.51 0.77 0.61 35\n", " 7 0.95 0.80 0.87 25\n", " 8 0.85 0.95 0.90 37\n", " 9 0.53 0.48 0.50 42\n", " 10 0.69 0.43 0.53 47\n", " 11 0.87 0.87 0.87 45\n", " 12 0.89 0.66 0.76 50\n", "\n", " accuracy 0.75 533\n", " macro avg 0.76 0.76 0.75 533\n", "weighted avg 0.76 0.75 0.74 533\n", "\n" ] }
   ],
   "source": [
    "# Chi-square feature selection: evaluate the pre-computed chi-square-selected feature subset\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "sdf = pd.read_excel(r'C:\\Users\\pc\\Documents\\chisquare\\feature\\full\\shuffle13.xlsx')\n",
    "X = sdf.iloc[:, :-1]\n",
    "Y = sdf.iloc[:, -1]\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)\n",
    "\n",
    "clf = ExtraTreesClassifier(n_estimators=150, random_state=0)\n",
    "clf.fit(X_train, Y_train)\n",
    "result = clf.score(X_test, Y_test)\n",
    "print(result)\n",
    "\n",
    "# Detailed per-class evaluation on the held-out test set\n",
    "predictions = clf.predict(X_test)\n",
    "print(confusion_matrix(Y_test, predictions))\n",
    "print(classification_report(Y_test, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    { "name": "stdout", "output_type": "stream", "text": [ "0.8273921200750469\n" ] }
   ],
   "source": [
    "# Information-gain (mutual information) feature selection: evaluate the pre-computed IG-selected subset\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "\n",
    "dfs = pd.read_excel(r'C:\\Users\\pc\\Documents\\IGfeatures\\full\\shuffle13.xlsx')\n",
    "X = dfs.iloc[:, :-1]\n",
    "Y = dfs.iloc[:, -1]\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)\n",
    "\n",
    "clf = ExtraTreesClassifier(n_estimators=150, random_state=0)\n",
    "clf.fit(X_train, Y_train)\n",
    "result = clf.score(X_test, Y_test)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
  "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}