{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "executionInfo": { "elapsed": 1333, "status": "ok", "timestamp": 1616768427523, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "BRtpZetrlYGA" }, "outputs": [], "source": [ "#Libraries for file-system access (os) and data loading (pandas)\n", "import os\n", "import pandas as pd\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 83702, "status": "ok", "timestamp": 1616768514451, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "7OPIvVLoMiga", "outputId": "336770f9-e733-43f7-f82b-e348957d2ec2" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": { "executionInfo": { "elapsed": 11256, "status": "ok", "timestamp": 1616768516632, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "6iggmdfaHfsF" }, "outputs": [], "source": [ "data=pd.read_csv(\"PreprocessReview.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 112 }, "executionInfo": { "elapsed": 9976, "status": "ok", "timestamp": 1616768516641, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "eaY4QIHrN25p", "outputId": "b45e96f4-43fb-4541-d98f-3fc2c148e039" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0Unnamed: 0.1reviewlabelfilecleanText
0021939Gwyneth Paltrow is absolutely great in this mo...07246_4.txtgwyneth paltrow absolutely great movie story u...
1124113I own this movie. Not by choice, I do. I was r...09202_1.txtmovie choice really bored day box intrigued po...
\n", "
" ], "text/plain": [ " Unnamed: 0 Unnamed: 0.1 \\\n", "0 0 21939 \n", "1 1 24113 \n", "\n", " review label file \\\n", "0 Gwyneth Paltrow is absolutely great in this mo... 0 7246_4.txt \n", "1 I own this movie. Not by choice, I do. I was r... 0 9202_1.txt \n", "\n", " cleanText \n", "0 gwyneth paltrow absolutely great movie story u... \n", "1 movie choice really bored day box intrigued po... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[:2]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 7116, "status": "ok", "timestamp": 1616768516643, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "J9H7ZzSy5ciU", "outputId": "5bd3b935-32d8-4d8c-f2a1-b9493e629edb" }, "outputs": [ { "data": { "text/plain": [ "25000" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# number of tweets in CSV file\n", "len(data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " del sys.path[0]\n", "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:17: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: 
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:21: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:19: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" ] } ], "source": [ "# TextBlob polarity (a float in [-1.0, 1.0]) is used as a lexicon-based sentiment score\n", "\n", "import nltk\n", "from textblob import TextBlob\n", "\n", "\n", "def polarity_to_sentiment(score):\n", "    \"\"\"Map a polarity score to a label: > 0 'Positive', == 0 'Neutral', otherwise 'Negative'.\"\"\"\n", "    if score > 0:\n", "        return \"Positive\"\n", "    if score == 0:\n", "        return \"Neutral\"\n", "    return \"Negative\"\n", "\n", "\n", "data['cleanText'] = data['cleanText'].astype(str)\n", "# Assign whole columns instead of writing data[col][i] row by row: chained indexing\n", "# raises SettingWithCopyWarning and is not guaranteed to modify `data` itself.\n", "data['TextBlob Score'] = data['cleanText'].map(lambda text: TextBlob(text).sentiment.polarity)\n", "data['TextBlob Sentiment'] = data['TextBlob Score'].map(polarity_to_sentiment)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Positive / negative extraction" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "18309\n", "6668\n" ] } ], "source": [ "data5=data.loc[data['TextBlob Sentiment'] == \"Positive\"]\n", "print(len(data5))\n", "data4=data.loc[data['TextBlob 
Sentiment'] == \"Negative\"]\n", "print(len(data4))" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent\n", "dataF = pd.concat([data5, data4], ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 5753, "status": "ok", "timestamp": 1616768516645, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "Bp3ZalVnQved", "outputId": "008fc640-61de-40a4-d451-ee0c54db9cc5" }, "outputs": [ { "data": { "text/plain": [ "Counter({'Positive': 18309, 'Negative': 6668})" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from collections import Counter\n", "Counter( dataF['TextBlob Sentiment'])" ] }, { "cell_type": "markdown", "metadata": { "id": "TbBISAtr_KYE" }, "source": [ "Supervised machine learning approach starts from **here**" ] }, { "cell_type": "code", "execution_count": 120, "metadata": { "executionInfo": { "elapsed": 1097, "status": "ok", "timestamp": 1616769068814, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "Yp1gzlOHHU0e" }, "outputs": [], "source": [ "#Data splitting into training and testing sets for machine learning models\n", "from sklearn.model_selection import train_test_split\n", "# random_state makes the split repeatable across runs; stratify keeps the label ratio equal in both parts\n", "X_train, X_test, y_train, y_test = train_test_split(data[\"cleanText\"],data['label'],test_size=0.25,shuffle=True,random_state=2,stratify=data['label'])" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import confusion_matrix" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 98, "metadata": { "executionInfo": { "elapsed": 1472, "status": "ok", "timestamp": 1616769073354, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "Ck16zeQNHU0e" }, "outputs": [], "source": [ "#TF-IDF features \n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "vectorizer = TfidfVectorizer(max_features=5000)\n", "X = vectorizer.fit_transform(X_train)\n", "y = vectorizer.transform(X_test)" ] }, { "cell_type": "code", "execution_count": 104, "metadata": { "id": "YvbLqoT5HU0g" }, "outputs": [], "source": [ "#TF-IDF features \n", "from sklearn.feature_extraction.text import CountVectorizer\n", "vectorizer = CountVectorizer(max_features=5000)\n", "X = vectorizer.fit_transform(X_train)\n", "y = vectorizer.transform(X_test)" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "#Glove features \n", "from zeugma.embeddings import EmbeddingTransformer\n", "glove = EmbeddingTransformer('glove')\n", "X = glove.fit_transform(X_train)\n", "y = glove.transform(X_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 127, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 9296, "status": "ok", "timestamp": 1616769083545, "user": { "displayName": "Furqan Rustam", "photoUrl": 
"https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "fhz17ZiEHU0g", "outputId": "d850c7f8-72d7-4626-bfe1-0fc4f64f643a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.64608\n", " precision recall f1-score support\n", "\n", " 0 0.65 0.65 0.65 3125\n", " 1 0.65 0.65 0.65 3125\n", "\n", " accuracy 0.65 6250\n", " macro avg 0.65 0.65 0.65 6250\n", "weighted avg 0.65 0.65 0.65 6250\n", "\n", "[[2017 1108]\n", " [1104 2021]]\n" ] } ], "source": [ "#Decsion tree machine leanirng models\n", "from sklearn.tree import DecisionTreeClassifier\n", "dt=DecisionTreeClassifier(max_depth=50)\n", "dtPre=dt.fit(X, y_train).predict(y)\n", "print(accuracy_score(y_test,dtPre))\n", "print(classification_report(y_test,dtPre))\n", "print(confusion_matrix(y_test,dtPre))" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 215810, "status": "ok", "timestamp": 1616769293475, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "LoNl5sH3HU0g", "outputId": "7c4e39f2-74d9-4e91-9ddf-d94af2c6cf21" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.79888\n", " precision recall f1-score support\n", "\n", " 0 0.83 0.75 0.79 3125\n", " 1 0.77 0.85 0.81 3125\n", "\n", " accuracy 0.80 6250\n", " macro avg 0.80 0.80 0.80 6250\n", "weighted avg 0.80 0.80 0.80 6250\n", "\n", "[[2340 785]\n", " [ 472 2653]]\n" ] } ], "source": [ "#Random forest machine leanring models\n", "from sklearn.ensemble import RandomForestClassifier\n", "rfc = RandomForestClassifier(n_estimators=300, random_state=2,max_depth=300) \n", "rfc.fit(X, y_train)\n", "# calculate accuracy of class predictions\n", "y_pred_class = rfc.predict(y)\n", 
"print(accuracy_score(y_test, y_pred_class))\n", "print(classification_report(y_test, y_pred_class))\n", "print(confusion_matrix(y_test, y_pred_class))" ] }, { "cell_type": "code", "execution_count": 129, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 218547, "status": "ok", "timestamp": 1616769299244, "user": { "displayName": "Furqan Rustam", "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg5oat7XG884rtNww-Dgt4sEfDMp6vxfwes8ntbog=s64", "userId": "11638408104830397330" }, "user_tz": -300 }, "id": "Vz0q3bh_HU0h", "outputId": "099cd005-441b-46cd-d1fe-1e6a76be5e53" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.65216\n", " precision recall f1-score support\n", "\n", " 0 0.65 0.65 0.65 3125\n", " 1 0.65 0.65 0.65 3125\n", "\n", " accuracy 0.65 6250\n", " macro avg 0.65 0.65 0.65 6250\n", "weighted avg 0.65 0.65 0.65 6250\n", "\n", "[[2038 1087]\n", " [1087 2038]]\n" ] } ], "source": [ "#Random forest machine leanring models\n", "from sklearn.ensemble import GradientBoostingClassifier\n", "rfc = GradientBoostingClassifier(n_estimators=50, learning_rate=0.2,max_depth=50, random_state=0)\n", "rfc.fit(X, y_train)\n", "# calculate accuracy of class predictions\n", "y_pred_class = rfc.predict(y)\n", "print(accuracy_score(y_test, y_pred_class))\n", "print(classification_report(y_test, y_pred_class))\n", "print(confusion_matrix(y_test, y_pred_class))" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SVC\n", "0.87744\n", " precision recall f1-score support\n", "\n", " 0 0.89 0.86 0.88 3125\n", " 1 0.87 0.89 0.88 3125\n", "\n", " accuracy 0.88 6250\n", " macro avg 0.88 0.88 0.88 6250\n", "weighted avg 0.88 0.88 0.88 6250\n", "\n", "[[2690 435]\n", " [ 331 2794]]\n" ] } ], "source": [ "from sklearn.svm import SVC\n", "print(\"SVC\")\n", "svm = SVC(kernel='linear', C=1.0)\n", "svm.fit(X, y_train)\n", 
"y_pred_class = svm.predict(y)\n", "print(accuracy_score(y_test, y_pred_class))\n", "print(classification_report(y_test, y_pred_class))\n", "print(confusion_matrix(y_test, y_pred_class))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Word2Vec" ] }, { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "from gensim.models import Word2Vec\n", "from sklearn import preprocessing\n", "lab_enc = preprocessing.LabelEncoder()\n", "lab_enc.fit(data['label'])\n", "newdata=data\n", "tokenized_data = newdata['cleanText'].apply(lambda x: x.split())\n", "model_w2v = Word2Vec( tokenized_data, vector_size=2000,window=10, min_count=2,sg = 1, hs = 0, negative = 10, workers= 2, seed = 34)\n", "model_w2v.train(tokenized_data, total_examples= len(newdata['cleanText']), epochs=20)\n", "def word_vector(tokens, size):\n", " vec = np.zeros(size).reshape((1, size))\n", " count = 0.\n", " for word in tokens:\n", " try:\n", " vec += model_w2v.wv[word].reshape((1, size))\n", " count += 1.\n", " except KeyError:\n", " continue\n", " if count != 0:\n", " vec /= count\n", " return vec\n", "wordvec_arrays = np.zeros((len(tokenized_data), 2000))\n", "for i in range(len(tokenized_data)):\n", " wordvec_arrays[i,:] = word_vector(tokenized_data[i], 2000)\n", "data_feature_3 = pd.DataFrame(wordvec_arrays)\n", "df_tex = data_feature_3\n", "df_sen = lab_enc.transform(data['label'])\n", "X, y, y_train, y_test = train_test_split(df_tex, df_sen, test_size=0.25, random_state=2, stratify=df_sen)\n", "y_train = y_train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Logistic regression for machine leanring\n", "from sklearn.linear_model import LogisticRegression\n", "Lr = LogisticRegression(solver=\"sag\",multi_class=\"ovr\",C=1.0)\n", "Lr.fit(X, y_train)\n", "y_pred_class = Lr.predict(y)\n", "print(accuracy_score(y_test, y_pred_class))\n", "print(classification_report(y_test, y_pred_class))\n", "print(confusion_matrix(y_test, y_pred_class))" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Sohail_Comp\\AppData\\Roaming\\Python\\Python37\\site-packages\\sklearn\\linear_model\\_sag.py:329: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " \"the coef_ did not converge\", ConvergenceWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " Negative 0.83 0.90 0.86 1675\n", " Positive 0.96 0.93 0.95 4570\n", "\n", " accuracy 0.92 6245\n", " macro avg 0.89 0.92 0.90 6245\n", "weighted avg 0.93 0.92 0.92 6245\n", "\n", "[[1512 163]\n", " [ 320 4250]]\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n", "svm = SVC(kernel='linear',C=1.0)\n", "Lr = LogisticRegression(solver=\"sag\",multi_class=\"ovr\",C=1.0)\n", "er = VotingClassifier([('rf', svm),('etc', Lr)],voting=\"hard\")\n", "\n", "x=er.fit(X, y_train)\n", "\n", "y_pred_class = er.predict(y)\n", "print(classification_report(y_test,y_pred_class))\n", "print(confusion_matrix(y_test,y_pred_class))\n" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Counter({0: 12500, 1: 12500})" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from collections import Counter\n", "Counter(data[\"label\"])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "import numpy as np # linear algebra\n", "\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "import matplotlib.pyplot as plt\n", "# Input data files are available in the \"../input/\" directory.\n", "# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n", "\n", "import os\n", "import glob\n", "import cv2\n", "import tensorflow as tf\n", "from keras import layers\n", "from keras.layers import Dropout , Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D\n", "from keras.models import Model, load_model\n", "from keras.initializers import glorot_uniform\n", "from sklearn.model_selection import train_test_split\n", "import keras.backend as K\n", "from sklearn.utils import shuffle\n", "# importing all necessary libraries to run the code\n", "import re,string\n", "import numpy as np\n", "import pandas as pd\n", "import keras_metrics\n", "import tensorflow.keras\n", "import matplotlib.pyplot as plt\n", "from nltk.corpus import stopwords\n", "from keras.models import Sequential\n", "from sklearn.model_selection import train_test_split\n", "from keras.layers import Dense, Flatten, LSTM, Conv1D, 
MaxPooling1D, Dropout, Activation,Embedding\n", "# using the variable sw to hold all stopwords that are in English\n", "sw = stopwords.words('english')" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. 
You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. 
You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "176/176 - 675s - loss: 0.4575 - accuracy: 0.7900 - mae: 0.2538 - mse: 0.1568 - precision: 0.4874 - recall: 0.1591 - f1_score: 0.2247 - val_loss: 0.2983 - val_accuracy: 0.8815 - val_mae: 0.1354 - val_mse: 0.0982 - val_precision: 0.7023 - val_recall: 0.4034 - val_f1_score: 0.5124\n", "Epoch 2/100\n", "176/176 - 678s - loss: 0.2310 - accuracy: 0.9066 - mae: 0.1073 - mse: 0.0731 - precision: 0.7565 - recall: 0.5312 - f1_score: 0.6234 - val_loss: 0.2412 - val_accuracy: 0.8939 - val_mae: 0.1150 - val_mse: 0.0788 - val_precision: 0.7845 - val_recall: 0.6106 - val_f1_score: 0.6867\n", "Epoch 3/100\n", "176/176 - 660s - loss: 0.1647 - accuracy: 0.9379 - mae: 0.0733 - mse: 0.0492 - precision: 0.8085 - recall: 0.6610 - f1_score: 0.7272 - val_loss: 0.2326 - val_accuracy: 0.9047 - val_mae: 0.1029 - val_mse: 0.0749 - val_precision: 0.8243 - val_recall: 0.6966 - val_f1_score: 0.7551\n", "Epoch 4/100\n", "176/176 - 685s - loss: 0.1315 - accuracy: 0.9503 - mae: 0.0576 - mse: 0.0394 - precision: 0.8376 - recall: 0.7257 - f1_score: 0.7776 - val_loss: 0.2411 - val_accuracy: 0.9131 - val_mae: 0.0946 - val_mse: 0.0744 - val_precision: 0.8463 - val_recall: 0.7470 - val_f1_score: 0.7936\n", "Epoch 5/100\n", "176/176 - 710s - loss: 0.1069 - accuracy: 0.9593 - mae: 0.0469 - mse: 0.0316 - precision: 0.8553 - recall: 0.7656 - f1_score: 0.8080 - val_loss: 0.2984 - val_accuracy: 0.9059 - val_mae: 0.0971 - val_mse: 0.0803 - val_precision: 0.8619 - val_recall: 0.7802 - val_f1_score: 0.8190\n", "Epoch 6/100\n", "176/176 - 697s - loss: 0.0981 - accuracy: 0.9636 - mae: 0.0429 - mse: 0.0287 - precision: 0.8688 - recall: 0.7936 - f1_score: 0.8295 - val_loss: 0.2789 - val_accuracy: 0.8867 - val_mae: 0.1285 - val_mse: 0.0916 - val_precision: 0.8724 - 
val_recall: 0.8036 - val_f1_score: 0.8366\n", "Epoch 7/100\n", "176/176 - 698s - loss: 0.0963 - accuracy: 0.9646 - mae: 0.0417 - mse: 0.0281 - precision: 0.8763 - recall: 0.8127 - f1_score: 0.8433 - val_loss: 0.2815 - val_accuracy: 0.9023 - val_mae: 0.0996 - val_mse: 0.0836 - val_precision: 0.8797 - val_recall: 0.8205 - val_f1_score: 0.8491\n", "Epoch 8/100\n", "176/176 - 714s - loss: 0.0742 - accuracy: 0.9733 - mae: 0.0316 - mse: 0.0212 - precision: 0.8836 - recall: 0.8285 - f1_score: 0.8551 - val_loss: 0.3015 - val_accuracy: 0.9135 - val_mae: 0.0900 - val_mse: 0.0752 - val_precision: 0.8874 - val_recall: 0.8353 - val_f1_score: 0.8606\n", "Epoch 9/100\n", "176/176 - 716s - loss: 0.0636 - accuracy: 0.9762 - mae: 0.0275 - mse: 0.0186 - precision: 0.8913 - recall: 0.8418 - f1_score: 0.8658 - val_loss: 0.3689 - val_accuracy: 0.9051 - val_mae: 0.0974 - val_mse: 0.0861 - val_precision: 0.8943 - val_recall: 0.8469 - val_f1_score: 0.8700\n", "Epoch 10/100\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[1;31m#Here we train the Network.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[0mpred\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmodel_conv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m \u001b[1;33m=\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m 
\u001b[1;33m=\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_valid\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mY_valid\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 23\u001b[0m \u001b[0mpred\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[0;32m 1156\u001b[0m _r=1):\n\u001b[0;32m 1157\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1158\u001b[1;33m \u001b[0mtmp_logs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1159\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1160\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 
887\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 888\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mOptionalXlaContext\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jit_compile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 889\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 890\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 891\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 915\u001b[0m \u001b[1;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 916\u001b[0m \u001b[1;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 917\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# pylint: 
disable=not-callable\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 918\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 919\u001b[0m \u001b[1;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 3022\u001b[0m filtered_flat_args) = self._maybe_define_function(args, kwargs)\n\u001b[0;32m 3023\u001b[0m return graph_function._call_flat(\n\u001b[1;32m-> 3024\u001b[1;33m filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access\n\u001b[0m\u001b[0;32m 3025\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3026\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1959\u001b[0m \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1960\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1961\u001b[1;33m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m 1962\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m 1963\u001b[0m 
\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 594\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 595\u001b[0m \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 596\u001b[1;33m ctx=ctx)\n\u001b[0m\u001b[0;32m 597\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 598\u001b[0m outputs = execute.execute_with_cancellation(\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 59\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[1;32m---> 60\u001b[1;33m inputs, attrs, num_outputs)\n\u001b[0m\u001b[0;32m 61\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], 
"source": [ "from keras import optimizers\n", "import tensorflow.keras\n", "from keras.callbacks import EarlyStopping\n", "\n", "# Turn each cleaned review into a sequence of integer word ids (vocabulary capped by num_words=5000).\n", "tokenizer = tensorflow.keras.preprocessing.text.Tokenizer(num_words=5000, lower=True,split=' ',filters='!\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n')\n", "tokenizer.fit_on_texts(dataF[\"cleanText\"].values)\n", "#print(tokenizer.word_index) # To see the dictionary\n", "X = tokenizer.texts_to_sequences(dataF[\"cleanText\"].values)\n", "# Pad the id sequences to a common length so they form one 2-D array; X.shape[1] is that length.\n", "X = tensorflow.keras.preprocessing.sequence.pad_sequences(X)\n", "#Deep Learning Network Structure: Embedding -> Dropout -> LSTM -> softmax\n", "model_conv = Sequential()\n", "# The embedding input size (5000) matches the tokenizer's num_words; each word id maps to a 100-d vector.\n", "model_conv.add(Embedding(5000,100, input_length=X.shape[1]))\n", "model_conv.add(Dropout(0.5))\n", "model_conv.add(LSTM(100))\n", "# Two softmax units: assumes 'TextBlob Sentiment' has exactly two classes - TODO confirm.\n", "model_conv.add(Dense(2, activation='softmax'))\n", "# Targets are one-hot and the output is a softmax, so categorical cross-entropy is the matching loss.\n", "model_conv.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy','mae','mse',keras_metrics.precision(), keras_metrics.recall(),keras_metrics.f1_score()])\n", "#Y = data.Reviewer_Score\n", "# One-hot encode the sentiment labels (one column per class).\n", "Y = pd.get_dummies(dataF['TextBlob Sentiment']).values\n", "\n", "X_train, X_valid, Y_train, Y_valid = train_test_split(X,Y, test_size = 0.25, random_state =2)\n", "batch_size=128\n", "#Here we train the Network.\n", "# Stop once val_loss has not improved for 3 epochs and keep the best weights,\n", "# instead of interrupting the 100-epoch run by hand.\n", "early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)\n", "\n", "# fit() returns a History object (per-epoch metrics), not predictions.\n", "pred=model_conv.fit(X_train, Y_train, batch_size =batch_size, epochs =100, verbose =2,validation_data=(X_valid,Y_valid), callbacks=[early_stop])\n", "pred" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Sohail_Comp\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\engine\\sequential.py:450: UserWarning: `model.predict_classes()` is deprecated and will be removed after 2021-01-01. Please use instead:* `np.argmax(model.predict(x), axis=-1)`, if your model does multi-class classification (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype(\"int32\")`, if your model does binary classification (e.g. 
if it uses a `sigmoid` last-layer activation).\n", " warnings.warn('`model.predict_classes()` is deprecated and '\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.83 0.79 0.81 647\n", " 1 0.93 0.94 0.94 1851\n", "\n", " accuracy 0.90 2498\n", " macro avg 0.88 0.87 0.87 2498\n", "weighted avg 0.90 0.90 0.90 2498\n", "\n", "[[ 513 134]\n", " [ 106 1745]]\n" ] } ], "source": [ "# Sequential.predict_classes() is deprecated and removed in newer Keras releases;\n", "# for a softmax output the predicted class is the arg-max of the predicted scores.\n", "class_scores = model_conv.predict(X_valid, batch_size=128, verbose=0)\n", "rounded_predictions = np.argmax(class_scores, axis=-1)\n", "# Y_valid is one-hot encoded, so arg-max recovers the integer class label.\n", "rounded_labels=np.argmax(Y_valid, axis=1)\n", "print(classification_report(rounded_labels,rounded_predictions))\n", "print(confusion_matrix(rounded_labels,rounded_predictions))" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "176/176 - 333s - loss: 0.4245 - accuracy: 0.8001 - val_loss: 0.2785 - val_accuracy: 0.8799\n", "Epoch 2/100\n", "176/176 - 339s - loss: 0.1993 - accuracy: 0.9195 - val_loss: 0.2127 - val_accuracy: 0.9119\n", "Epoch 3/100\n", "176/176 - 350s - loss: 0.1264 - accuracy: 0.9521 - val_loss: 0.2413 - val_accuracy: 0.9087\n", "Epoch 4/100\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mmodel_conv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mDense\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mactivation\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'softmax'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m 
\u001b[0mmodel_conv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'binary_crossentropy'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'adam'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'accuracy'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 10\u001b[1;33m \u001b[0mpred\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmodel_conv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m \u001b[1;33m=\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m \u001b[1;33m=\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_valid\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mY_valid\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 11\u001b[0m \u001b[0mpred\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[0;32m 1156\u001b[0m _r=1):\n\u001b[0;32m 1157\u001b[0m 
\u001b[0mcallbacks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1158\u001b[1;33m \u001b[0mtmp_logs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1159\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1160\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 887\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 888\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mOptionalXlaContext\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jit_compile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 889\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 890\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 891\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 915\u001b[0m \u001b[1;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 916\u001b[0m \u001b[1;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 917\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# pylint: disable=not-callable\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 918\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 919\u001b[0m \u001b[1;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 3022\u001b[0m filtered_flat_args) = self._maybe_define_function(args, kwargs)\n\u001b[0;32m 3023\u001b[0m return graph_function._call_flat(\n\u001b[1;32m-> 3024\u001b[1;33m 
filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access\n\u001b[0m\u001b[0;32m 3025\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3026\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1959\u001b[0m \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1960\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1961\u001b[1;33m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m 1962\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m 1963\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 594\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 595\u001b[0m \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 596\u001b[1;33m ctx=ctx)\n\u001b[0m\u001b[0;32m 597\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 598\u001b[0m outputs = execute.execute_with_cancellation(\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\execute.py\u001b[0m 
in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 59\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[1;32m---> 60\u001b[1;33m inputs, attrs, num_outputs)\n\u001b[0m\u001b[0;32m 61\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "from keras.layers import Dense, Flatten, LSTM, Conv1D, MaxPooling1D, Dropout, Activation,Embedding\n", "from keras.models import Sequential\n", "from keras.callbacks import EarlyStopping\n", "\n", "# CNN-LSTM network: Embedding -> Conv1D -> MaxPooling1D -> LSTM -> softmax.\n", "# Reuses X, X_train/X_valid, Y_train/Y_valid and batch_size defined by an earlier cell.\n", "model_conv = Sequential()\n", "model_conv.add(Embedding(5000, 100, input_length=X.shape[1]))\n", "model_conv.add(Conv1D(128, 2, activation='relu'))\n", "model_conv.add(MaxPooling1D(pool_size=2))\n", "model_conv.add(LSTM(100))\n", "# Two softmax units, one per one-hot label column.\n", "model_conv.add(Dense(2, activation='softmax'))\n", "# Targets are one-hot and the output is a softmax, so categorical cross-entropy is the matching loss.\n", "model_conv.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", "# Stop once val_loss has not improved for 3 epochs and keep the best weights,\n", "# instead of interrupting the 100-epoch run by hand.\n", "early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)\n", "# fit() returns a History object (per-epoch metrics), not predictions.\n", "pred=model_conv.fit(X_train, Y_train, batch_size =batch_size, epochs =100, verbose =2,validation_data=(X_valid,Y_valid), callbacks=[early_stop])\n", "pred" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Sohail_Comp\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\engine\\sequential.py:450: UserWarning: `model.predict_classes()` is deprecated and will be removed after 2021-01-01. 
Please use instead:* `np.argmax(model.predict(x), axis=-1)`, if your model does multi-class classification (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype(\"int32\")`, if your model does binary classification (e.g. if it uses a `sigmoid` last-layer activation).\n", " warnings.warn('`model.predict_classes()` is deprecated and '\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.78 0.88 0.83 647\n", " 1 0.96 0.91 0.93 1851\n", "\n", " accuracy 0.90 2498\n", " macro avg 0.87 0.90 0.88 2498\n", "weighted avg 0.91 0.90 0.91 2498\n", "\n", "[[ 568 79]\n", " [ 160 1691]]\n" ] } ], "source": [ "# Sequential.predict_classes() is deprecated and removed in newer Keras releases;\n", "# for a softmax output the predicted class is the arg-max of the predicted scores.\n", "class_scores = model_conv.predict(X_valid, batch_size=128, verbose=0)\n", "rounded_predictions = np.argmax(class_scores, axis=-1)\n", "# Y_valid is one-hot encoded, so arg-max recovers the integer class label.\n", "rounded_labels=np.argmax(Y_valid, axis=1)\n", "print(classification_report(rounded_labels,rounded_predictions))\n", "print(confusion_matrix(rounded_labels,rounded_predictions))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. 
You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Sohail_Comp\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\engine\\base_layer.py:1307: UserWarning: `layer.updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.\n", " warnings.warn('`layer.updates` will be removed in a future version. '\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. 
You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "WARNING:tensorflow:`add_update` `inputs` kwarg has been deprecated. 
You no longer need to pass a value to `inputs` as it is being automatically inferred.\n", "1405/1405 - 1024s - loss: 0.5827 - accuracy: 0.7326 - mae: 0.3215 - mse: 0.2178 - precision: 0.3331 - recall: 0.0153 - f1_score: 0.0286 - val_loss: 0.5918 - val_accuracy: 0.7230 - val_mae: 0.3491 - val_mse: 0.2134 - val_precision: 0.3534 - val_recall: 0.0065 - val_f1_score: 0.0128\n", "Epoch 2/100\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_valid\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mY_valid\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mY\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_size\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0.1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;31m#Here we train the Network.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 13\u001b[1;33m \u001b[0mpred\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmodel_conv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m \u001b[1;33m=\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m 
\u001b[1;33m=\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_valid\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mY_valid\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 14\u001b[0m \u001b[0mpred\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[0;32m 1156\u001b[0m _r=1):\n\u001b[0;32m 1157\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1158\u001b[1;33m \u001b[0mtmp_logs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1159\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1160\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 
887\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 888\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mOptionalXlaContext\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jit_compile\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 889\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 890\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 891\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 915\u001b[0m \u001b[1;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 916\u001b[0m \u001b[1;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 917\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# pylint: 
disable=not-callable\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 918\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 919\u001b[0m \u001b[1;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 3022\u001b[0m filtered_flat_args) = self._maybe_define_function(args, kwargs)\n\u001b[0;32m 3023\u001b[0m return graph_function._call_flat(\n\u001b[1;32m-> 3024\u001b[1;33m filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access\n\u001b[0m\u001b[0;32m 3025\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3026\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1959\u001b[0m \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1960\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1961\u001b[1;33m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m 1962\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m 1963\u001b[0m 
\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 594\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 595\u001b[0m \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 596\u001b[1;33m ctx=ctx)\n\u001b[0m\u001b[0;32m 597\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 598\u001b[0m outputs = execute.execute_with_cancellation(\n", "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 59\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[1;32m---> 60\u001b[1;33m inputs, attrs, num_outputs)\n\u001b[0m\u001b[0;32m 61\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], 
from keras import optimizers

# Deep Learning Network Structure:
# embedding -> dropout -> GRU (full sequence) -> SimpleRNN -> 2-way softmax.
model_conv = Sequential()
for stage in (
    Embedding(5000, 100, input_length=X.shape[1]),
    Dropout(0.5),
    layers.GRU(256, return_sequences=True),
    layers.SimpleRNN(128),
    Dense(2, activation='softmax'),
):
    model_conv.add(stage)

# Metrics reported for every epoch, on both the training and validation data.
tracked_metrics = [
    'accuracy',
    'mae',
    'mse',
    keras_metrics.precision(),
    keras_metrics.recall(),
    keras_metrics.f1_score(),
]
model_conv.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=tracked_metrics,
)

batch_size = 16
# Hold out 10% of the samples for validation (the split is not seeded).
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.1)

# Here we train the Network.
# NOTE(review): `pred` receives the return value of `fit` (in Keras, the
# training history), not model predictions -- confirm before reusing it.
pred = model_conv.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=100,
    verbose=2,
    validation_data=(X_valid, Y_valid),
)
pred
# Evaluate the trained network on the validation split.
# `Sequential.predict_classes()` is deprecated (this cell's saved stderr shows
# the warning). For a softmax output layer the documented replacement is the
# argmax over the predicted class probabilities.
class_probabilities = model_conv.predict(X_valid, batch_size=128, verbose=0)
rounded_predictions = np.argmax(class_probabilities, axis=-1)
# Y_valid is argmax-decoded the same way, i.e. it is treated as one-hot rows.
rounded_labels = np.argmax(Y_valid, axis=1)
print(classification_report(rounded_labels, rounded_predictions))
print(confusion_matrix(rounded_labels, rounded_predictions))

# Three short, already-preprocessed reviews used as the sample corpus for the
# vectorizer demonstration cells.
a = ["gwyneth paltrow absolute great movie", "own movie number movie didnt like choice do", "wish show would come back tel"]
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
absolutebackchoicecomedidntdogreatgwynethlikemovienumberownpaltrowshowtelwishwould
010000011010010000
100101100121100000
201010000000001111
\n", "
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words term counts for the sample corpus `a` (at most 500 terms kept).
vectorizer1 = CountVectorizer(max_features=500)
X_train_tf1 = vectorizer1.fit_transform(a)

# `get_feature_names()` was removed in scikit-learn 1.2. Use its replacement
# `get_feature_names_out()` when the installed version has it, and fall back
# to the old method on older installs.
feature_names_getter = (
    getattr(vectorizer1, "get_feature_names_out", None)
    or vectorizer1.get_feature_names
)

# One row per document, one column per vocabulary term.
df1 = pd.DataFrame(X_train_tf1.toarray(), columns=feature_names_getter())
df1
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
absolutebackchoicecomedidntdogreatgwynethlikemovienumberownpaltrowshowtelwishwould
00.4673510.0000000.0000000.0000000.0000000.0000000.4673510.4673510.0000000.3554320.0000000.0000000.4673510.0000000.0000000.0000000.000000
10.0000000.0000000.3468210.0000000.3468210.3468210.0000000.0000000.3468210.5275330.3468210.3468210.0000000.0000000.0000000.0000000.000000
20.0000000.4082480.0000000.4082480.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.4082480.4082480.4082480.408248
\n", "
# t-test
from math import sqrt
from numpy.random import seed
from numpy.random import randn
from numpy import mean
from scipy.stats import t


# function for calculating the t-test for two dependent samples
def dependent_ttest(data1, data2, alpha):
    """Two-sided Student's t-test for two dependent (paired) samples.

    Parameters
    ----------
    data1, data2 : sequences of numbers
        Paired observations; ``data1[i]`` is paired with ``data2[i]``.
        Both must have the same length, and at least two pairs are needed.
    alpha : float
        Significance level, strictly between 0 and 1.

    Returns
    -------
    tuple
        ``(t_stat, df, cv, p)``: the t statistic, the degrees of freedom
        (``n - 1``), the two-sided critical value at ``alpha``, and the
        two-sided p-value. ``abs(t_stat) > cv`` and ``p < alpha`` are the
        same decision.

    Raises
    ------
    ValueError
        If the samples differ in length, contain fewer than two pairs, or
        ``alpha`` is outside ``(0, 1)``.

    Notes
    -----
    If every paired difference is identical the standard error is zero and
    ``t_stat`` is the result of a division by zero (not a finite number).
    """
    # number of paired samples
    n = len(data1)
    if len(data2) != n:
        raise ValueError('data1 and data2 must contain the same number of observations')
    if n < 2:
        raise ValueError('at least two paired observations are required')
    if not 0.0 < alpha < 1.0:
        raise ValueError('alpha must be strictly between 0 and 1')
    # calculate means
    mean1, mean2 = mean(data1), mean(data2)
    # difference between paired observations
    diffs = [x - y for x, y in zip(data1, data2)]
    mean_diff = sum(diffs) / n
    # sample standard deviation of the differences; summing squared deviations
    # from the mean keeps the radicand non-negative, which the shortcut
    # sum(d^2) - (sum d)^2 / n does not guarantee under rounding
    sd = sqrt(sum((d - mean_diff) ** 2 for d in diffs) / (n - 1))
    # standard error of the difference between the means
    sed = sd / sqrt(n)
    # calculate the t statistic
    t_stat = (mean1 - mean2) / sed
    # degrees of freedom
    df = n - 1
    # two-sided critical value: alpha is split across both tails so that it
    # matches the abs(t_stat) comparison and the two-sided p-value below
    cv = t.ppf(1.0 - alpha / 2.0, df)
    # two-sided p-value (survival function avoids 1 - cdf cancellation)
    p = t.sf(abs(t_stat), df) * 2.0
    # return everything
    return t_stat, df, cv, p


# seed the random number generator
# (no random numbers are drawn in this cell; kept so the global NumPy RNG
# state left behind by the cell is unchanged)
seed(1)


data1 = [0.92, 0.92, 0.88, 0.90, 0.96, 0.78, 0.87, 0.94, 0.82, 0.88]
data2 = [0.89, 0.88, 0.90, 0.89, 0.90, 0.88, 0.89, 0.89, 0.89, 0.89]
# NOTE(review): data3 duplicates data1 and is not used in this cell.
data3 = [0.92, 0.92, 0.88, 0.90, 0.96, 0.78, 0.87, 0.94, 0.82, 0.88]


# calculate the t test
# Conventional 5% significance level. The previous value of 0.5 made the
# one-tailed critical value exactly 0, so the test rejected for any data.
alpha = 0.05
t_stat, df, cv, p = dependent_ttest(data1, data2, alpha)
print('t=%.3f, df=%d' % (t_stat, df))
# interpret via critical value (null hypothesis: the two means are equal)
if abs(t_stat) <= cv:
    print('Accept null hypothesis that the means are equal.')
else:
    print('Reject the null hypothesis that the means are equal.')