{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import string\n", "import re\n", "import tensorflow\n", "from os import listdir\n", "from numpy import array\n", "from collections import Counter\n", "from nltk.corpus import stopwords\n", "import unicodedata as ud\n", "from keras.preprocessing.text import Tokenizer\n", "from keras.preprocessing.sequence import pad_sequences\n", "from keras.utils.vis_utils import plot_model\n", "from keras.models import Sequential\n", "from keras.layers import Dense,Dropout\n", "from keras.layers import Flatten,BatchNormalization,LSTM\n", "from keras.layers import Embedding\n", "from keras.layers.convolutional import Conv1D,Conv2D\n", "from keras.layers.convolutional import MaxPooling1D\n", "from keras.models import load_model" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def load_doc(filename):\n", "\n", " file = open(filename, 'r',encoding=\"utf8\")\n", " text = file.read()\n", " file.close()\n", " return text\n", "\n", "def clean_doc(doc,vocab):\n", " \n", " \n", " tokens = ''.join(c for c in doc if not ud.category(c).startswith('P'))\n", " tokens = ''.join(c for c in tokens if not ud.category(c).startswith('Lm')) \n", " tokens = tokens.split()\n", " tokens = [w for w in tokens if w in vocab]\n", " tokens = ' '.join(tokens)\n", " \n", " return tokens\n", "\n", "\n", "\n", "def process_docs(directory, vocab):\n", " documents = list()\n", "\n", " for filename in listdir(directory):\n", " path = directory + '/' + filename\n", " doc = load_doc(path)\n", " tokens = clean_doc(doc, vocab)\n", " documents.append(tokens)\n", " return documents\n", " \n", " \n", "\n", "def load_clean_dataset(vocab):\n", "\n", " neutral = process_docs('Dataset/Sentiment/Neutral', vocab)\n", " positive = process_docs('Dataset/Sentiment/Positive', vocab)\n", " negative = process_docs('Dataset/Sentiment/Negative', vocab)\n", " docs = negative + neutral + positive\n", " labels = array([[1,0,0] for _ in range(len(negative))] + [[0,1,0] for _ in range(len(neutral))] + [[0,0,1] for _ in range(len(positive))])\n", " return docs, labels\n", "\n", "\n", "\n", "\n", "def load_clean_dataset_test(vocab):\n", "\n", " neutral = process_docs('Dataset/Sentiment/Neutral_Test', vocab)\n", " positive = process_docs('Dataset/Sentiment/Positive_Test', vocab)\n", " negative = process_docs('Dataset/Sentiment/Negative_Test', vocab)\n", " docs = negative + neutral + positive\n", " labels = array([[1,0,0] for _ in range(len(negative))] + [[0,1,0] for _ in range(len(neutral))] + [[0,0,1] for _ in range(len(positive))])\n", " return docs, labels\n", "\n", "def create_tokenizer(lines):\n", " tokenizer = Tokenizer()\n", " tokenizer.fit_on_texts(lines)\n", " return tokenizer\n", "\n", "\n", "\n", "\n", " \n", "def encode_docs(tokenizer, max_length, docs):\n", "\n", " encoded = tokenizer.texts_to_sequences(docs)\n", "\n", " padded = pad_sequences(encoded, maxlen=max_length, padding='post')\n", " return padded\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def load_clean_dataset_testNeutral(vocab):\n", "\n", " \n", " neutral = process_docs('Dataset/Sentiment/Neutral_Test', vocab)\n", " docs = neutral\n", " labels = array([[0,1,0] for _ in range(len(neutral))])\n", "\n", " return docs, labels\n", "\n", "\n", "def load_clean_dataset_testNegative(vocab):\n", "\n", " \n", " negative = process_docs('Dataset/Sentiment/Negative_Test', vocab)\n", " docs = negative\n", " labels = array([[1,0,0] for _ in range(len(negative))])\n", "\n", " return docs, labels\n", "\n", "def load_clean_dataset_testPositive(vocab):\n", "\n", " \n", " positive = process_docs('Dataset/Sentiment/Positive_Test', vocab)\n", " docs = positive\n", " labels = array([[0,0,1] for _ in range(len(positive))])\n", "\n", " return docs, labels\n", "\n", "\n", "\n", "def define_model(vocab_size, max_length):\n", " model = Sequential()\n", " model.add(Embedding(vocab_size, 256, input_length=max_length))\n", " model.add(Conv1D(filters=64, kernel_size=8, activation='relu'))\n", " model.add(Conv1D(filters=64, kernel_size=8, activation='relu'))\n", " model.add(MaxPooling1D(pool_size=2))\n", " model.add(Flatten())\n", " model.add(Dropout(0.1))\n", " model.add(Dense(10, activation='relu'))\n", " model.add(Dense(3, activation='softmax'))\n", " model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", " model.summary()\n", " plot_model(model, to_file='BalancedSentimentModVocab2cnn.png', show_shapes=True)\n", " return model" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_5\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "embedding_5 (Embedding) (None, 450, 256) 2594560 \n", "_________________________________________________________________\n", "conv1d_6 (Conv1D) (None, 443, 64) 131136 \n", "_________________________________________________________________\n", "conv1d_7 (Conv1D) (None, 436, 64) 32832 \n", "_________________________________________________________________\n", "max_pooling1d_5 (MaxPooling1 (None, 218, 64) 0 \n", "_________________________________________________________________\n", "flatten_5 (Flatten) (None, 13952) 0 \n", "_________________________________________________________________\n", "dropout_5 (Dropout) (None, 13952) 0 \n", "_________________________________________________________________\n", "dense_10 (Dense) (None, 10) 139530 \n", "_________________________________________________________________\n", "dense_11 (Dense) (None, 3) 33 \n", "=================================================================\n", "Total params: 2,898,091\n", "Trainable params: 2,898,091\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "Epoch 1/25\n" ] }, { "ename": "InvalidArgumentError", "evalue": " logits and labels must have the same first dimension, got logits shape [32,3] and labels shape [96]\n\t [[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at :14) ]] [Op:__inference_train_function_32013]\n\nFunction call stack:\ntrain_function\n", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mXtrain\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mencode_docs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmax_length\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_docs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdefine_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvocab_size\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmax_length\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mXtrain\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mytrain\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m25\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 15\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'BalancedSentimentModVocab2cnn.h5'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36m_method_wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 106\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_method_wrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 107\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_in_multi_worker_mode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 108\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 109\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 110\u001b[0m \u001b[1;31m# Running inside `run_distribute_coordinator` already.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[0;32m 1096\u001b[0m batch_size=batch_size):\n\u001b[0;32m 1097\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1098\u001b[1;33m \u001b[0mtmp_logs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1099\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1100\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 778\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 779\u001b[0m \u001b[0mcompiler\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"nonXla\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 780\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 781\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 782\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 838\u001b[0m \u001b[1;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 839\u001b[0m \u001b[1;31m# stateless function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 840\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 841\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 842\u001b[0m \u001b[0mcanon_args\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcanon_kwds\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2827\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2828\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2829\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2830\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2831\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[1;34m(self, args, kwargs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1846\u001b[0m resource_variable_ops.BaseResourceVariable))],\n\u001b[0;32m 1847\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1848\u001b[1;33m cancellation_manager=cancellation_manager)\n\u001b[0m\u001b[0;32m 1849\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1850\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1922\u001b[0m \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1923\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1924\u001b[1;33m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m 1925\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m 1926\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 549\u001b[0m \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 550\u001b[1;33m ctx=ctx)\n\u001b[0m\u001b[0;32m 551\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 552\u001b[0m outputs = execute.execute_with_cancellation(\n", "\u001b[1;32m~\\anaconda3\\envs\\NoorEnv\\lib\\site-packages\\tensorflow\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 59\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[1;32m---> 60\u001b[1;33m inputs, attrs, num_outputs)\n\u001b[0m\u001b[0;32m 61\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mInvalidArgumentError\u001b[0m: logits and labels must have the same first dimension, got logits shape [32,3] and labels shape [96]\n\t [[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at :14) ]] [Op:__inference_train_function_32013]\n\nFunction call stack:\ntrain_function\n" ] } ], "source": [ "vocab_filename = 'SentimentBalancedVocabModified.txt'\n", "vocab = load_doc(vocab_filename)\n", "vocab = set(vocab.split())\n", "\n", "train_docs, ytrain = load_clean_dataset(vocab)\n", "test_docs, ytest = load_clean_dataset_test(vocab)\n", "tokenizer = create_tokenizer(train_docs)\n", "\n", "vocab_size = len(tokenizer.word_index) + 1\n", "#max_length = max([len(s.split()) for s in train_docs])\n", "max_length = 450\n", "Xtrain = encode_docs(tokenizer, max_length, train_docs)\n", "model = define_model(vocab_size, max_length)\n", "model.fit(Xtrain, ytrain, epochs=25, verbose=2)\n", "model.save('BalancedSentimentModVocab2cnn.h5')\n", "\n", "\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n", "\n", "\n", "test_docs, ytest = load_clean_dataset_testNeutral(vocab)\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n", "\n", "test_docs, ytest = load_clean_dataset_testPositive(vocab)\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n", "test_docs, ytest = load_clean_dataset_testNegative(vocab)\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_4\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "embedding_4 (Embedding) (None, 450, 256) 2594560 \n", "_________________________________________________________________\n", "conv1d_5 (Conv1D) (None, 443, 32) 65568 \n", "_________________________________________________________________\n", "max_pooling1d_4 (MaxPooling1 (None, 221, 32) 0 \n", "_________________________________________________________________\n", "flatten_4 (Flatten) (None, 7072) 0 \n", "_________________________________________________________________\n", "dropout_4 (Dropout) (None, 7072) 0 \n", "_________________________________________________________________\n", "dense_8 (Dense) (None, 10) 70730 \n", "_________________________________________________________________\n", "dense_9 (Dense) (None, 3) 33 \n", "=================================================================\n", "Total params: 2,730,891\n", "Trainable params: 2,730,891\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "Epoch 1/25\n", "118/118 - 14s - loss: 1.0220 - accuracy: 0.4865\n", "Epoch 2/25\n", "118/118 - 13s - loss: 0.5850 - accuracy: 0.7654\n", "Epoch 3/25\n", "118/118 - 13s - loss: 0.2493 - accuracy: 0.9186\n", "Epoch 4/25\n", "118/118 - 13s - loss: 0.1334 - accuracy: 0.9581\n", "Epoch 5/25\n", "118/118 - 13s - loss: 0.0953 - accuracy: 0.9680\n", "Epoch 6/25\n", "118/118 - 13s - loss: 0.0840 - accuracy: 0.9738\n", "Epoch 7/25\n", "118/118 - 14s - loss: 0.0694 - accuracy: 0.9760\n", "Epoch 8/25\n", "118/118 - 13s - loss: 0.0614 - accuracy: 0.9752\n", "Epoch 9/25\n", "118/118 - 13s - loss: 0.0604 - accuracy: 0.9746\n", "Epoch 10/25\n", "118/118 - 13s - loss: 0.0533 - accuracy: 0.9773\n", "Epoch 11/25\n", "118/118 - 13s - loss: 0.0567 - accuracy: 0.9752\n", "Epoch 12/25\n", "118/118 - 14s - loss: 0.0439 - accuracy: 0.9792\n", "Epoch 13/25\n", "118/118 - 13s - loss: 0.0398 - accuracy: 0.9789\n", "Epoch 14/25\n", "118/118 - 12s - loss: 0.0440 - accuracy: 0.9792\n", "Epoch 15/25\n", "118/118 - 12s - loss: 0.0381 - accuracy: 0.9808\n", "Epoch 16/25\n", "118/118 - 12s - loss: 0.0361 - accuracy: 0.9821\n", "Epoch 17/25\n", "118/118 - 12s - loss: 0.0318 - accuracy: 0.9819\n", "Epoch 18/25\n", "118/118 - 12s - loss: 0.0354 - accuracy: 0.9797\n", "Epoch 19/25\n", "118/118 - 12s - loss: 0.0346 - accuracy: 0.9795\n", "Epoch 20/25\n", "118/118 - 12s - loss: 0.0308 - accuracy: 0.9816\n", "Epoch 21/25\n", "118/118 - 12s - loss: 0.0302 - accuracy: 0.9816\n", "Epoch 22/25\n", "118/118 - 12s - loss: 0.0439 - accuracy: 0.9803\n", "Epoch 23/25\n", "118/118 - 12s - loss: 0.0464 - accuracy: 0.9795\n", "Epoch 24/25\n", "118/118 - 12s - loss: 0.0418 - accuracy: 0.9805\n", "Epoch 25/25\n", "118/118 - 12s - loss: 0.0333 - accuracy: 0.9789\n", "Test Accuracy: 56.614512\n", "Test Accuracy: 70.297033\n", "Test Accuracy: 34.000000\n", "Test Accuracy: 58.499998\n" ] } ], "source": [ "def define_model(vocab_size, max_length):\n", " model = Sequential()\n", " model.add(Embedding(vocab_size, 256, input_length=max_length))\n", " model.add(Conv1D(filters=32, kernel_size=8, activation='relu'))\n", " model.add(MaxPooling1D(pool_size=2))\n", " model.add(Flatten())\n", " model.add(Dropout(0.1))\n", " model.add(Dense(10, activation='relu'))\n", " model.add(Dense(3, activation='softmax'))\n", " model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", " model.summary()\n", " plot_model(model, to_file='BalancedSentimentModVocab32f.png', show_shapes=True)\n", " return model\n", "\n", "\n", "vocab_filename = 'SentimentBalancedVocabModified.txt'\n", "vocab = load_doc(vocab_filename)\n", "vocab = set(vocab.split())\n", "\n", "train_docs, ytrain = load_clean_dataset(vocab)\n", "test_docs, ytest = load_clean_dataset_test(vocab)\n", "tokenizer = create_tokenizer(train_docs)\n", "\n", "vocab_size = len(tokenizer.word_index) + 1\n", "#max_length = max([len(s.split()) for s in train_docs])\n", "max_length = 450\n", "Xtrain = encode_docs(tokenizer, max_length, train_docs)\n", "model = define_model(vocab_size, max_length)\n", "model.fit(Xtrain, ytrain, epochs=25, verbose=2)\n", "model.save('BalancedSentimentModVocab32f.h5')\n", "\n", "\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n", "\n", "\n", "test_docs, ytest = load_clean_dataset_testNeutral(vocab)\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n", "\n", "test_docs, ytest = load_clean_dataset_testPositive(vocab)\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n", "test_docs, ytest = load_clean_dataset_testNegative(vocab)\n", "Xtest = encode_docs(tokenizer, max_length, test_docs)\n", "_, acc = model.evaluate(Xtest, ytest, verbose=0)\n", "print('Test Accuracy: %f' % (acc*100))\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "NoorEnv", "language": "python", "name": "noorenv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 4 }