{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "cilIdrLI_45O" }, "source": [ "**Import Libraries and Data** " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "JQombcEn_45S" }, "outputs": [], "source": [ "import numpy as np \n", "import pandas as pd\n", "import os" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "DXvzRjJA_45Y" }, "outputs": [], "source": [ "# import BERT tokenization\n", "\n", "!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "w78aCQUjAPrO", "outputId": "059c06ee-6569-44b9-9a8b-3db63408b400" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting tokenization\n", " Downloading tokenization-1.0.7-py3-none-any.whl (10 kB)\n", "Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from tokenization) (2022.6.2)\n", "Installing collected packages: tokenization\n", "Successfully installed tokenization-1.0.7\n" ] } ], "source": [ "pip install tokenization" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "if3e9neA_45a" }, "outputs": [], "source": [ "import tokenization\n", "import tensorflow as tf\n", "import tensorflow_hub as hub\n", "from tensorflow.keras.utils import to_categorical\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LKDX7i3jAbNz", "outputId": "c68a6631-e570-4c3a-be08-b36c9d354535" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "from google.colab import drive\n", "import os\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "lC91JoTC_45c" }, "outputs": [], "source": [ "data_t = pd.read_csv('/content/drive/My Drive/smartphone_preprocessed_sentiments.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zKckPxr2m1sv" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 243 }, "id": "xFhCh4RZ_45e", "outputId": "af15ca6a-5a8b-4ebe-f6e5-a20ad14c9dcb" }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " " ], "text/plain": [ " Unnamed: 0 Unnamed: 0.1 Date User \\\n", "1 1 1.0 2022-10-30 23:00:47+00:00 GhostlyOwl \n", "2 2 2.0 2022-10-30 22:48:15+00:00 Sensible_George \n", "\n", " Location Tweets \\\n", "1 British Columbia I will go anywhere provided it be forward. ~ D... \n", "2 London, England @them_apples Compel parking companies to retai... \n", "\n", " c_Tweets \\\n", "1 I will go anywhere provided it be forward. ~ D... \n", "2 Compel parking companies to retain pay machine... \n", "\n", " cleanText Subjectivity Polarity \\\n", "1 anywher provid forward david livingston anasta... 0.0 0.0 \n", "2 compel park compani retain pay machin five mil... 0.0 0.0 \n", "\n", " Sentiment \n", "1 Neutral \n", "2 Neutral " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_t[1:3]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "NgJni98DoQLa" }, "outputs": [], "source": [ "data_t=data_t.dropna( axis=0, subset=[\"cleanText\"])" ] }, { "cell_type": "markdown", "metadata": { "id": "-6Q7quLz_45i" }, "source": [ "**Label encoding of labels**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_Y6zx-Iv_45j", "outputId": "3b39c47c-000f-404b-f431-ef946750c587" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0. 1. 0.]\n", " [0. 1. 0.]\n", " [0. 1. 0.]\n", " [0. 0. 1.]\n", " [0. 1. 0.]]\n" ] } ], "source": [ "#training\n", "label = preprocessing.LabelEncoder()\n", "x = label.fit_transform(data_t['Sentiment'])\n", "x = to_categorical(x)\n", "print(x[:5])" ] }, { "cell_type": "markdown", "metadata": { "id": "0J8ZarJn_45l" }, "source": [ "**Build a BERT layer**" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "yz7hzOOr_45n" }, "outputs": [], "source": [ "m_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2'\n", "bert_layer = hub.KerasLayer(m_url, trainable=True)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "at8-uI27CmXd", "outputId": "e42b6061-1998-4871-cfc9-a6b2aac3f8f5" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting bert-tensorflow\n", " Downloading bert_tensorflow-1.0.4-py2.py3-none-any.whl (64 kB)\n", "\u001b[K |████████████████████████████████| 64 kB 3.1 MB/s \n", "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.8/dist-packages (from bert-tensorflow) (1.15.0)\n", "Installing collected packages: bert-tensorflow\n", "Successfully installed bert-tensorflow-1.0.4\n" ] } ], "source": [ "pip install bert-tensorflow\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3_uy7j14G9M6" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "VrIJyrMtG9RI", "outputId": "eae62106-f46e-4042-eef5-74815410d1bc" }, "outputs": [ { "data": { "text/plain": [ "['preserve_unused_tokens=False']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sys\n", "from absl import flags\n", "sys.argv=['preserve_unused_tokens=False']\n", "flags.FLAGS(sys.argv)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "id": "_-dyVm8oDj_q" }, "outputs": [], "source": [ "from bert import tokenization" ] }, { 
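"cell_type": "markdown", "metadata": {}, "source": [ "A quick sanity check on the label encoding (illustrative sketch, not part of the original run): `LabelEncoder` assigns integer ids in sorted class order, so Negative = 0, Neutral = 1, Positive = 2, and `to_categorical` turns those ids into the one-hot rows printed above. The cell below maps the first few rows back to class names." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Illustrative round-trip (sketch): argmax undoes to_categorical and\n", "# inverse_transform undoes LabelEncoder, recovering the class names.\n", "print(label.inverse_transform(np.argmax(x[:5], axis=1)))" ] }, {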
"cell_type": "markdown", "metadata": { "id": "aLQ0BE1w_45o" }, "source": [ "**Encoding the text**" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "AR350PJ0_45q" }, "outputs": [], "source": [ "vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()\n", "do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()\n", "tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)\n", "\n", "def bert_encode(texts, tokenizer, max_len=512):\n", " all_tokens = []\n", " all_masks = []\n", " all_segments = []\n", " \n", " for text in texts:\n", " text = tokenizer.tokenize(text)\n", " \n", " text = text[:max_len-2]\n", " input_sequence = [\"[CLS]\"] + text + [\"[SEP]\"]\n", " pad_len = max_len-len(input_sequence)\n", " \n", " tokens = tokenizer.convert_tokens_to_ids(input_sequence) + [0] * pad_len\n", " pad_masks = [1] * len(input_sequence) + [0] * pad_len\n", " segment_ids = [0] * max_len\n", " \n", " all_tokens.append(tokens)\n", " all_masks.append(pad_masks)\n", " all_segments.append(segment_ids)\n", " \n", " return np.array(all_tokens), np.array(all_masks), np.array(all_segments)" ] }, { "cell_type": "markdown", "metadata": { "id": "WIgpLKl__45r" }, "source": [ "**Build The Model**" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "VoTnehxN_45t" }, "outputs": [], "source": [ "def build_model(bert_layer, max_len=512):\n", " input_word_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_word_ids\")\n", " input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_mask\")\n", " segment_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"segment_ids\")\n", " \n", " pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])\n", " \n", " clf_output = sequence_output[:, 0, :]\n", " \n", " lay = tf.keras.layers.Dense(64, activation='relu')(clf_output)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " lay = tf.keras.layers.Dense(32, activation='relu')(lay)\n", " lay = tf.keras.layers.Dense(16, activation='relu')(lay)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " out = tf.keras.layers.Dense(3, activation='softmax')(lay)\n", " \n", " model = tf.keras.models.Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)\n", " model.compile(tf.keras.optimizers.Adam(lr=2e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "f9DwncVuFqta" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "yW-rgiMmn1yR" }, "outputs": [], "source": [ "data_t=data_t.dropna( axis=0, subset=[\"Sentiment\"])\n" ] }, { "cell_type": "markdown", "metadata": { "id": "DIlVUTpX_45u" }, "source": [ "Here We check only the first 250 characters of each text, and also we set train-test input and train labels" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "id": "i5c_zxlE_45v" }, "outputs": [], "source": [ "max_len = 200\n", "data_input = bert_encode(data_t.cleanText.values, tokenizer, max_len=max_len)\n", "data_labels = x" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1eJv1Up8_45w", "outputId": "aee4987c-bdc7-420f-e124-ce2668f94c16" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Negative' 'Neutral' 'Positive']\n" ] } ], "source": [ "labels = label.classes_\n", "print(labels)" ] }, { "cell_type": "code", 
"execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "eSJs7UwK_45w", "outputId": "3da9640c-5719-4e58-fa2e-470588af25ef" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_word_ids (InputLayer) [(None, 200)] 0 [] \n", " \n", " input_mask (InputLayer) [(None, 200)] 0 [] \n", " \n", " segment_ids (InputLayer) [(None, 200)] 0 [] \n", " \n", " keras_layer (KerasLayer) [(None, 768), 109482241 ['input_word_ids[0][0]', \n", " (None, 200, 768)] 'input_mask[0][0]', \n", " 'segment_ids[0][0]'] \n", " \n", " tf.__operators__.getitem (Slic (None, 768) 0 ['keras_layer[0][1]'] \n", " ingOpLambda) \n", " \n", " dense (Dense) (None, 64) 49216 ['tf.__operators__.getitem[0][0]'\n", " ] \n", " \n", " dropout (Dropout) (None, 64) 0 ['dense[0][0]'] \n", " \n", " dense_1 (Dense) (None, 32) 2080 ['dropout[0][0]'] \n", " \n", " dense_2 (Dense) (None, 16) 528 ['dense_1[0][0]'] \n", " \n", " dropout_1 (Dropout) (None, 16) 0 ['dense_2[0][0]'] \n", " \n", " dense_3 (Dense) (None, 3) 51 ['dropout_1[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 109,534,116\n", "Trainable params: 109,534,115\n", "Non-trainable params: 1\n", "__________________________________________________________________________________________________\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.8/dist-packages/keras/optimizers/optimizer_v2/adam.py:110: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.\n", " super(Adam, self).__init__(name, **kwargs)\n" ] } ], "source": [ "model = build_model(bert_layer, max_len=max_len)\n", "model.summary()" ] }, { "cell_type": "markdown", "metadata": { "id": "eDL4Sy6D_45y" }, "source": [ "**Run the model**" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yOYHSG4K_45z", "outputId": "a64f3a0b-222c-4a14-d5a4-03ac8391e9a6" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "1614/1614 [==============================] - 1163s 712ms/step - loss: 0.3223 - accuracy: 0.8824 - val_loss: 0.0917 - val_accuracy: 0.9744\n", "Epoch 2/10\n", "1614/1614 [==============================] - 1150s 712ms/step - loss: 0.1087 - accuracy: 0.9701 - val_loss: 0.0536 - val_accuracy: 0.9870\n", "Epoch 3/10\n", "1614/1614 [==============================] - 1151s 713ms/step - loss: 0.0729 - accuracy: 0.9806 - val_loss: 0.0749 - val_accuracy: 0.9808\n", "Epoch 4/10\n", "1614/1614 [==============================] - 1153s 714ms/step - loss: 0.0596 - accuracy: 0.9849 - val_loss: 0.0729 - val_accuracy: 0.9732\n", "Epoch 5/10\n", "1614/1614 [==============================] - 1152s 714ms/step - loss: 0.0478 - accuracy: 0.9876 - val_loss: 0.0503 - val_accuracy: 0.9887\n", "Epoch 6/10\n", "1614/1614 [==============================] - 1151s 713ms/step - loss: 0.0393 - accuracy: 0.9900 - val_loss: 0.0685 - val_accuracy: 0.9856\n", "Epoch 7/10\n", "1614/1614 [==============================] - 1151s 713ms/step - loss: 0.0339 - accuracy: 0.9915 - val_loss: 0.0898 - val_accuracy: 0.9823\n", "Epoch 8/10\n", "1614/1614 
[==============================] - 1150s 713ms/step - loss: 0.0295 - accuracy: 0.9919 - val_loss: 0.0596 - val_accuracy: 0.9873\n", "Epoch 9/10\n", "1614/1614 [==============================] - 1150s 713ms/step - loss: 0.0286 - accuracy: 0.9932 - val_loss: 0.0876 - val_accuracy: 0.9814\n", "Epoch 10/10\n", "1614/1614 [==============================] - 1150s 713ms/step - loss: 0.0188 - accuracy: 0.9945 - val_loss: 0.0771 - val_accuracy: 0.9842\n" ] } ], "source": [ "#checkpoint = tf.keras.callbacks.ModelCheckpoint('model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)\n", "#earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, verbose=1)\n", "#test_labels=y\n", "train_sh = model.fit(\n", " data_input, data_labels,\n", " validation_split=0.20,\n", " epochs=10,\n", " #callbacks=[checkpoint, earlystopping],\n", " batch_size=16\n", ")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "id": "IMFFZsp8o8mD" }, "outputs": [], "source": [ "data_test = pd.read_csv('/content/drive/My Drive/smartphone_preprocessed_sentiments_test.csv')\n", "data_test = data_test.dropna(axis=0, subset=[\"cleanText\"])\n", "# testing: encode the held-out labels\n", "label = preprocessing.LabelEncoder()\n", "y = label.fit_transform(data_test['Sentiment'])\n", "y = to_categorical(y)\n", "#print(y[:5])\n", "max_len = 200\n", "test_input = bert_encode(data_test.cleanText.values, tokenizer, max_len=max_len)\n", "test_labels = y" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dVPPMbANo9OF", "outputId": "0dc7d4ce-71c6-4725-81af-c3c884d21c5b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "202/202 [==============================] - 91s 454ms/step\n", " precision recall f1-score support\n", "\n", " 0 0.99 0.98 0.98 884\n", " 1 1.00 0.99 1.00 3209\n", " 2 0.99 1.00 0.99 2366\n", "\n", " accuracy 0.99 6459\n", " macro avg 0.99 0.99 0.99 6459\n", "weighted avg 0.99 0.99 0.99 6459\n", "\n", "[[ 869 0 15]\n", " [ 8 3189 12]\n", " [ 5 2 2359]]\n", "Accuracy: 0.993497\n", "Precision: 0.993526\n", "Recall: 0.993497\n", "F1 score: 0.993501\n" ] } ], "source": [ "from matplotlib import pyplot as plt\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "from sklearn.metrics import f1_score\n", "import seaborn as sns\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import confusion_matrix\n", "test_predictions = model.predict(test_input)\n", "test_labels = np.argmax(test_labels, axis=1)\n", "print(classification_report(test_labels, np.argmax(test_predictions, axis=1)))\n", "print(confusion_matrix(test_labels, np.argmax(test_predictions, axis=1)))\n", "accuracy = accuracy_score(test_labels, np.argmax(test_predictions, axis=1))\n", "print('Accuracy: %f' % accuracy)\n", "precision = precision_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('Precision: %f' % precision)\n", "recall = recall_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('Recall: %f' % recall)\n", "f1 = f1_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('F1 score: %f' % f1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "mH4tr1yQo9W-" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "EasZOhjwo9Zg" }, "outputs": [],
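"source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The fit call above stores per-epoch metrics in `train_sh` but never uses them. A minimal sketch of plotting the learning curves, assuming only the standard Keras `History` keys `accuracy` and `val_accuracy`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch, not part of the original run: plot the recorded learning curves.\n", "from matplotlib import pyplot as plt\n", "\n", "plt.plot(train_sh.history['accuracy'], label='train accuracy')\n", "plt.plot(train_sh.history['val_accuracy'], label='val accuracy')\n", "plt.xlabel('epoch')\n", "plt.ylabel('accuracy')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [],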
"source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "9StZ_GnWn2lo" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "sDgpnIRd16Tb" }, "outputs": [], "source": [ "import numpy as np \n", "import pandas as pd\n", "import os\n", "\n", "# import BERT tokenization\n", "\n", "!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6cMy1yQX2uOd", "outputId": "9338c84b-8c2a-45f4-83fc-1f3a7e19c2e5" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting tokenization\n", " Downloading tokenization-1.0.7-py3-none-any.whl (10 kB)\n", "Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from tokenization) (2022.6.2)\n", "Installing collected packages: tokenization\n", "Successfully installed tokenization-1.0.7\n" ] } ], "source": [ "pip install tokenization\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FH6f2pMP16am", "outputId": "ac8a71b3-f717-4dad-ce5e-9b14ec26cf04" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "import tokenization\n", "import tensorflow as tf\n", "import tensorflow_hub as hub\n", "from tensorflow.keras.utils import to_categorical\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split\n", "\n", "from google.colab import drive\n", "import os\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ltRKPKiV16fl" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "EEPZeaS0n2sO" }, "outputs": [], "source": [ "data = pd.read_csv('/content/drive/My Drive/smartphone_withoutpreprocessed_sentiments.csv')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "JyQMxy4wn2u1" }, "outputs": [], "source": [ "data=data.dropna( axis=0, subset=[\"Tweets\"])\n", "#training\n", "label = preprocessing.LabelEncoder()\n", "x = label.fit_transform(data['Sentiment'])\n", "x = to_categorical(x)\n", "\n", "#testing\n", "#label = preprocessing.LabelEncoder()\n", "#y = label.fit_transform(test_data['Sentiment'])\n", "#y = to_categorical(y)\n", "\n", "\n", "m_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2'\n", "bert_layer = hub.KerasLayer(m_url, trainable=True)\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vKEd9x3f3c2H", "outputId": "1a8cb53a-8546-4c4f-e536-91e3f841a9bc" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting bert-tensorflow\n", " Downloading bert_tensorflow-1.0.4-py2.py3-none-any.whl (64 kB)\n", "\u001b[K |████████████████████████████████| 64 kB 2.0 MB/s \n", "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.8/dist-packages (from bert-tensorflow) (1.15.0)\n", "Installing collected packages: bert-tensorflow\n", "Successfully installed bert-tensorflow-1.0.4\n" ] } ], "source": [ "pip install 
bert-tensorflow" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "tIEF8E_en2xP" }, "outputs": [], "source": [ "import sys\n", "from absl import flags\n", "sys.argv=['preserve_unused_tokens=False']\n", "flags.FLAGS(sys.argv)\n", "\n", "from bert import tokenization" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "iK2j_L-6n2zx" }, "outputs": [], "source": [ "vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()\n", "do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()\n", "tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)\n", "\n", "def bert_encode(texts, tokenizer, max_len=512):\n", " all_tokens = []\n", " all_masks = []\n", " all_segments = []\n", " \n", " for text in texts:\n", " text = tokenizer.tokenize(text)\n", " \n", " text = text[:max_len-2]\n", " input_sequence = [\"[CLS]\"] + text + [\"[SEP]\"]\n", " pad_len = max_len-len(input_sequence)\n", " \n", " tokens = tokenizer.convert_tokens_to_ids(input_sequence) + [0] * pad_len\n", " pad_masks = [1] * len(input_sequence) + [0] * pad_len\n", " segment_ids = [0] * max_len\n", " \n", " all_tokens.append(tokens)\n", " all_masks.append(pad_masks)\n", " all_segments.append(segment_ids)\n", " \n", " return np.array(all_tokens), np.array(all_masks), np.array(all_segments)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "tDfeXu45n22H" }, "outputs": [], "source": [ "def build_model(bert_layer, max_len=512):\n", " input_word_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_word_ids\")\n", " input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_mask\")\n", " segment_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"segment_ids\")\n", " \n", " pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])\n", " \n", " clf_output = sequence_output[:, 0, :]\n", " \n", " lay = tf.keras.layers.Dense(64, activation='relu')(clf_output)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " lay = tf.keras.layers.Dense(32, activation='relu')(lay)\n", " lay = tf.keras.layers.Dense(16, activation='relu')(lay)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " out = tf.keras.layers.Dense(3, activation='softmax')(lay)\n", " \n", " model = tf.keras.models.Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)\n", " model.compile(tf.keras.optimizers.Adam(lr=2e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4SQy8-M8n24t", "outputId": "d29f1a8e-78e7-4006-b765-86f99147809f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Negative' 'Neutral' 'Positive']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.8/dist-packages/keras/optimizers/optimizer_v2/adam.py:110: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.\n", " super(Adam, self).__init__(name, **kwargs)\n" ] } ], "source": [ "max_len = 200\n", "data_input = bert_encode(data.Tweets.values, tokenizer, max_len=max_len)\n", "data_labels = x\n", "labels = label.classes_\n", "print(labels)\n", "model = build_model(bert_layer, max_len=max_len)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mEHxaoxXn263", "outputId": "3e967cba-94bf-4aeb-ecbf-d63b57c312b7" }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "Epoch 1/10\n", "1621/1621 [==============================] - 1209s 737ms/step - loss: 0.4848 - accuracy: 0.8187 - val_loss: 0.2592 - val_accuracy: 0.9190\n", "Epoch 2/10\n", "1621/1621 [==============================] - 1213s 748ms/step - loss: 0.2104 - accuracy: 0.9370 - val_loss: 0.3539 - val_accuracy: 0.9206\n", "Epoch 3/10\n", "1621/1621 [==============================] - 1210s 747ms/step - loss: 0.1402 - accuracy: 0.9618 - val_loss: 0.2676 - val_accuracy: 0.9388\n", "Epoch 4/10\n", "1621/1621 [==============================] - 1213s 748ms/step - loss: 0.1036 - accuracy: 0.9718 - val_loss: 0.2405 - val_accuracy: 0.9531\n", "Epoch 5/10\n", "1621/1621 [==============================] - 1216s 750ms/step - loss: 0.0742 - accuracy: 0.9808 - val_loss: 0.2299 - val_accuracy: 0.9520\n", "Epoch 6/10\n", "1621/1621 [==============================] - 1216s 750ms/step - loss: 0.0593 - accuracy: 0.9849 - val_loss: 0.3030 - val_accuracy: 0.9540\n", "Epoch 7/10\n", "1621/1621 [==============================] - 1216s 750ms/step - loss: 0.0486 - accuracy: 0.9863 - val_loss: 0.3550 - val_accuracy: 0.9510\n", "Epoch 8/10\n", "1621/1621 [==============================] - 1216s 750ms/step - loss: 0.0379 - accuracy: 0.9890 - val_loss: 0.3728 - val_accuracy: 0.9502\n", "Epoch 9/10\n", "1621/1621 [==============================] - 1217s 751ms/step - loss: 0.0364 - accuracy: 0.9904 - val_loss: 0.3250 - val_accuracy: 0.9488\n", "1621/1621 [==============================] - 1217s 751ms/step - loss: 0.0364 - accuracy: 0.9904 - val_loss: 0.3250 - val_accuracy: 0.9488\n", "Epoch 10/10\n", "Epoch 10/10\n", "1621/1621 [==============================] - 1216s 750ms/step - loss: 0.0275 - accuracy: 0.9922 - val_loss: 0.3902 - val_accuracy: 0.9488\n", "1621/1621 [==============================] - 1216s 750ms/step - loss: 0.0275 - accuracy: 0.9922 - val_loss: 0.3902 - val_accuracy: 0.9488\n" ] } ], "source": [ "#checkpoint = tf.keras.callbacks.ModelCheckpoint('model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)\n", "#earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, verbose=1)\n", "train_sh = model.fit(\n", " data_input, data_labels,\n", " validation_split=0.20,\n", " epochs=10,\n", " #callbacks=[checkpoint, earlystopping],\n", " batch_size=16\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "id": "15V1u4u7n29G" }, "outputs": [], "source": [ "data_test = pd.read_csv('/content/drive/My Drive/smartphone_withoutpreprocessed_sentiments_test.csv')\n", "data_test=data_test.dropna( axis=0, subset=[\"Tweets\"])\n", "#training\n", "label = preprocessing.LabelEncoder()\n", "y = label.fit_transform(data_test['Sentiment'])\n", "y = to_categorical(y)\n", "#print(y[:5])\n", "max_len = 200\n", "test_input = bert_encode(data_test.Tweets.values, tokenizer, max_len=max_len)\n", "test_labels = y" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "toweEzDCn3Bp", "outputId": "bd3a2541-9fca-4d37-baf5-7797191f6280" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "203/203 [==============================] - 92s 455ms/step\n", " precision recall f1-score support\n", "\n", " 0 0.96 0.98 0.97 1095\n", " 1 1.00 0.98 0.99 2411\n", " 2 0.99 0.99 0.99 2978\n", "\n", " accuracy 0.99 6484\n", " macro avg 0.98 0.98 0.98 6484\n", "weighted avg 0.99 0.99 0.99 6484\n", "\n", "[[1068 4 23]\n", " [ 25 2365 21]\n", " [ 14 5 2959]]\n", "Accuracy: 0.985811\n", 
"Precision: 0.985911\n", "Recall: 0.985811\n", "F1 score: 0.985826\n" ] } ], "source": [ "from matplotlib import pyplot as plt #(matplotblib)\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "from sklearn.metrics import f1_score\n", "import seaborn as sns #(visualsize)\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import confusion_matrix\n", "test_predictions =model.predict(test_input)\n", "test_labels=np.argmax(test_labels, axis=1)\n", "print(classification_report(test_labels,np.argmax(test_predictions,axis=1)))\n", "print(confusion_matrix(test_labels,np.argmax(test_predictions,axis=1)))\n", "accuracy = accuracy_score(test_labels,np.argmax(test_predictions,axis=1))\n", "print('Accuracy: %f' % accuracy)\n", "precision = precision_score(test_labels,np.argmax(test_predictions,axis=1),average='weighted')\n", "print('Precision: %f' % precision)\n", "recall = recall_score(test_labels,np.argmax(test_predictions,axis=1), average='weighted')\n", "print('Recall: %f' % recall)\n", "f1 = f1_score(test_labels,np.argmax(test_predictions,axis=1), average='weighted')\n", "print('F1 score: %f' % f1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "8FYB11mEn3EP" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "zfZHmiV5z_8N" }, "outputs": [], "source": [ "import numpy as np \n", "import pandas as pd\n", "import os" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "y0o0Nbm_z__P" }, "outputs": [], "source": [ "# import BERT tokenization\n", "\n", "!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iHMINQxB0QDP", "outputId": "8f40f252-794a-4522-ce4c-44a63da7e832" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: tokenization in /usr/local/lib/python3.8/dist-packages (1.0.7)\n", "Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from tokenization) (2022.6.2)\n" ] } ], "source": [ "pip install tokenization" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rG6hmVnb0ACU", "outputId": "e47511a9-931e-49da-a38a-62a2c20f6679" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" ] } ], "source": [ "import tokenization\n", "import tensorflow as tf\n", "import tensorflow_hub as hub\n", "from tensorflow.keras.utils import to_categorical\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split\n", "\n", "from google.colab import drive\n", "import os\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "gb7B7SKzy4o9" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 19, "metadata": { "id": "x-QM02jTn3GL" }, "outputs": [], "source": [ "data = pd.read_csv('/content/drive/My Drive/crypto_10k_tweets_preprocessed_sentiments.csv')" ] }, { "cell_type": "code", "execution_count": 20, 
"metadata": { "id": "xN17BFN2alVH" }, "outputs": [], "source": [ "data=data.dropna( axis=0, subset=[\"cleanText\"])\n", "\n", "\n", "#training\n", "label = preprocessing.LabelEncoder()\n", "x = label.fit_transform(data['Sentiment'])\n", "x = to_categorical(x)\n", "\n", "\n", "m_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2'\n", "bert_layer = hub.KerasLayer(m_url, trainable=True)\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wKNH9QpcalXm", "outputId": "ae344e0b-9b2d-4661-d7a5-49bad94e0a1c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: bert-tensorflow in /usr/local/lib/python3.8/dist-packages (1.0.4)\n", "Requirement already satisfied: six in /usr/local/lib/python3.8/dist-packages (from bert-tensorflow) (1.15.0)\n" ] } ], "source": [ "pip install bert-tensorflow" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "id": "GGV3Sx9OalaF" }, "outputs": [], "source": [ "import sys\n", "from absl import flags\n", "sys.argv=['preserve_unused_tokens=False']\n", "flags.FLAGS(sys.argv)\n", "\n", "from bert import tokenization" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "id": "57JO-17calc_" }, "outputs": [], "source": [ "vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()\n", "do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()\n", "tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)\n", "\n", "def bert_encode(texts, tokenizer, max_len=512):\n", " all_tokens = []\n", " all_masks = []\n", " all_segments = []\n", " \n", " for text in texts:\n", " text = tokenizer.tokenize(text)\n", " \n", " text = text[:max_len-2]\n", " input_sequence = [\"[CLS]\"] + text + [\"[SEP]\"]\n", " pad_len = max_len-len(input_sequence)\n", " \n", " tokens = tokenizer.convert_tokens_to_ids(input_sequence) + [0] * pad_len\n", " pad_masks = [1] * len(input_sequence) + [0] * pad_len\n", " segment_ids = [0] * max_len\n", " \n", " all_tokens.append(tokens)\n", " all_masks.append(pad_masks)\n", " all_segments.append(segment_ids)\n", " \n", " return np.array(all_tokens), np.array(all_masks), np.array(all_segments)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "id": "IgRSlL7Pa2pI" }, "outputs": [], "source": [ "def build_model(bert_layer, max_len=512):\n", " input_word_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_word_ids\")\n", " input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_mask\")\n", " segment_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"segment_ids\")\n", " \n", " pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])\n", " \n", " clf_output = sequence_output[:, 0, :]\n", " \n", " lay = tf.keras.layers.Dense(64, activation='relu')(clf_output)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " lay = tf.keras.layers.Dense(32, activation='relu')(lay)\n", " lay = tf.keras.layers.Dense(16, activation='relu')(lay)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " out = tf.keras.layers.Dense(3, activation='softmax')(lay)\n", " \n", " model = tf.keras.models.Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)\n", " model.compile(tf.keras.optimizers.Adam(lr=2e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", " \n", " return model" ] }, { 
"cell_type": "code", "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MFMt1g6Za2su", "outputId": "8ce02379-d656-4f92-c205-5b5d836f6014" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Negative' 'Neutral' 'Positive']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.8/dist-packages/keras/optimizers/optimizer_v2/adam.py:110: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.\n", " super(Adam, self).__init__(name, **kwargs)\n" ] } ], "source": [ "max_len = 200\n", "data_input = bert_encode(data.cleanText.values, tokenizer, max_len=max_len)\n", "data_labels = x\n", "\n", "\n", "labels = label.classes_\n", "print(labels)\n", "\n", "model = build_model(bert_layer, max_len=max_len)\n" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-ncLBQbga2vr", "outputId": "682df943-c08c-4ade-bf16-ba5dd1396f9c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "459/459 [==============================] - 340s 716ms/step - loss: 0.5579 - accuracy: 0.7723 - val_loss: 0.3138 - val_accuracy: 0.8938\n", "Epoch 2/10\n", "459/459 [==============================] - 340s 742ms/step - loss: 0.2360 - accuracy: 0.9204 - val_loss: 0.1810 - val_accuracy: 0.9597\n", "Epoch 3/10\n", "459/459 [==============================] - 341s 743ms/step - loss: 0.1236 - accuracy: 0.9657 - val_loss: 0.1556 - val_accuracy: 0.9608\n", "Epoch 4/10\n", "459/459 [==============================] - 341s 743ms/step - loss: 0.0798 - accuracy: 0.9786 - val_loss: 0.2027 - val_accuracy: 0.9581\n", "Epoch 5/10\n", "459/459 [==============================] - 341s 743ms/step - loss: 0.0728 - accuracy: 0.9824 - val_loss: 0.1801 - val_accuracy: 0.9662\n", "Epoch 6/10\n", "459/459 [==============================] - 340s 742ms/step - loss: 0.0408 - accuracy: 0.9910 - val_loss: 0.2025 - val_accuracy: 0.9630\n", "Epoch 7/10\n", "459/459 [==============================] - 341s 743ms/step - loss: 0.0491 - accuracy: 0.9884 - val_loss: 0.2965 - val_accuracy: 0.9504\n", "Epoch 8/10\n", "459/459 [==============================] - 341s 743ms/step - loss: 0.0502 - accuracy: 0.9903 - val_loss: 0.1991 - val_accuracy: 0.9684\n", "Epoch 9/10\n", "459/459 [==============================] - 341s 743ms/step - loss: 0.0298 - accuracy: 0.9931 - val_loss: 0.2223 - val_accuracy: 0.9706\n", "Epoch 10/10\n", "459/459 [==============================] - 341s 743ms/step - loss: 0.0248 - accuracy: 0.9955 - val_loss: 0.2224 - val_accuracy: 0.9684\n" ] } ], "source": [ "#checkpoint = tf.keras.callbacks.ModelCheckpoint('model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)\n", "#earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, verbose=1)\n", "train_sh2 = model.fit(\n", " data_input, data_labels,\n", " validation_split=0.20,\n", " epochs=10,\n", " #callbacks=[checkpoint, earlystopping],\n", " batch_size=16\n", ")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nFJquVpha2zI", "outputId": "897bd6c5-ab2a-4449-d498-98438188ad6b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "58/58 [==============================] - 25s 441ms/step\n", " precision recall f1-score support\n", "\n", " 0 0.95 0.98 0.96 170\n", " 1 1.00 1.00 1.00 1232\n", " 2 1.00 0.98 0.99 438\n", "\n", " accuracy 0.99 1840\n", " macro avg 
0.98 0.99 0.98 1840\n", "weighted avg 0.99 0.99 0.99 1840\n", "\n", "[[ 166 3 1]\n", " [ 3 1229 0]\n", " [ 6 1 431]]\n", "Accuracy: 0.992391\n", "Precision: 0.992525\n", "Recall: 0.992391\n", "F1 score: 0.992428\n" ] } ], "source": [ "data_test = pd.read_csv('/content/drive/My Drive/crypto_10k_tweets_preprocessed_sentiments_test.csv')\n", "data_test = data_test.dropna(axis=0, subset=[\"cleanText\"])\n", "# testing: encode the held-out labels\n", "label = preprocessing.LabelEncoder()\n", "y = label.fit_transform(data_test['Sentiment'])\n", "y = to_categorical(y)\n", "#print(y[:5])\n", "max_len = 200\n", "test_input = bert_encode(data_test.cleanText.values, tokenizer, max_len=max_len)\n", "test_labels = y\n", "\n", "from matplotlib import pyplot as plt\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "from sklearn.metrics import f1_score\n", "import seaborn as sns\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import confusion_matrix\n", "test_predictions = model.predict(test_input)\n", "test_labels = np.argmax(test_labels, axis=1)\n", "print(classification_report(test_labels, np.argmax(test_predictions, axis=1)))\n", "print(confusion_matrix(test_labels, np.argmax(test_predictions, axis=1)))\n", "accuracy = accuracy_score(test_labels, np.argmax(test_predictions, axis=1))\n", "print('Accuracy: %f' % accuracy)\n", "precision = precision_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('Precision: %f' % precision)\n", "recall = recall_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('Recall: %f' % recall)\n", "f1 = f1_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('F1 score: %f' % f1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "7AyD6eVcBGYY" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 30, "metadata": { "id": "w_iZnhl3BGiT" }, "outputs": [], "source": [ "data = pd.read_csv('/content/drive/My Drive/crypto_10k_tweets_withoutpreprocessed_sentiments.csv')\n", "data = data.dropna(axis=0, subset=[\"Content\"])\n", "\n", "\n", "# training: encode labels and load the BERT layer\n", "label = preprocessing.LabelEncoder()\n", "x = label.fit_transform(data['Sentiment'])\n", "x = to_categorical(x)\n", "\n", "\n", "m_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2'\n", "bert_layer = hub.KerasLayer(m_url, trainable=True)\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "id": "SJUm2uKmBGrv" }, "outputs": [], "source": [ "vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()\n", "do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()\n", "tokenizer = tokenization.FullTokenizer(vocab_file, do_lower_case)\n", "\n", "def bert_encode(texts, tokenizer, max_len=512):\n", " all_tokens = []\n", " all_masks = []\n", " all_segments = []\n", " \n", " for text in texts:\n", " text = tokenizer.tokenize(text)\n", " \n", " text = text[:max_len-2]\n", " input_sequence = [\"[CLS]\"] + text + [\"[SEP]\"]\n", " pad_len = max_len-len(input_sequence)\n", " \n", " tokens = tokenizer.convert_tokens_to_ids(input_sequence) + [0] * pad_len\n", " pad_masks = [1] * len(input_sequence) + [0] * pad_len\n", " segment_ids = [0] * max_len\n", " \n", " all_tokens.append(tokens)\n", " all_masks.append(pad_masks)\n", " all_segments.append(segment_ids)\n", " \n", " return np.array(all_tokens), np.array(all_masks),
np.array(all_segments)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "id": "R6o4WvhvBGzW" }, "outputs": [], "source": [ "def build_model(bert_layer, max_len=512):\n", " input_word_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_word_ids\")\n", " input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"input_mask\")\n", " segment_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=\"segment_ids\")\n", " \n", " pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])\n", " \n", " clf_output = sequence_output[:, 0, :]\n", " \n", " lay = tf.keras.layers.Dense(64, activation='relu')(clf_output)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " lay = tf.keras.layers.Dense(32, activation='relu')(lay)\n", " lay = tf.keras.layers.Dense(16, activation='relu')(lay)\n", " lay = tf.keras.layers.Dropout(0.2)(lay)\n", " out = tf.keras.layers.Dense(3, activation='softmax')(lay)\n", " \n", " model = tf.keras.models.Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)\n", " model.compile(tf.keras.optimizers.Adam(lr=2e-5), loss='categorical_crossentropy', metrics=['accuracy'])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vnMWi1pOBG8G", "outputId": "035cc106-b13f-4e52-9fe8-b13b5f398497" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Negative' 'Neutral' 'Positive']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.8/dist-packages/keras/optimizers/optimizer_v2/adam.py:110: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.\n", " super(Adam, self).__init__(name, **kwargs)\n" ] } ], "source": [ "max_len = 200\n", "data_input = bert_encode(data.Content.values, tokenizer, max_len=max_len)\n", "data_labels = x\n", "\n", "\n", "labels = label.classes_\n", "print(labels)\n", "\n", "model = build_model(bert_layer, max_len=max_len)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "an9KDmHxBHD1", "outputId": "e3fb5ef0-03fa-487f-91ec-7ae2f6039f80" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "500/500 [==============================] - 379s 734ms/step - loss: 0.6074 - accuracy: 0.7531 - val_loss: 0.3676 - val_accuracy: 0.8715\n", "Epoch 2/10\n", "500/500 [==============================] - 376s 753ms/step - loss: 0.3014 - accuracy: 0.8991 - val_loss: 0.3438 - val_accuracy: 0.9110\n", "Epoch 3/10\n", "500/500 [==============================] - 377s 754ms/step - loss: 0.1746 - accuracy: 0.9460 - val_loss: 0.2418 - val_accuracy: 0.9400\n", "Epoch 4/10\n", "500/500 [==============================] - 376s 753ms/step - loss: 0.1101 - accuracy: 0.9641 - val_loss: 0.2686 - val_accuracy: 0.9320\n", "Epoch 5/10\n", "500/500 [==============================] - 377s 754ms/step - loss: 0.0745 - accuracy: 0.9787 - val_loss: 0.3562 - val_accuracy: 0.9380\n", "Epoch 6/10\n", "500/500 [==============================] - 376s 753ms/step - loss: 0.0553 - accuracy: 0.9844 - val_loss: 0.3645 - val_accuracy: 0.9330\n", "Epoch 7/10\n", "500/500 [==============================] - 377s 754ms/step - loss: 0.0439 - accuracy: 0.9879 - val_loss: 0.3589 - val_accuracy: 0.9395\n", "Epoch 8/10\n", "500/500 [==============================] - 377s 754ms/step - loss: 0.0345 - accuracy: 0.9902 - val_loss: 0.4334 - val_accuracy: 0.9250\n", "Epoch 
9/10\n", "500/500 [==============================] - 377s 753ms/step - loss: 0.0366 - accuracy: 0.9918 - val_loss: 0.3511 - val_accuracy: 0.9335\n", "Epoch 10/10\n", "500/500 [==============================] - 376s 753ms/step - loss: 0.0290 - accuracy: 0.9921 - val_loss: 0.4590 - val_accuracy: 0.9350\n" ] } ], "source": [ "#checkpoint = tf.keras.callbacks.ModelCheckpoint('model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)\n", "#earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, verbose=1)\n", "train_sh3 = model.fit(\n", " data_input, data_labels,\n", " validation_split=0.20,\n", " epochs=10,\n", " #callbacks=[checkpoint, earlystopping],\n", " batch_size=16\n", ")" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Hx29ZDSmBdre", "outputId": "452efcf9-661f-4a4c-feac-a434d425b197" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "63/63 [==============================] - 26s 416ms/step\n", " precision recall f1-score support\n", "\n", " 0 0.97 0.92 0.95 169\n", " 1 0.99 0.98 0.99 1226\n", " 2 0.97 0.99 0.98 605\n", "\n", " accuracy 0.98 2000\n", " macro avg 0.97 0.96 0.97 2000\n", "weighted avg 0.98 0.98 0.98 2000\n", "\n", "[[ 156 8 5]\n", " [ 4 1207 15]\n", " [ 1 7 597]]\n", "Accuracy: 0.980000\n", "Precision: 0.980046\n", "Recall: 0.980000\n", "F1 score: 0.979946\n" ] } ], "source": [ "data_test = pd.read_csv('/content/drive/My Drive/crypto_10k_tweets_withoutpreprocessed_sentiments_test.csv')\n", "data_test = data_test.dropna(axis=0, subset=[\"Content\"])\n", "# testing: encode the held-out labels\n", "label = preprocessing.LabelEncoder()\n", "y = label.fit_transform(data_test['Sentiment'])\n", "y = to_categorical(y)\n", "#print(y[:5])\n", "max_len = 200\n", "test_input = bert_encode(data_test.Content.values, tokenizer, max_len=max_len)\n", "test_labels = y\n", "\n", "from matplotlib import pyplot as plt\n", "from sklearn.metrics import precision_score\n", "from sklearn.metrics import recall_score\n", "from sklearn.metrics import f1_score\n", "import seaborn as sns\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import confusion_matrix\n", "test_predictions = model.predict(test_input)\n", "test_labels = np.argmax(test_labels, axis=1)\n", "print(classification_report(test_labels, np.argmax(test_predictions, axis=1)))\n", "print(confusion_matrix(test_labels, np.argmax(test_predictions, axis=1)))\n", "accuracy = accuracy_score(test_labels, np.argmax(test_predictions, axis=1))\n", "print('Accuracy: %f' % accuracy)\n", "precision = precision_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('Precision: %f' % precision)\n", "recall = recall_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('Recall: %f' % recall)\n", "f1 = f1_score(test_labels, np.argmax(test_predictions, axis=1), average='weighted')\n", "print('F1 score: %f' % f1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "5Vda2kZ2BdxV" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "machine_shape": "hm", "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python",
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 1 }