{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iJZroSnbTbrf", "outputId": "2225daa4-b4d5-462c-f758-181e07811a2b" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LTtjriHZUdyx", "outputId": "9f115791-19a0-4069-90bb-2fe495479bcc" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting VaderSentiment\n", " Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/126.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━\u001b[0m \u001b[32m112.6/126.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m126.0/126.0 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from VaderSentiment) (2.31.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->VaderSentiment) (3.2.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->VaderSentiment) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->VaderSentiment) (2.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->VaderSentiment) (2023.7.22)\n", "Installing collected packages: 
VaderSentiment\n", "Successfully installed VaderSentiment-3.3.2\n" ] } ], "source": [ "!pip install VaderSentiment" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MlFWUkrSUzxg", "outputId": "ea477eaf-551f-4b3f-d5bc-b5a5581ec131" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting flair\n", " Downloading flair-0.12.2-py3-none-any.whl (373 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m373.1/373.1 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.10/dist-packages (from flair) (2.8.2)\n", "Requirement already satisfied: torch!=1.8,>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from flair) (2.0.1+cu118)\n", "Requirement already satisfied: gensim>=3.8.0 in /usr/local/lib/python3.10/dist-packages (from flair) (4.3.1)\n", "Requirement already satisfied: tqdm>=4.26.0 in /usr/local/lib/python3.10/dist-packages (from flair) (4.66.1)\n", "Collecting segtok>=1.5.7 (from flair)\n", " Downloading segtok-1.5.11-py3-none-any.whl (24 kB)\n", "Requirement already satisfied: matplotlib>=2.2.3 in /usr/local/lib/python3.10/dist-packages (from flair) (3.7.1)\n", "Collecting mpld3==0.3 (from flair)\n", " Downloading mpld3-0.3.tar.gz (788 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m788.5/788.5 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from flair) (1.2.2)\n", "Collecting sqlitedict>=1.6.0 (from flair)\n", " Downloading sqlitedict-2.1.0.tar.gz (21 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting deprecated>=1.2.4 (from flair)\n", " Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", "Requirement already satisfied: hyperopt>=0.2.7 in /usr/local/lib/python3.10/dist-packages (from flair) (0.2.7)\n", "Collecting boto3 (from flair)\n", " Downloading boto3-1.28.36-py3-none-any.whl (135 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting transformers[sentencepiece]>=4.18.0 (from flair)\n", " Downloading transformers-4.32.1-py3-none-any.whl (7.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m30.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting bpemb>=0.3.2 (from flair)\n", " Downloading bpemb-0.3.4-py3-none-any.whl (19 kB)\n", "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from flair) (2023.6.3)\n", "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from flair) (0.9.0)\n", "Collecting langdetect (from flair)\n", " Downloading langdetect-1.0.9.tar.gz (981 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m981.5/981.5 kB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from flair) (4.9.3)\n", "Collecting ftfy (from flair)\n", " Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting janome (from flair)\n", " Downloading Janome-0.5.0-py2.py3-none-any.whl (19.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.7/19.7 MB\u001b[0m \u001b[31m44.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting gdown==4.4.0 (from flair)\n", " Downloading gdown-4.4.0.tar.gz (14 kB)\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Collecting huggingface-hub>=0.10.0 (from flair)\n", " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m20.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting conllu>=4.0 (from flair)\n", " Downloading conllu-4.5.3-py2.py3-none-any.whl (16 kB)\n", "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from flair) (10.1.0)\n", "Collecting wikipedia-api (from flair)\n", " Downloading Wikipedia_API-0.6.0-py3-none-any.whl (14 kB)\n", "Collecting pptree (from flair)\n", " Downloading pptree-3.1.tar.gz (3.0 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting pytorch-revgrad (from flair)\n", " Downloading pytorch_revgrad-0.2.0-py3-none-any.whl (4.6 kB)\n", "Collecting transformer-smaller-training-vocab>=0.2.1 (from flair)\n", " Downloading transformer_smaller_training_vocab-0.3.1-py3-none-any.whl (14 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from gdown==4.4.0->flair) (3.12.2)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.10/dist-packages (from gdown==4.4.0->flair) (2.31.0)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from gdown==4.4.0->flair) (1.16.0)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown==4.4.0->flair) (4.11.2)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from bpemb>=0.3.2->flair) (1.23.5)\n", "Collecting sentencepiece (from bpemb>=0.3.2->flair)\n", " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated>=1.2.4->flair) (1.14.1)\n", "Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim>=3.8.0->flair) (1.10.1)\n", "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=3.8.0->flair) (6.3.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (2023.6.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (6.0.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in 
/usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (4.7.1)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (23.1)\n", "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) (3.1)\n", "Requirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) (0.18.3)\n", "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) (2.2.1)\n", "Requirement already satisfied: py4j in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) (0.10.9.7)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (1.1.0)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (4.42.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (1.4.4)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (9.4.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (3.1.1)\n", "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->flair) (1.3.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->flair) (3.2.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (1.12)\n", "Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (3.1.2)\n", "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (2.0.0)\n", "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.8,>=1.5.0->flair) (3.27.2)\n", "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.8,>=1.5.0->flair) (16.0.6)\n", "Collecting torch!=1.8,>=1.5.0 (from flair)\n", " Downloading torch-2.0.0-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m619.9/619.9 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.0/21.0 MB\u001b[0m \u001b[31m45.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cuda-runtime-cu11==11.7.99 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m849.3/849.3 kB\u001b[0m \u001b[31m61.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cuda-cupti-cu11==11.7.101 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m89.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cudnn-cu11==8.5.0.96 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading 
nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m557.1/557.1 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cublas-cu11==11.10.3.66 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m317.1/317.1 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cufft-cu11==10.9.0.58 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl (168.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.4/168.4 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-curand-cu11==10.2.10.91 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl (54.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cusolver-cu11==11.4.0.1 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.6/102.6 MB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-cusparse-cu11==11.7.4.91 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.2/173.2 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
"\u001b[?25hCollecting nvidia-nccl-cu11==2.14.3 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.1/177.1 MB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting nvidia-nvtx-cu11==11.7.91 (from torch!=1.8,>=1.5.0->flair)\n", " Downloading nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl (98 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.6/98.6 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch!=1.8,>=1.5.0->flair) (67.7.2)\n", "Requirement already satisfied: wheel in /usr/local/lib/python3.10/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch!=1.8,>=1.5.0->flair) (0.41.2)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers[sentencepiece]>=4.18.0->flair)\n", " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m65.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers[sentencepiece]>=4.18.0->flair)\n", " Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m49.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.18.0->flair) (3.20.3)\n", "Collecting botocore<1.32.0,>=1.31.36 (from boto3->flair)\n", " Downloading 
botocore-1.31.36-py3-none-any.whl (11.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.1/11.1 MB\u001b[0m \u001b[31m109.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->flair)\n", " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n", "Collecting s3transfer<0.7.0,>=0.6.0 (from boto3->flair)\n", " Downloading s3transfer-0.6.2-py3-none-any.whl (79 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: wcwidth>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ftfy->flair) (0.2.6)\n", "Collecting urllib3<1.27,>=1.25.4 (from botocore<1.32.0,>=1.31.36->boto3->flair)\n", " Downloading urllib3-1.26.16-py2.py3-none-any.whl (143 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.1/143.1 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting accelerate>=0.20.3 (from transformers[sentencepiece]>=4.18.0->flair)\n", " Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.2/251.2 kB\u001b[0m \u001b[31m24.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown==4.4.0->flair) (2.4.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.8,>=1.5.0->flair) (2.1.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (3.2.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (3.4)\n", 
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (2023.7.22)\n", "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (1.7.1)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.8,>=1.5.0->flair) (1.3.0)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.3->transformers[sentencepiece]>=4.18.0->flair) (5.9.5)\n", "Building wheels for collected packages: gdown, mpld3, sqlitedict, langdetect, pptree\n", " Building wheel for gdown (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for gdown: filename=gdown-4.4.0-py3-none-any.whl size=14758 sha256=5df426840ee967367c9df77389c67f469d52d29f91fcc7de7ed7ed16a2457953\n", " Stored in directory: /root/.cache/pip/wheels/03/0b/3f/6ddf67a417a5b400b213b0bb772a50276c199a386b12c06bfc\n", " Building wheel for mpld3 (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for mpld3: filename=mpld3-0.3-py3-none-any.whl size=116686 sha256=1fb369b23567dbe989f4a84e4a68a4a2cd966313293fc40f44f837475dc9780f\n", " Stored in directory: /root/.cache/pip/wheels/9c/92/f7/45d9aac5dcfb1c2a1761a272365599cc7ba1050ce211a3fd9a\n", " Building wheel for sqlitedict (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for sqlitedict: filename=sqlitedict-2.1.0-py3-none-any.whl size=16864 sha256=3e7a79b3e61ffd24b490417f95e2d6a2ab2df52dd590ea1321c6e5f1904e2109\n", " Stored in directory: /root/.cache/pip/wheels/79/d6/e7/304e0e6cb2221022c26d8161f7c23cd4f259a9e41e8bbcfabd\n", " Building wheel for langdetect (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993224 sha256=961a135a080efa6c8cdd0d1f4b05139fd4ae63d1ddee873ef7be243b5d226242\n", " Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106\n", " Building wheel for pptree (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pptree: filename=pptree-3.1-py3-none-any.whl size=4609 sha256=88ed43904c52b06328a0ddb2984c8a34c213c343571aa309edbf38cf6f16c955\n", " Stored in directory: /root/.cache/pip/wheels/9f/b6/0e/6f26eb9e6eb53ff2107a7888d72b5a6a597593956113037828\n", "Successfully built gdown mpld3 sqlitedict langdetect pptree\n", "Installing collected packages: tokenizers, sqlitedict, sentencepiece, safetensors, pptree, mpld3, janome, urllib3, segtok, nvidia-nvtx-cu11, nvidia-nccl-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cufft-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-nvrtc-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, langdetect, jmespath, ftfy, deprecated, conllu, nvidia-cusolver-cu11, nvidia-cudnn-cu11, botocore, wikipedia-api, s3transfer, huggingface-hub, bpemb, transformers, gdown, boto3, torch, accelerate, transformer-smaller-training-vocab, pytorch-revgrad, flair\n", " Attempting uninstall: urllib3\n", " Found existing installation: urllib3 2.0.4\n", " Uninstalling urllib3-2.0.4:\n", " Successfully uninstalled urllib3-2.0.4\n", " Attempting uninstall: gdown\n", " Found existing installation: gdown 4.6.6\n", " Uninstalling gdown-4.6.6:\n", " Successfully uninstalled gdown-4.6.6\n", " Attempting uninstall: torch\n", " Found existing installation: torch 2.0.1+cu118\n", " Uninstalling torch-2.0.1+cu118:\n", " Successfully uninstalled torch-2.0.1+cu118\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "torchaudio 2.0.2+cu118 requires torch==2.0.1, but you have torch 2.0.0 which is incompatible.\n", "torchdata 0.6.1 requires torch==2.0.1, but you have torch 2.0.0 which is incompatible.\n", "torchtext 0.15.2 requires torch==2.0.1, but you have torch 2.0.0 which is incompatible.\n", "torchvision 0.15.2+cu118 requires torch==2.0.1, but you have torch 2.0.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed accelerate-0.22.0 boto3-1.28.36 botocore-1.31.36 bpemb-0.3.4 conllu-4.5.3 deprecated-1.2.14 flair-0.12.2 ftfy-6.1.1 gdown-4.4.0 huggingface-hub-0.16.4 janome-0.5.0 jmespath-1.0.1 langdetect-1.0.9 mpld3-0.3 nvidia-cublas-cu11-11.10.3.66 nvidia-cuda-cupti-cu11-11.7.101 nvidia-cuda-nvrtc-cu11-11.7.99 nvidia-cuda-runtime-cu11-11.7.99 nvidia-cudnn-cu11-8.5.0.96 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.2.10.91 nvidia-cusolver-cu11-11.4.0.1 nvidia-cusparse-cu11-11.7.4.91 nvidia-nccl-cu11-2.14.3 nvidia-nvtx-cu11-11.7.91 pptree-3.1 pytorch-revgrad-0.2.0 s3transfer-0.6.2 safetensors-0.3.3 segtok-1.5.11 sentencepiece-0.1.99 sqlitedict-2.1.0 tokenizers-0.13.3 torch-2.0.0 transformer-smaller-training-vocab-0.3.1 transformers-4.32.1 urllib3-1.26.16 wikipedia-api-0.6.0\n" ] } ], "source": [ "!pip install flair" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 419 }, "id": "pS8IoLN_U4SM", "outputId": "9aea6020-2dfd-4fae-d904-312ce1eafe99" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting pycountry\n", " Downloading pycountry-22.3.5.tar.gz (10.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.1/10.1 MB\u001b[0m \u001b[31m36.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... 
\u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from pycountry) (67.7.2)\n", "Building wheels for collected packages: pycountry\n", " Building wheel for pycountry (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pycountry: filename=pycountry-22.3.5-py2.py3-none-any.whl size=10681833 sha256=fc4004cf677b7fbbe25f7d2dcd3eb916196943025509f785db4abc12a7b9bea2\n", " Stored in directory: /root/.cache/pip/wheels/03/57/cc/290c5252ec97a6d78d36479a3c5e5ecc76318afcb241ad9dbe\n", "Successfully built pycountry\n", "Installing collected packages: pycountry\n", "Successfully installed pycountry-22.3.5\n", "Collecting emoji\n", " Downloading emoji-2.8.0-py2.py3-none-any.whl (358 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m358.9/358.9 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: emoji\n", "Successfully installed emoji-2.8.0\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ ":29: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. 
However, they will remain available as 'seaborn-v0_8-\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textis_rumoruser.handletopic
0Charlie Hebdo became well known for publishing...0.0BBCDanielScharliehebdo
1Now 10 dead in a shooting there today RT \"@BBC...0.0robbylevycharliehebdo
2@BBCDanielS @BBCWorld I'm guessing this is bei...0.0ModerateInAllcharliehebdo
3@BBCDanielS @BBCWorld why would you mention th...0.0GabTarquinicharliehebdo
4@BBCDanielS @BBCWorld perps identified?0.0freethought41charliehebdo
...............
62440@AnonyOps @Xplant So that means its ok to torc...1.0RianAldenferguson
62441@RianAlden not at all, but they need to change...1.0Xplantferguson
62442@Xplant @AnonyOps Absoluteky. But it pains me...1.0RianAldenferguson
62443@Xplant @AnonyOps I'm curious how many of thes...1.0RianAldenferguson
62444@Xplant @AnonyOps You get 15,000 people showin...1.0RianAldenferguson
\n", "

62445 rows × 4 columns

\n", "\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", " \n" ] }, "metadata": {}, "execution_count": 6 } ], "source": [ "data = pd.read_csv('/content/drive/MyDrive/Dataset/pheme/dataset.csv')\n", "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "I-aqe8QLX665", "outputId": "a1cb0cb5-e1c9-45fa-d75e-be7ce62f8355" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " text is_rumor \\\n", "4350 @Perseus009 @guardian After the 1962 Cuban Mis... 0.0 \n", "5520 @Independent After the 1962 Cuban Missile Cris... 0.0 \n", "6538 @cnni Call to register for this site and gain ... 0.0 \n", "6997 @bouckap At least 12 killed in shooting at off... 0.0 \n", "7477 @cnni At least 12 killed in shooting at office... 0.0 \n", "... ... ... \n", "62280 MSNBC is literally reporting that Police Chief... 1.0 \n", "62319 Remarkably, despite the military-style police ... 1.0 \n", "62339 Americans are 8 times more likely to be killed... 1.0 \n", "62408 I refuse to call the #Ferguson police chief in... 1.0 \n", "62427 Police in #Ferguson once charged a man w/ dest... 1.0 \n", "\n", " user.handle topic \n", "4350 mycubanfriends charliehebdo \n", "5520 mycubanfriends charliehebdo \n", "6538 ibraheemalmasri charliehebdo \n", "6997 hot10news charliehebdo \n", "7477 hot10news charliehebdo \n", "... ... ... \n", "62280 elonjames ferguson \n", "62319 TheDailyEdge ferguson \n", "62339 TheEyeOfControl ferguson \n", "62408 ProfessorCrunk ferguson \n", "62427 AnonyOps ferguson \n", "\n", "[800 rows x 4 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textis_rumoruser.handletopic
4350@Perseus009 @guardian After the 1962 Cuban Mis...0.0mycubanfriendscharliehebdo
5520@Independent After the 1962 Cuban Missile Cris...0.0mycubanfriendscharliehebdo
6538@cnni Call to register for this site and gain ...0.0ibraheemalmasricharliehebdo
6997@bouckap At least 12 killed in shooting at off...0.0hot10newscharliehebdo
7477@cnni At least 12 killed in shooting at office...0.0hot10newscharliehebdo
...............
62280MSNBC is literally reporting that Police Chief...1.0elonjamesferguson
62319Remarkably, despite the military-style police ...1.0TheDailyEdgeferguson
62339Americans are 8 times more likely to be killed...1.0TheEyeOfControlferguson
62408I refuse to call the #Ferguson police chief in...1.0ProfessorCrunkferguson
62427Police in #Ferguson once charged a man w/ dest...1.0AnonyOpsferguson
\n", "

800 rows × 4 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 10 } ], "source": [ "data[data.duplicated()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "vpcHx_9jYB60" }, "outputs": [], "source": [ "data = data.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 53 }, "id": "gPVchIxwYG6N", "outputId": "a81fd829-09e9-474d-c320-1cdd2ca1cd1b" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Empty DataFrame\n", "Columns: [text, is_rumor, user.handle, topic]\n", "Index: []" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textis_rumoruser.handletopic
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 13 } ], "source": [ "data[data.duplicated()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uVz9TJgZXtiZ", "outputId": "7cb80003-38a6-4e87-e229-524624c62204" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0 48018\n", "1.0 13625\n", "Name: is_rumor, dtype: int64" ] }, "metadata": {}, "execution_count": 16 } ], "source": [ "data['is_rumor'].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 503 }, "id": "yqTbVQnlYjfq", "outputId": "90b9e74e-c937-4e4d-b946-c82470f94b27" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": {}, "execution_count": 17 }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ], "source": [ "data.is_rumor.value_counts().plot(kind='bar')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jUMJByQiYrFm", "outputId": "0cbe0636-5ec1-47fa-b71c-3208cb4a720f" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (3.8.1)\n", "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk) (8.1.7)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk) (1.3.2)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk) (2023.6.3)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk) (4.66.1)\n" ] } ], "source": [ "!pip install --user -U nltk" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1_KrpPrnaQuT", "outputId": "fe5a18b1-8a7e-438e-d11e-10c2e4735e7f" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: numpy in /root/.local/lib/python3.10/site-packages (1.25.2)\n" ] } ], "source": [ "!pip install --user -U numpy" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "il2aGIHiaWwo", "outputId": "04ef5330-d5ab-4484-f001-26d39ac984a3" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: vaderSentiment in /usr/local/lib/python3.10/dist-packages (3.3.2)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from vaderSentiment) (2.31.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (3.2.0)\n", "Requirement already 
satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (1.26.16)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (2023.7.22)\n" ] } ], "source": [ "!pip install vaderSentiment" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3jDc6l9Rsjdz", "outputId": "2c3e6a26-130e-4409-a5aa-84ead3e20347" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "[nltk_data] Downloading package vader_lexicon to /root/nltk_data...\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, "metadata": {}, "execution_count": 23 } ], "source": [ "import nltk\n", "nltk.download('vader_lexicon')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "4aXCcZFxsu-C" }, "outputs": [], "source": [ "sia_vader = SentimentIntensityAnalyzer()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "oljPIItLs_IX" }, "outputs": [], "source": [
    "def vader_dominant_label(text, analyzer):\n",
    "    \"\"\"Return whichever of 'neg', 'neu' or 'pos' scores highest for `text` according to `analyzer`.\n",
    "\n",
    "    `analyzer` only needs a `polarity_scores(text)` method that returns a dict of scores.\n",
    "    \"\"\"\n",
    "    scores = analyzer.polarity_scores(text)\n",
    "    scores.pop('compound', None) # the aggregate score must not compete with the three components\n",
    "    return max(scores, key=scores.get)\n",
    "\n",
    "# NOTE(review): the counts recorded below are ~94% 'neu'; VADER's documentation suggests thresholding\n",
    "# the 'compound' score instead of taking the largest component - confirm which labelling is intended.\n",
    "sentiments = [vader_dominant_label(tweet, sia_vader) for tweet in data.text]"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "udIdHsKttOIr", "outputId": "27b05357-5056-4ee5-ee67-715c8a0205d4" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "neu 58197\n", "neg 1826\n", "pos 1622\n", "Name: sentiment, dtype: int64" ] }, "metadata": {}, "execution_count": 26 } ], "source": [ "data['sentiment'] = sentiments\n", "data['sentiment'].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "VCrjJK0RtZX3", "outputId": "7b8897c8-85c5-40b6-df43-86a733ebefc2" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting demoji\n", " Downloading demoji-1.1.0-py3-none-any.whl (42 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.9/42.9 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: demoji\n", "Successfully installed demoji-1.1.0\n" ] } ], "source": [ "!pip install demoji" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "VZ47IJeltkua" }, "outputs": [], "source": [
    "#text cleaning\n",
    "##CUSTOM DEFINED FUNCTIONS TO CLEAN THE TWEETS\n",
    "\n",
    "#Clean emojis from text\n",
    "def strip_emoji(text):\n",
    "    \"\"\"Return `text` with every emoji removed.\"\"\"\n",
    "    # emoji.get_emoji_regexp() no longer exists in emoji >= 2.0, so use replace_emoji\n",
    "    # whenever the installed package offers it and keep the regexp only as a fallback.\n",
    "    if hasattr(emoji, 'replace_emoji'):\n",
    "        return emoji.replace_emoji(text, replace='')\n",
    "    return re.sub(emoji.get_emoji_regexp(), r\"\", text) #remove emoji\n",
    "\n",
    "#Remove punctuations, links, mentions and \\r\\n new line characters\n",
    "def strip_all_entities(text):\n",
    "    \"\"\"Lowercase `text`, flatten line breaks and delete links, @mentions, non-ASCII characters and punctuation.\"\"\"\n",
    "    text = text.replace('\\r', '').replace('\\n', ' ').lower() #remove \\n and \\r and lowercase\n",
    "    text = re.sub(r\"(?:\\@|https?\\://)\\S+\", \"\", text) #remove links and mentions\n",
    "    text = re.sub(r'[^\\x00-\\x7f]',r'', text) #remove non utf8/ascii characters such as '\\x9a\\x91\\x97\\x9a\\x97'\n",
    "    # Only ASCII survives the previous step, so ASCII punctuation is all that is left to ban.\n",
    "    return text.translate(str.maketrans('', '', string.punctuation))\n",
    "\n",
    "#clean hashtags at the end of the sentence, and keep those in the middle of the sentence by removing just the # symbol\n",
    "def clean_hashtags(tweet):\n",
    "    \"\"\"Drop the trailing run of hashtags, then turn every remaining '#' or '_' into a word break.\"\"\"\n",
    "    # The pattern has to be a raw string: in a plain literal '\\b' is a backspace character,\n",
    "    # which silently disabled the look-ahead that is meant to spare a literal '#hashtag'.\n",
    "    trailing_tags = r'#(?!(?:hashtag)\\b)[\\w-]+(?=(?:\\s+#[\\w-]+)*\\s*$)'\n",
    "    new_tweet = \" \".join(word.strip() for word in re.split(trailing_tags, tweet)) #remove last hashtags\n",
    "    new_tweet2 = \" \".join(word.strip() for word in re.split('#|_', new_tweet)) #remove hashtags symbol from words in the middle of the sentence\n",
    "    return new_tweet2\n",
    "\n",
    "#Filter special characters such as & and $ present in some words\n",
    "def filter_chars(a):\n",
    "    \"\"\"Blank out every space-separated word of `a` that contains '$' or '&'.\"\"\"\n",
    "    # A dropped word leaves an empty slot behind, so its neighbouring spaces stay in place.\n",
    "    return ' '.join('' if ('$' in word or '&' in word) else word for word in a.split(' '))\n",
    "\n",
    "def remove_mult_spaces(text): ## remove multiple spaces\n",
    "    \"\"\"Collapse each run of two or more whitespace characters into one space.\"\"\"\n",
    "    return re.sub(r\"\\s\\s+\", \" \", text)\n",
    "\n",
    "\n",
    "def remove_spam(text):\n",
    "    \"\"\"Return '' when `text` contains 'subscribe', otherwise return `text` unchanged.\"\"\"\n",
    "    return '' if 'subscribe' in text else text"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "V_F8xE75uDUT" }, "outputs": [], "source": [
    "# Missing tweets have to become '' before the str conversion; converting first turns NaN into the text 'nan'.\n",
    "data['text'] = data['text'].fillna('').apply(str)\n",
    "# NOTE(review): strip_all_entities already deletes '#', '_', '$' and '&', so clean_hashtags and\n",
    "# filter_chars have nothing left to act on in this order - confirm whether they should run first.\n",
    "texts_new = [\n",
    "    remove_spam(remove_mult_spaces(filter_chars(clean_hashtags(strip_all_entities(t)))))\n",
    "    for t in data.text\n",
    "]"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xdQ2U-nSuU-X" }, "outputs": [], "source": [ "data['text_clean'] = texts_new" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "KO4-rDu5uZvr" }, "outputs": [], "source": [ "data['text_clean'] = data['text_clean'].str.lower()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "NRZ2qtnJuffo" }, "outputs": [], "source": [
    "# Word count of every cleaned tweet\n",
    "text_len = [len(text.split()) for text in data.text_clean]"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Trts6iyYulPq" }, "outputs": [], "source": [ "data['text_len'] = text_len" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 506 }, "id": "5NXpYupIuqq6", "outputId": "82002ffe-aba4-4b2c-c927-1a0ae53f7995" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ], "source": [
    "# Distribution of the cleaned tweet length (in words)\n",
    "plt.figure(figsize=(10,5))\n",
    "sns.histplot(x='text_len', data=data, bins=20)\n",
    "plt.title('Cleaned text length')\n",
    "plt.show()"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 506 }, "id": "PWjZDaVFuu_G", "outputId": "d3016afe-5db6-416e-bb24-e21f2e025bd8" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ], "source": [
    "# Count of tweets per word length, restricted to tweets shorter than 10 words\n",
    "plt.figure(figsize=(7,5))\n",
    "ax = sns.countplot(x='text_len', data=data[data['text_len'] < 10], palette='mako')\n",
    "plt.title('Tweets with less than 10 words')\n",
    "plt.yticks([])\n",
    "ax.bar_label(ax.containers[0])\n",
    "plt.ylabel('count')\n",
    "plt.xlabel('')\n",
    "plt.show()"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "JeOzOBVHvBQH" }, "outputs": [], "source": [
    "# Keep tweets with more than 4 words. .copy() makes the filtered frame independent, so the column\n",
    "# assignments in the following cells do not raise SettingWithCopyWarning.\n",
    "data = data[data['text_len'] > 4].copy()"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xpYdUBY_vKVE" }, "outputs": [], "source": [ "sia_vader = SentimentIntensityAnalyzer()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jnJAWcY7vPxv" }, "outputs": [], "source": [
    "# Same labelling as for the raw text, now applied to the cleaned tweets\n",
    "sentiments_vader = [vader_dominant_label(tweet, sia_vader) for tweet in data.text_clean]"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OYUzWC5yvVwF", "outputId": "3df8ba12-70ed-4e57-d112-1e139233d154" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "neu 48919\n", "neg 2261\n", "pos 1261\n", "Name: sentiment_vader, dtype: int64" ] }, "metadata": {}, "execution_count": 39 } ], "source": [ "data['sentiment_vader'] = sentiments_vader\n", "data['sentiment_vader'].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jBFVffd2vdn9" }, "outputs": [], "source": [
    "def polarity_to_text(blob):\n",
    "    \"\"\"Map `blob.sentiment.polarity` to 'pos' (>= 0.2), 'neu' (>= -0.01) or 'neg' (anything lower).\"\"\"\n",
    "    polarity = blob.sentiment.polarity\n",
    "    if polarity >= 0.2:\n",
    "        return 'pos'\n",
    "    if polarity >= -0.01:\n",
    "        return 'neu'\n",
    "    return 'neg'"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "0Xzksg8svlzN" }, "outputs": [], "source": [
    "sentiments_blob = [polarity_to_text(TextBlob(tweet)) for tweet in data.text_clean]"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FT8izdeYvraN", "outputId": "1b1808fb-04c1-4c62-c8be-d1621a41407c" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "neu 27658\n", "neg 13556\n", "pos 11227\n", "Name: sentiment_blob, dtype: int64" ] }, "metadata": {}, "execution_count": 42 } ], "source": [ "data['sentiment_blob'] = sentiments_blob\n", "data['sentiment_blob'].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 504 }, "id": "kECzmHC6vzLX", "outputId": "25851287-f7a2-4db5-f8b1-4ec8c5c8e872" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": {}, "execution_count": 43 }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ], "source": [ "data.sentiment_blob.value_counts().plot(kind='bar')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 580 }, "id": "y0AQ3_IzwRRD", "outputId": "351a554c-a4d4-491b-fa0f-9fdde514a385" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " text is_rumor \\\n", "0 Charlie Hebdo became well known for publishing... 0.0 \n", "1 Now 10 dead in a shooting there today RT \"@BBC... 0.0 \n", "2 @BBCDanielS @BBCWorld I'm guessing this is bei... 0.0 \n", "3 @BBCDanielS @BBCWorld why would you mention th... 0.0 \n", "7 @GabTarquini @BBCDanielS @BBCWorld Maybe becau... 0.0 \n", "... ... ... \n", "62440 @AnonyOps @Xplant So that means its ok to torc... 1.0 \n", "62441 @RianAlden not at all, but they need to change... 1.0 \n", "62442 @Xplant @AnonyOps Absoluteky. But it pains me... 1.0 \n", "62443 @Xplant @AnonyOps I'm curious how many of thes... 1.0 \n", "62444 @Xplant @AnonyOps You get 15,000 people showin... 1.0 \n", "\n", " user.handle topic sentiment \\\n", "0 BBCDanielS charliehebdo neu \n", "1 robbylevy charliehebdo neu \n", "2 ModerateInAll charliehebdo neu \n", "3 GabTarquini charliehebdo neu \n", "7 S_Jakobsen charliehebdo neu \n", "... ... ... ... \n", "62440 RianAlden ferguson neu \n", "62441 Xplant ferguson neu \n", "62442 RianAlden ferguson neu \n", "62443 RianAlden ferguson neu \n", "62444 RianAlden ferguson neu \n", "\n", " text_clean text_len \\\n", "0 charlie hebdo became well known for publishing... 13 \n", "1 now 10 dead in a shooting there today rt charl... 22 \n", "2 im guessing this is being considered terrorism... 12 \n", "3 why would you mention that before knowing the ... 10 \n", "7 maybe because they shouted the prophet have be... 16 \n", "... ... ... \n", "62440 so that means its ok to torch and loot someone... 11 \n", "62441 not at all but they need to change some things... 
13 \n", "62442 absoluteky but it pains me to see private citi... 20 \n", "62443 im curious how many of these protesters ever s... 21 \n", "62444 you get 15000 people showing up to a city meet... 21 \n", "\n", " sentiment_vader sentiment_blob \n", "0 neu neu \n", "1 neu neg \n", "2 neu pos \n", "3 neu neu \n", "7 neu pos \n", "... ... ... \n", "62440 neu pos \n", "62441 neu neu \n", "62442 neu neu \n", "62443 neu pos \n", "62444 neu pos \n", "\n", "[52441 rows x 9 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textis_rumoruser.handletopicsentimenttext_cleantext_lensentiment_vadersentiment_blob
0Charlie Hebdo became well known for publishing...0.0BBCDanielScharliehebdoneucharlie hebdo became well known for publishing...13neuneu
1Now 10 dead in a shooting there today RT \"@BBC...0.0robbylevycharliehebdoneunow 10 dead in a shooting there today rt charl...22neuneg
2@BBCDanielS @BBCWorld I'm guessing this is bei...0.0ModerateInAllcharliehebdoneuim guessing this is being considered terrorism...12neupos
3@BBCDanielS @BBCWorld why would you mention th...0.0GabTarquinicharliehebdoneuwhy would you mention that before knowing the ...10neuneu
7@GabTarquini @BBCDanielS @BBCWorld Maybe becau...0.0S_Jakobsencharliehebdoneumaybe because they shouted the prophet have be...16neupos
..............................
62440@AnonyOps @Xplant So that means its ok to torc...1.0RianAldenfergusonneuso that means its ok to torch and loot someone...11neupos
62441@RianAlden not at all, but they need to change...1.0Xplantfergusonneunot at all but they need to change some things...13neuneu
62442@Xplant @AnonyOps Absoluteky. But it pains me...1.0RianAldenfergusonneuabsoluteky but it pains me to see private citi...20neuneu
62443@Xplant @AnonyOps I'm curious how many of thes...1.0RianAldenfergusonneuim curious how many of these protesters ever s...21neupos
62444@Xplant @AnonyOps You get 15,000 people showin...1.0RianAldenfergusonneuyou get 15000 people showing up to a city meet...21neupos
\n", "

52441 rows × 9 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 44 } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "6D-DpF3ywlbt", "outputId": "b29ffda2-7298-4d4d-bbcd-2be89a0899f6" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " is_rumor sentiment_blob\n", "0 0.0 neu\n", "1 0.0 neg\n", "2 0.0 pos\n", "3 0.0 neu\n", "7 0.0 pos\n", "... ... ...\n", "62440 1.0 pos\n", "62441 1.0 neu\n", "62442 1.0 neu\n", "62443 1.0 pos\n", "62444 1.0 pos\n", "\n", "[52441 rows x 2 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
is_rumorsentiment_blob
00.0neu
10.0neg
20.0pos
30.0neu
70.0pos
.........
624401.0pos
624411.0neu
624421.0neu
624431.0pos
624441.0pos
\n", "

52441 rows × 2 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 45 } ], "source": [ "data[['is_rumor','sentiment_blob']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "qB2CpXMbyCO7" }, "outputs": [], "source": [ "#no relevance\n", "data[data['is_rumor']==1.0]\n", "\n", "#subsetDataFrame = data[data['sentiment_blob'] == 'pos']\n", "filterinfDataframe = data[(data['is_rumor'] == 1.0) & (data['sentiment_blob'] =='pos') ]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 580 }, "id": "gFIMlUrPAb2j", "outputId": "4451e384-62a2-4520-f84a-86557fae31da" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " text is_rumor \\\n", "30925 @H_E_Samuel Hi Henry would you be willing to g... 1.0 \n", "30926 @H_E_Samuel @H_E_Samuel please call them terro... 1.0 \n", "30931 @EdwardBowden @H_E_Samuel @George_Berridge bla... 1.0 \n", "30940 @euronews LOL. 5 million Muslims in France, wh... 1.0 \n", "30942 @Channel4News @GidonShaviv must be that peace ... 1.0 \n", "... ... ... \n", "62417 @ProfessorCrunk Right now, he's fighting for h... 1.0 \n", "62423 @ProfessorCrunk you don't have to be competent... 1.0 \n", "62440 @AnonyOps @Xplant So that means its ok to torc... 1.0 \n", "62443 @Xplant @AnonyOps I'm curious how many of thes... 1.0 \n", "62444 @Xplant @AnonyOps You get 15,000 people showin... 1.0 \n", "\n", " user.handle topic sentiment \\\n", "30925 NickyRusmith NaN neu \n", "30926 pravsly NaN neu \n", "30931 imranali27 NaN neu \n", "30940 NeoSkywalker13 NaN neu \n", "30942 onielio NaN pos \n", "... ... ... ... \n", "62417 aagha_personal ferguson neu \n", "62423 ElGringoSpeaks ferguson neu \n", "62440 RianAlden ferguson neu \n", "62443 RianAlden ferguson neu \n", "62444 RianAlden ferguson neu \n", "\n", " text_clean text_len \\\n", "30925 hi henry would you be willing to give itv news... 20 \n", "30926 please call them terrorists not gunmen dont di... 
21 \n", "30931 blame the shooter not the religion because the... 16 \n", "30940 lol 5 million muslims in france what a disgrac... 21 \n", "30942 must be that peace loving religion again 7 \n", "... ... ... \n", "62417 right now hes fighting for his jobnot justice ... 14 \n", "62423 you dont have to be competent to be malicious 9 \n", "62440 so that means its ok to torch and loot someone... 11 \n", "62443 im curious how many of these protesters ever s... 21 \n", "62444 you get 15000 people showing up to a city meet... 21 \n", "\n", " sentiment_vader sentiment_blob \n", "30925 neu pos \n", "30926 neu pos \n", "30931 neu pos \n", "30940 neu pos \n", "30942 pos pos \n", "... ... ... \n", "62417 neu pos \n", "62423 neu pos \n", "62440 neu pos \n", "62443 neu pos \n", "62444 neu pos \n", "\n", "[2157 rows x 9 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textis_rumoruser.handletopicsentimenttext_cleantext_lensentiment_vadersentiment_blob
30925@H_E_Samuel Hi Henry would you be willing to g...1.0NickyRusmithNaNneuhi henry would you be willing to give itv news...20neupos
30926@H_E_Samuel @H_E_Samuel please call them terro...1.0pravslyNaNneuplease call them terrorists not gunmen dont di...21neupos
30931@EdwardBowden @H_E_Samuel @George_Berridge bla...1.0imranali27NaNneublame the shooter not the religion because the...16neupos
30940@euronews LOL. 5 million Muslims in France, wh...1.0NeoSkywalker13NaNneulol 5 million muslims in france what a disgrac...21neupos
30942@Channel4News @GidonShaviv must be that peace ...1.0onielioNaNposmust be that peace loving religion again7pospos
..............................
62417@ProfessorCrunk Right now, he's fighting for h...1.0aagha_personalfergusonneuright now hes fighting for his jobnot justice ...14neupos
62423@ProfessorCrunk you don't have to be competent...1.0ElGringoSpeaksfergusonneuyou dont have to be competent to be malicious9neupos
62440@AnonyOps @Xplant So that means its ok to torc...1.0RianAldenfergusonneuso that means its ok to torch and loot someone...11neupos
62443@Xplant @AnonyOps I'm curious how many of thes...1.0RianAldenfergusonneuim curious how many of these protesters ever s...21neupos
62444@Xplant @AnonyOps You get 15,000 people showin...1.0RianAldenfergusonneuyou get 15000 people showing up to a city meet...21neupos
\n", "

2157 rows × 9 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 47 } ], "source": [ "filterinfDataframe = data[(data['is_rumor'] == 1.0) & (data['sentiment_blob'] =='pos')]\n", "filterinfDataframe" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7HTmn-xWzEJ_", "outputId": "7fc9be7b-5d89-4c11-c19b-e6f17c1c30d6" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: flair in /usr/local/lib/python3.10/dist-packages (0.12.2)\n", "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.10/dist-packages (from flair) (2.8.2)\n", "Requirement already satisfied: torch!=1.8,>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from flair) (2.0.0)\n", "Requirement already satisfied: gensim>=3.8.0 in /usr/local/lib/python3.10/dist-packages (from flair) (4.3.1)\n", "Requirement already satisfied: tqdm>=4.26.0 in /usr/local/lib/python3.10/dist-packages (from flair) (4.66.1)\n", "Requirement already satisfied: segtok>=1.5.7 in /usr/local/lib/python3.10/dist-packages (from flair) (1.5.11)\n", "Requirement already satisfied: matplotlib>=2.2.3 in /usr/local/lib/python3.10/dist-packages (from flair) (3.7.1)\n", "Requirement already satisfied: mpld3==0.3 in /usr/local/lib/python3.10/dist-packages (from flair) (0.3)\n", "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from flair) (1.2.2)\n", "Requirement already satisfied: sqlitedict>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from flair) (2.1.0)\n", "Requirement already satisfied: deprecated>=1.2.4 in /usr/local/lib/python3.10/dist-packages (from flair) (1.2.14)\n", "Requirement already satisfied: hyperopt>=0.2.7 in /usr/local/lib/python3.10/dist-packages (from flair) (0.2.7)\n", "Requirement already satisfied: boto3 in /usr/local/lib/python3.10/dist-packages (from flair) (1.28.36)\n", "Requirement already satisfied: transformers[sentencepiece]>=4.18.0 in 
/usr/local/lib/python3.10/dist-packages (from flair) (4.32.1)\n", "Requirement already satisfied: bpemb>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from flair) (0.3.4)\n", "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from flair) (2023.6.3)\n", "Requirement already satisfied: tabulate in /usr/local/lib/python3.10/dist-packages (from flair) (0.9.0)\n", "Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from flair) (1.0.9)\n", "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from flair) (4.9.3)\n", "Requirement already satisfied: ftfy in /usr/local/lib/python3.10/dist-packages (from flair) (6.1.1)\n", "Requirement already satisfied: janome in /usr/local/lib/python3.10/dist-packages (from flair) (0.5.0)\n", "Requirement already satisfied: gdown==4.4.0 in /usr/local/lib/python3.10/dist-packages (from flair) (4.4.0)\n", "Requirement already satisfied: huggingface-hub>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from flair) (0.16.4)\n", "Requirement already satisfied: conllu>=4.0 in /usr/local/lib/python3.10/dist-packages (from flair) (4.5.3)\n", "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from flair) (10.1.0)\n", "Requirement already satisfied: wikipedia-api in /usr/local/lib/python3.10/dist-packages (from flair) (0.6.0)\n", "Requirement already satisfied: pptree in /usr/local/lib/python3.10/dist-packages (from flair) (3.1)\n", "Requirement already satisfied: pytorch-revgrad in /usr/local/lib/python3.10/dist-packages (from flair) (0.2.0)\n", "Requirement already satisfied: transformer-smaller-training-vocab>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from flair) (0.3.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from gdown==4.4.0->flair) (3.12.2)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.10/dist-packages (from 
gdown==4.4.0->flair) (2.31.0)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from gdown==4.4.0->flair) (1.16.0)\n", "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown==4.4.0->flair) (4.11.2)\n", "Requirement already satisfied: numpy in /root/.local/lib/python3.10/site-packages (from bpemb>=0.3.2->flair) (1.25.2)\n", "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from bpemb>=0.3.2->flair) (0.1.99)\n", "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated>=1.2.4->flair) (1.14.1)\n", "Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim>=3.8.0->flair) (1.10.1)\n", "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=3.8.0->flair) (6.3.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (2023.6.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (6.0.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (4.7.1)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.10.0->flair) (23.1)\n", "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) (3.1)\n", "Requirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) (0.18.3)\n", "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) (2.2.1)\n", "Requirement already satisfied: py4j in /usr/local/lib/python3.10/dist-packages (from hyperopt>=0.2.7->flair) 
(0.10.9.7)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (1.1.0)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (4.42.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (1.4.4)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (9.4.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.2.3->flair) (3.1.1)\n", "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->flair) (1.3.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->flair) (3.2.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (1.12)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (3.1.2)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (11.7.99)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (11.7.99)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu11==11.7.101 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (11.7.101)\n", "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (8.5.0.96)\n", "Requirement already 
satisfied: nvidia-cublas-cu11==11.10.3.66 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (11.10.3.66)\n", "Requirement already satisfied: nvidia-cufft-cu11==10.9.0.58 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (10.9.0.58)\n", "Requirement already satisfied: nvidia-curand-cu11==10.2.10.91 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (10.2.10.91)\n", "Requirement already satisfied: nvidia-cusolver-cu11==11.4.0.1 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (11.4.0.1)\n", "Requirement already satisfied: nvidia-cusparse-cu11==11.7.4.91 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (11.7.4.91)\n", "Requirement already satisfied: nvidia-nccl-cu11==2.14.3 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (2.14.3)\n", "Requirement already satisfied: nvidia-nvtx-cu11==11.7.91 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (11.7.91)\n", "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.8,>=1.5.0->flair) (2.0.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch!=1.8,>=1.5.0->flair) (67.7.2)\n", "Requirement already satisfied: wheel in /usr/local/lib/python3.10/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch!=1.8,>=1.5.0->flair) (0.41.2)\n", "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.8,>=1.5.0->flair) (3.27.2)\n", "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.8,>=1.5.0->flair) (16.0.6)\n", "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.18.0->flair) (0.13.3)\n", "Requirement already satisfied: 
safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.18.0->flair) (0.3.3)\n", "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.18.0->flair) (3.20.3)\n", "Requirement already satisfied: botocore<1.32.0,>=1.31.36 in /usr/local/lib/python3.10/dist-packages (from boto3->flair) (1.31.36)\n", "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from boto3->flair) (1.0.1)\n", "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from boto3->flair) (0.6.2)\n", "Requirement already satisfied: wcwidth>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ftfy->flair) (0.2.6)\n", "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.32.0,>=1.31.36->boto3->flair) (1.26.16)\n", "Requirement already satisfied: accelerate>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]>=4.18.0->flair) (0.22.0)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown==4.4.0->flair) (2.4.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.8,>=1.5.0->flair) (2.1.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (3.2.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (3.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (2023.7.22)\n", "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown==4.4.0->flair) (1.7.1)\n", 
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.8,>=1.5.0->flair) (1.3.0)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.3->transformers[sentencepiece]>=4.18.0->flair) (5.9.5)\n" ] } ], "source": [ "!pip install flair" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 234, "referenced_widgets": [ "4a6a9580116d4ddaa67db8d005f21327", "59045e07a2244bfc866502109b61c048", "f42b605677194c4681aa7ad08c940cdd", "c037b4a2ba0149e68cdbf9fd62b7bc65", "2546e6cb8d544a8f8801f0d8c749ac46", "a395dbf85af54838809d79a6e9367fe5", "1e451dcbeca14b95bd3d7ff546ed0ab3", "b811541bd23340f9bd2bd786efe0371d", "fda1445807e94776b607aace18000d20", "0645e921315a41498e0ca44bc3813922", "6b06cd82d171494bb3ba4838ee35e221", "0d232a6b1ac945b48157d4bc2bce5331", "49b3bb27e6334611a81b7c16cfa128c9", "19e60b7600a74a14802e6b726583a019", "56ac84afb4764a80b1830a700d88e3f5", "ff23c8632ad6413e815b7948b05f79a7", "613715b678934e6dbc139dc155e973dd", "2fe99d3bdae94540944f4cfd53122263", "e2a3b968093c4092b619e9f383bfff1a", "d53dc05aea7c4452aa7e9f36ef4e61b5", "e1e89eeca5e94f91ab6b835871f44ca6", "2baca0d555124aa0abedfd1d62aea660", "7c68767beaff489298dcf2bb47af43fd", "dab3499e56844088a33c61793d617b7e", "02c9e29a20564585925e82694c33cff0", "ad8f233c98ed4e99b90af6ed90ddcf26", "2bb1283a622845a79528ba0b96511fa3", "ba562193b3ca460e8e5e1b5fa22b1815", "f4aa61e7c89c409cb7590934c45f8274", "815e8594212144bab34934b281868683", "3970a49b216d42aea8ee4bb915c2a8cc", "08a9835d53b64edc8b883ea588c724e5", "0fb63f379970484791b247374c17cc1b", "6b874459216b42228cc3855efe264985", "190e0fea340245ba96770b908b33ba49", "272e8309da264ef29a6624973e97697b", "3258a05092d34e6992a3f9131c0fa8a9", "d6b68455f8c9475ba814cdfab5db84af", "e1409c0a86704ef1bde106dd5e67380b", "ed2a8e97b9644178a82bf6165386a6ec", "89cba3ca2a4e488a9464ff4737006c7c", 
"c509ae48b5d549cb9965de19b99776ca", "3b7f992ad7cc429fba033af430618173", "a1a81d0bf19a4e978340d6486475d33b" ] }, "id": "BSx7M807WsMO", "outputId": "36adb2b0-ce09-49c5-c29d-d73f292d7f4d" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2023-08-29 05:19:41,880 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmp3eomhzt6\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "100%|██████████| 253M/253M [00:15<00:00, 17.3MB/s]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "2023-08-29 05:19:57,865 copying /tmp/tmp3eomhzt6 to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "2023-08-29 05:19:59,033 removing temp file /tmp/tmp3eomhzt6\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/28.0 [00:00\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textis_rumoruser.handletopicsentimenttext_cleantext_lensentiment_vadersentiment_bloblabel
0Charlie Hebdo became well known for publishing...0.0BBCDanielScharliehebdoneucharlie hebdo became well known for publishing...13neuneuPOSITIVE
1Now 10 dead in a shooting there today RT \"@BBC...0.0robbylevycharliehebdoneunow 10 dead in a shooting there today rt charl...22neunegPOSITIVE
2@BBCDanielS @BBCWorld I'm guessing this is bei...0.0ModerateInAllcharliehebdoneuim guessing this is being considered terrorism...12neuposNEGATIVE
3@BBCDanielS @BBCWorld why would you mention th...0.0GabTarquinicharliehebdoneuwhy would you mention that before knowing the ...10neuneuNEGATIVE
7@GabTarquini @BBCDanielS @BBCWorld Maybe becau...0.0S_Jakobsencharliehebdoneumaybe because they shouted the prophet have be...16neuposNEGATIVE
.................................
62440@AnonyOps @Xplant So that means its ok to torc...1.0RianAldenfergusonneuso that means its ok to torch and loot someone...11neuposNEGATIVE
62441@RianAlden not at all, but they need to change...1.0Xplantfergusonneunot at all but they need to change some things...13neuneuNEGATIVE
62442@Xplant @AnonyOps Absoluteky. But it pains me...1.0RianAldenfergusonneuabsoluteky but it pains me to see private citi...20neuneuNEGATIVE
62443@Xplant @AnonyOps I'm curious how many of thes...1.0RianAldenfergusonneuim curious how many of these protesters ever s...21neuposNEGATIVE
62444@Xplant @AnonyOps You get 15,000 people showin...1.0RianAldenfergusonneuyou get 15000 people showing up to a city meet...21neuposPOSITIVE
\n", "

52441 rows × 10 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", " \n" ] }, "metadata": {}, "execution_count": 51 } ] }, { "cell_type": "code", "source": [ "data.label.value_counts()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "M1dM2QE8OmfA", "outputId": "6732dfb6-9426-45f6-d0bb-be92b10d214d" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "NEGATIVE 37543\n", "POSITIVE 14898\n", "Name: label, dtype: int64" ] }, "metadata": {}, "execution_count": 53 } ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "C7LRznzZ8HBt" }, "outputs": [], "source": [ "from flair.nn import Classifier\n", "from flair.data import Sentence\n", "tagger = Classifier.load('sentiment')\n", "labels = []\n", "for sentence in data.text_clean:\n", " sentence = Sentence(sentence)\n", " tagger.predict(sentence)\n", " labels.append(sentence)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 214, "referenced_widgets": [ "b3b8d7e9db6e432f8239404ccb742393", "370665f151284dc1875516a179be5cde", "a2757f02948f43ff9e49b1692b913598", "5035429bb8ce448b8ede488ad1938e74", "5a4b33bbfe334eaebebecc0f18dfe14c", "f870a835a9e44483bfc8a36837f6c416", "eb97e44493f7403b8f053c497beb36ee", "53a2958f03334b3cb1e3d764842626ad", "269f2950bf92403399911a9b4ac0571b", "df7b7ff06368440ca54f0e0b34bb27ec", "ab9bb53499d146138996d56dff7b000b", "6e12aefdb4914158b72b6ccd329c63a3", "e7ef92e7b67f41518058adc02eefcd68", "894c187cd59e4484b5603c4e5c0c36ba", "a7ce7b2d01fb4c46b8fb698370c5c02d", "dfd06ff56ad8494caafbf2ef1e0e1ccd", "34a5e1ef261743cc886cb58329d479e0", "74122d3ad6c4437bad9bf105586eb017", "76727a7add2f4391b3292d73cb0cb4b8", "634f01e7940a4c998f115b0d99d3c2f5", "706a72ddcbca4204932f7e4bb42c4f49", "f850a57656ab4a1a8cf54b6d622c7a4b", "ea7ff76cf2d34689a3754c6af412b1b2", "7b114d2de0bd4fd8a69e8303c1895de0", "42fb055bf6c14f8b91c06b46d170084c", "772c50f29a3540659be4133991647b05", "34fa667cd7134c9db05a0b247a4feca9", 
"092674977076491cacef894a127c683b", "70ed64666e7d434190327e1abd181034", "51ce722d544045439cc1828b0377592e", "949fd12e6a904943b977001260d7a32d", "c6ad3756ca124a86a587c8b72b6aa6fc", "a5db240d76aa4432947bbe30ba91df45", "5ac7f0eb7bf040a59c5d5b6887c3926b", "a54bfd41441241fe9375fe3b5a1a7e90", "9426b0cea645428bb7aef7a78ac0958d", "af048843a8c849e4839976550e6699db", "ad9ccb637afe4adab971f2f86f9ea4f1", "15cc1d55b0f744ec9d253e13c26842a2", "495febbc007e412f8fd506ed03c932f6", "329cfd81a32e425087e29e18c99421d5", "e29aabfa2d15482ba92823ed2d17bfa5", "0ea952386d1f44a2af8f609e861eaecc", "efa5ed771bf848ce8bd99c4c965929d6" ] }, "id": "7jha3YzKJGJY", "outputId": "d65d9250-5e14-4157-f6ff-4dd1e1cf8f68" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2023-08-01 12:02:26,005 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmpux3s6vo_\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 253M/253M [00:12<00:00, 21.2MB/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "2023-08-01 12:02:39,021 copying /tmp/tmpux3s6vo_ to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "2023-08-01 12:02:40,230 removing temp file /tmp/tmpux3s6vo_\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b3b8d7e9db6e432f8239404ccb742393", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/28.0 [00:00