{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 69 }, "colab_type": "code", "id": "jQbAISKpl9TD", "outputId": "7c5f3fd3-9ff8-4ab1-f494-e4b5f3f3de74" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[nltk_data] Downloading package punkt to /root/nltk_data...\n", "[nltk_data] Unzipping tokenizers/punkt.zip.\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 1, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "import nltk\n", "nltk.download('punkt')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 124 }, "colab_type": "code", "id": "yZJ9hWOJCHgr", "outputId": "0712cb2b-58d7-48b2-95f8-3de36a00edaf" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting vaderSentiment\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/86/9e/c53e1fc61aac5ee490a6ac5e21b1ac04e55a7c2aba647bb8411c9aadf24e/vaderSentiment-3.2.1-py2.py3-none-any.whl (125kB)\n", "\r\u001b[K |██▋ | 10kB 13.2MB/s eta 0:00:01\r\u001b[K |█████▏ | 20kB 1.8MB/s eta 0:00:01\r\u001b[K |███████▉ | 30kB 2.5MB/s eta 0:00:01\r\u001b[K |██████████▍ | 40kB 1.7MB/s eta 0:00:01\r\u001b[K |█████████████ | 51kB 2.1MB/s eta 0:00:01\r\u001b[K |███████████████▋ | 61kB 2.5MB/s eta 0:00:01\r\u001b[K |██████████████████▎ | 71kB 2.9MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 81kB 3.3MB/s eta 0:00:01\r\u001b[K |███████████████████████▍ | 92kB 3.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████ | 102kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████▋ | 112kB 2.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▎| 122kB 2.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 133kB 2.8MB/s \n", "\u001b[?25hInstalling collected packages: vaderSentiment\n", "Successfully installed vaderSentiment-3.2.1\n" ] } ], "source": [ "!pip install vaderSentiment" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "SWAJ8iL-B1w2" }, "outputs": [], "source": [ "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", "#from vaderSentiment import SentimentIntensityAnalyzer\n", "import json\n", "#import amazon_review_scraper\n", "from io import open\n", "from pprint import pprint\n", "# --- examples -------\n", "def sentiment_analysis(sentence):\n", "\n", " analyzer = SentimentIntensityAnalyzer()\n", "\n", " vs = analyzer.polarity_scores(sentence)\n", " vs['review'] = sentence\n", " if vs['compound'] < 0.0:\n", " vs['sentiment'] = -1\n", " elif vs['compound'] > 0.1:\n", " vs['sentiment'] = 1\n", " else:\n", " vs['sentiment'] = 0\n", "\n", " return vs['sentiment']\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "colab_type": "code", "id": "LAyFl-zlapS3", "outputId": "7fa8421a-eb86-4b41-cd7f-dd1844e6d626" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "===========================\n", "Retrieving TXT File\n", "===========================\n", "Retrieving Successfull\n", "=========================== \n", " \n", "\n", "892638644309065728\n", "An exception occurred\n", "408797755\n", "An exception occurred\n", "822485321745043457\n", "An exception occurred\n", "1122416537418133504\n", "An exception occurred\n", "2733071810\n", "An exception occurred\n", "1110221246501781513\n", "An exception occurred\n", "796710261205909505\n", "An exception occurred\n", "559478175\n", "An exception occurred\n", "26879434\n", "An exception occurred\n", "20275274\n", "An exception occurred\n", "3230512663\n", "An exception occurred\n", "1205065861\n", "An exception occurred\n", "65735401\n", "An exception occurred\n", "134244660\n", "An exception occurred\n", "975168712838758401\n", "An exception occurred\n", "1562417611\n", "An exception occurred\n", "78481599\n", "An exception occurred\n", "971408088061288448\n", "An exception occurred\n", "173064517\n", "An exception occurred\n", "2891045762\n", "An exception occurred\n", "356793134\n", "An exception occurred\n", "904783564927111169\n", "An exception occurred\n", "97949384\n", "An exception occurred\n", "311459442\n", "An exception occurred\n", "86881024\n", "An exception occurred\n", "2285878268\n", "An exception occurred\n", "2341430058\n", "An exception occurred\n", "805012972921729024\n", "An exception occurred\n", "1152596043302952963\n", "An exception occurred\n", "15016036\n", "An exception occurred\n", "46375899\n", "An exception occurred\n", "4600341193\n", "An exception occurred\n", "24134004\n", "An exception occurred\n", "983715867522359296\n", "An exception occurred\n", "770925484406411264\n", "An exception occurred\n", "897278332219179008\n", "An exception occurred\n", "1021791546914549760\n", "An exception occurred\n", "371384526\n", "An exception occurred\n", "144498030\n", "An exception occurred\n", "2300505560\n", "An exception occurred\n", "20245336\n", "An exception occurred\n", "2514551718\n", "An exception occurred\n", "1599428166\n", "An exception occurred\n", "955864679804366848\n", "An exception occurred\n", "1666961048\n", "An exception occurred\n", "137710451\n", "An exception occurred\n", "491986148\n", "An exception occurred\n", "2411395526\n", "An exception occurred\n", "77216274\n", "An exception occurred\n", "759903902879715328\n", "An exception occurred\n", "857582200216309761\n", "An exception occurred\n", "1106188040878329858\n", "An exception occurred\n", "1009749277524946944\n", "An exception occurred\n", "1149542983101890561\n", "An exception occurred\n", "157375861\n", "An exception occurred\n", "18477256\n", "An exception occurred\n", "1154710297476661249\n", "An exception occurred\n", "934004265630162944\n", "An exception occurred\n", "227592654\n", "An exception occurred\n", "757321466\n", "An exception occurred\n", "2563179614\n", "An exception occurred\n", "804024440\n", "An exception occurred\n", "901144885289340928\n", "An exception occurred\n", "65106504\n", "An exception occurred\n", "861054567517229057\n", "An exception occurred\n", "90227819\n", "An exception occurred\n", "782839561416171520\n", "An exception occurred\n", "1040909713871921153\n", "An exception occurred\n", "890812907109326849\n", "An exception occurred\n", "3256767140\n", "An exception occurred\n", "3997432706\n", "An exception occurred\n", "1329862512\n", "An exception occurred\n", "1071109857321803776\n", "An exception occurred\n", "96083073\n", "An exception occurred\n", "958591579975028736\n", "An exception occurred\n", "311847704\n", "An exception occurred\n", "1097122838744449024\n", "An exception occurred\n", "308039496\n", "An exception occurred\n", "831755798078439425\n", "An exception occurred\n", "4567778482\n", "An exception occurred\n", "901785749489680388\n", "An exception occurred\n", "1012698093546979331\n", "An exception occurred\n", "1158967869365006336\n", "An exception occurred\n", "1561105206\n", "An exception occurred\n", "445146308\n", "An exception occurred\n", "107439729\n", "An exception occurred\n", "314754053\n", "An exception occurred\n", "1036305905434877952\n", "An exception occurred\n", "92673487\n", "An exception occurred\n", "846319498873450496\n", "An exception occurred\n", "3017172452\n", "An exception occurred\n", "3178874586\n", "An exception occurred\n", "107973492\n", "An exception occurred\n", "837022666443882497\n", "An exception occurred\n", "270203083\n", "An exception occurred\n", "786830091963596800\n", "An exception occurred\n", "22804865\n", "An exception occurred\n", "2798257530\n", "An exception occurred\n", "1011112353247694848\n", "An exception occurred\n", "3167744143\n", "An exception occurred\n", "1393060716\n", "An exception occurred\n", "4091017285\n", "An exception occurred\n", "348871916\n", "An exception occurred\n", "1028243106624237568\n", "An exception occurred\n", "2221866066\n", "An exception occurred\n", "881240947161534464\n", "An exception occurred\n", "593908122\n", "An exception occurred\n", "113630860\n", "An exception occurred\n", "1136336520678334464\n", "An exception occurred\n", "352885686\n", "An exception occurred\n", "1154447236862009346\n", "An exception occurred\n", "774127209929859076\n", "An exception occurred\n", "2272758913\n", "An exception occurred\n", "14962178\n", "An exception occurred\n", "1248182096\n", "An exception occurred\n", "158971846\n", "An exception occurred\n", "46375899\n", "An exception occurred\n", "278448166\n", "An exception occurred\n", "913992068\n", "An exception occurred\n", "1138060394826686464\n", "An exception occurred\n", "1091661207373676545\n", "An exception occurred\n", "32456965\n", "An exception occurred\n", "1074803332416446469\n", "An exception occurred\n", "1726563888\n", "An exception occurred\n", "766735114390769665\n", "An exception occurred\n", "1096790967246409728\n", "An exception occurred\n", "708236539353030656\n", "An exception occurred\n", "1153018331739447296\n", "An exception occurred\n", "758574278757355520\n", "An exception occurred\n", "1396723404\n", "An exception occurred\n", "482529659\n", "An exception occurred\n", "1145806722226954246\n", "An exception occurred\n", "245192750\n", "An exception occurred\n", "216567279\n", "An exception occurred\n", "261588025\n", "An exception occurred\n", "21545080\n", "An exception occurred\n", "963373569085337600\n", "An exception occurred\n", "112709207\n", "An exception occurred\n", "1154710297476661249\n", "An exception occurred\n", "3082010718\n", "An exception occurred\n", "1087879100\n", "An exception occurred\n", "808852207621652480\n", "An exception occurred\n", "20421221\n", "An exception occurred\n", "996472707037491200\n", "An exception occurred\n", "2570845124\n", "An exception occurred\n", "946409530853855232\n", "An exception occurred\n", "930965263083716608\n", "An exception occurred\n", "97392780\n", "An exception occurred\n", "805280094784659456\n", "An exception occurred\n", "710704451888291841\n", "An exception occurred\n", "783204794869493760\n", "An exception occurred\n", "592524922\n", "An exception occurred\n", "1000670566188109825\n", "An exception occurred\n", "964107738\n", "An exception occurred\n", "826263947451957248\n", "An exception occurred\n", "1103735047478116352\n", "An exception occurred\n", "2910098127\n", "An exception occurred\n", "1162255861\n", "An exception occurred\n", "20336618\n", "An exception occurred\n", "206711192\n", "An exception occurred\n", "1090116885398646784\n", "An exception occurred\n", "21929443\n", "An exception occurred\n", "2918202769\n", "An exception occurred\n", "1157341631181447169\n", "An exception occurred\n", "407804928\n", "An exception occurred\n", "87603897\n", "An exception occurred\n", "1096222654799986688\n", "An exception occurred\n", "159460968\n", "An exception occurred\n", "295723377\n", "An exception occurred\n", "931513643073642496\n", "An exception occurred\n", "156915624\n", "An exception occurred\n", "84020096\n", "An exception occurred\n", "2500718288\n", "An exception occurred\n", "2608534010\n", "An exception occurred\n", "231438804\n", "An exception occurred\n", "767404787772092420\n", "An exception occurred\n", "1021230860173967360\n", "An exception occurred\n", "1041191195731406848\n", "An exception occurred\n", "183566262\n", "An exception occurred\n", "59730879\n", "An exception occurred\n", "16293020\n", "An exception occurred\n", "752243088798560256\n", "An exception occurred\n", "4892318567\n", "An exception occurred\n", "20858168\n", "An exception occurred\n", "943788254\n", "An exception occurred\n", "2154088153\n", "An exception occurred\n", "32827475\n", "An exception occurred\n", "4374968255\n", "An exception occurred\n", "2337746757\n", "An exception occurred\n", "952354093\n", "An exception occurred\n", "1006708808402554881\n", "An exception occurred\n", "1028243106624237568\n", "An exception occurred\n", "806156400\n", "An exception occurred\n", "1028638812\n", "An exception occurred\n", "330369832\n", "An exception occurred\n", "848790842550734849\n", "An exception occurred\n", "54498329\n", "An exception occurred\n", "614114966\n", "An exception occurred\n", "2391614580\n", "An exception occurred\n", "749466555382132736\n", "An exception occurred\n", "1114915392818798593\n", "An exception occurred\n", "1158691727655366658\n", "An exception occurred\n", "1009976953691131905\n", "An exception occurred\n", "377923381\n", "An exception occurred\n", "28126760\n", "An exception occurred\n", "337370333\n", "An exception occurred\n", "1463336180\n", "An exception occurred\n", "1000689112603549697\n", "An exception occurred\n", "755790170\n", "An exception occurred\n", "57842664\n", "An exception occurred\n", "240460073\n", "An exception occurred\n", "199228449\n", "An exception occurred\n", "174411523\n", "An exception occurred\n", "2417774784\n", "An exception occurred\n", "22170818\n", "An exception occurred\n", "===========================\n", "Data fb Recovered\n", "===========================\n", "\n", "\n" ] } ], "source": [ "\n", "import json\n", "import csv\n", "from nltk.tokenize import word_tokenize\n", "import string\n", "import re\n", "import time\n", "import pandas as pd\n", "\n", "\n", "fb_data = []\n", "x = []\n", "y = []\n", "k = []\n", "some_milby = []\n", "\n", "def getdata(dataurl):\n", " print(\"===========================\")\n", " print(\"Retrieving TXT File\")\n", " fb_data_path = dataurl\n", " fb_file = open(fb_data_path, \"r\")\n", " for line in fb_file:\n", " try:\n", " fb = json.loads(line)\n", " fb_data.append(fb)\n", " except:\n", " continue\n", " print(\"===========================\")\n", " print(\"Retrieving Successfull\")\n", " print(\"=========================== \\n \\n\")\n", " time.sleep(3)\n", " processdata()\n", "\n", "\n", "def processdata():\n", "\n", " time.sleep(1)\n", " RE_EMOJI = re.compile('[\\U00010000-\\U0010ffff]', flags=re.UNICODE)\n", " for i in range(len(fb_data)):\n", " u = []\n", " i1 = 0\n", " i2 = 0\n", " i3 = 0\n", " u.append(fb_data[i]['user']['id'])\n", " print(fb_data[i]['user']['id'])\n", " u.append(fb_data[i]['user']['followers_count'])\n", " try:\n", " i1 = fb_data[i]['retweeted_status']['retweet_count']\n", " i2 = fb_data[i]['retweeted_status']['reply_count']\n", " i3 = fb_data[i]['retweeted_status']['favourite_count'] \n", " except:\n", " print(\"An exception occurred\")\n", " i4 = (int(fb_data[i]['user']['statuses_count']) - int(i1)) /(float(fb_data[i]['user']['statuses_count']))\n", " if abs(i4) > 1 :\n", " i4 = 0.3\n", " q = fb_data[i]['text']\n", " o = fb_data[i]['id_str']\n", " q = RE_EMOJI.sub(r'', q)\n", " \n", " i = q.translate(str.maketrans('','',string.punctuation))\n", " u.append(i)\n", " u.append(i1)\n", " u.append(i2)\n", " u.append(i3)\n", " u.append(i4)\n", " u.append(sentiment_analysis(i))\n", " x.append(u)\n", " k.append(o)\n", " \n", " print(\"===========================\")\n", " print(\"Data fb Recovered\")\n", " print(\"===========================\\n\\n\")\n", " \n", " \n", " \n", "\n", " \n", "\n", " \n", " \n", " savetoxlsx()\n", " \n", "def savetoxlsx():\n", " #df = pd.DataFrame(some_milby)\n", " dt = pd.DataFrame(x)\n", " dt\n", " dt.to_excel('output_text4.xlsx', header=(\"id\",\"followers_count\",\"text\",\"retweet_count\",\"reply_count\",\"favourite_count\",\"focus_rate\",\"sentiment\"), index=False)\n", "\n", " \n", "\n", "def runall():\n", " getdata('tweetdata1.txt')\n", " \n", "\n", "\n", "runall()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "uu_f5dR8C7f3", "outputId": "9e8df52c-eeba-4c2c-f073-ceff46ae4a43" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] } ], "source": [ "print(sentiment_analysis('You are too good'))" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "Twitter_Analysis.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 }