{ "metadata": { "kernelspec": { "name": "python", "display_name": "Python (Pyodide)", "language": "python" }, "language_info": { "codemirror_mode": { "name": "python", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8" } }, "nbformat_minor": 5, "nbformat": 4, "cells": [ { "id": "61a20a1b", "cell_type": "code", "source": "import pandas as pd\nimport numpy as np\nimport seaborn as sns\nimport re\nimport nltk\nfrom nltk.stem.isri import ISRIStemmer\nimport string\nimport matplotlib.pyplot as plt\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler, MinMaxScaler\nfrom sklearn import svm\nfrom sklearn.model_selection import train_test_split, cross_val_score, KFold\nfrom sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\nfrom sklearn.metrics import classification_report, confusion_matrix\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.svm import SVC\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn import metrics\nfrom nltk.corpus import stopwords\nfrom nltk.tokenize import word_tokenize\nfrom sklearn.naive_bayes import ComplementNB, MultinomialNB\nfrom tashaphyne.stemming import ArabicLightStemmer\nimport pyarabic.araby as araby\nimport unicodedata\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.utils.class_weight import compute_class_weight", "metadata": {}, "outputs": [], "execution_count": 6 }, { "id": "f69753ff", "cell_type": "code", "source": "# Read the two CSV files into DataFrames\ncombined_df = pd.read_csv(r\"C:\\Users\\moner\\Downloads\\merged_file.csv\")\n\n# Combine the two DataFrames\n#combined_df = pd.concat([data_df1, data_df2])\n\n# Reset the index of the combined DataFrame\ncombined_df = combined_df.reset_index(drop=True)\n\n# Now, 'combined_df' contains the data from both data sets.\nimport pyarabic.araby as araby\ncombined_df.head()\n#Drop any rows with missing values in the LABEL\ndata_df.dropna(subset=['LABEL'], inplace=True)", "metadata": {}, "outputs": [], "execution_count": 118 }, { "id": "b000b964", "cell_type": "code", "source": "combined_df['LABEL'].value_counts()", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2 2067\n", "0 1756\n", "Name: LABEL, dtype: int64" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 119 }, { "id": "71f8d462", "cell_type": "code", "source": "# Drop any duplicate rows based on the tweet column\ncombined_df.drop_duplicates(keep='first', inplace=True)\ncombined_df.drop_duplicates(subset='Tweet', keep='first', inplace=True)", "metadata": {}, "outputs": [], "execution_count": 120 }, { "id": "7f171fef", "cell_type": "code", "source": "combined_df['Tweet']", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 اولا الله يوفقهم ان شاءالله وثانيا الرياضة شي ...\n", "1 الاحظ شكثر الحريم اللي يمشون على ممشى البحر وف...\n", "2 حلو بما اننه عجبتش الرياضة استمري فيها وما بس ...\n", "3 طيب يا غبي مصارعة الحريم للحريم ؟؟ ولا فيها ام...\n", "4 اذا مافيه الا هاالعلاج مالك الاتصبرين كم شهر ل...\n", " ... \n", "3818 لن اتزوج بنت تتابع كوره انتهى\n", "3819 اخذ واعطي مع البنت بكل شي الا كرة القدم اطلعي ...\n", "3820 البنت و كرة القدم خطان متوازيان لا يلتقيان ف ا...\n", "3821 تفقد البنت ٪ من انوثتها حينما تتابع كرة القدم\n", "3822 المكياج للنساء فقط كرة القدم للرجال فقط الموضو...\n", "Name: Tweet, Length: 3796, dtype: object" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 121 }, { "id": "b97696f5", "cell_type": "code", "source": "my_emojis = {\n \"🙂\":\"يبتسم\",\n \"😂\":\"يضحك\",\n \"💔\":\"قلب حزين\",\n \"🙂\":\"يبتسم\",\n \"🤭\":\"حياء\", \n \"❤️\":\"حب\",\n \"❤\":\"حب\",\n \"😍\":\"حب\",\n \"😭\":\"يضحك\",\n \"😢\":\"حزن\",\n \"😔\":\"حزن\", \n \"♥\":\"حب\",\n \"💜\":\"حب\",\n \"😅\":\"يضحك\",\n \"🙁\":\"حزين\",\n \"💕\":\"حب\",\n \"💙\":\"حب\",\n \"😞\":\"حزين\",\n \"😊\":\"سعادة\",\n \"👏\":\"يصفق\",\n \"👌\":\"احسنت\",\n \"😴\":\"ينام\",\n \"😀\":\"يضحك\",\n \"😌\":\"مرتاح\",\n \"🌹\":\"وردة\",\n \"🙈\":\"حب\",\n \"😄\":\"يضحك\",\n \"😐\":\"محايد\",\n \"✌\":\"منتصر\",\n \"✨\":\"نجمه\",\n \"🤔\":\"تفكير\",\n \"😏\":\"يستهزء\",\n \"😒\":\"يستهزء\",\n \"🙄\":\"ملل\",\n \"😕\":\"عصبية\",\n \"😃\":\"يضحك\",\n \"🌸\":\"وردة\",\n \"😓\":\"حزن\",\n \"💞\":\"حب\",\n \"💗\":\"حب\",\n \"😑\":\"منزعج\",\n \"💭\":\"تفكير\",\n \"😎\":\"ثقة\",\n \"💛\":\"حب\",\n \"😩\":\"حزين\",\n \"💪\":\"عضلات\",\n \"👍\":\"موافق\",\n \"🙏🏻\":\"رجاء طلب\",\n \"😳\":\"مصدوم\",\n \"👏🏼\":\"تصفيق\",\n \"🎶\":\"موسيقي\",\n \"🌚\":\"صمت\",\n \"💚\":\"حب\",\n \"🙏\":\"رجاء طلب\",\n \"💘\":\"حب\",\n \"🍃\":\"سلام\",\n \"☺\":\"يضحك\",\n \"🐸\":\"ضفدع\",\n \"😶\":\"مصدوم\",\n \"✌️\":\"مرح\",\n \"✋🏻\":\"توقف\",\n \"😉\":\"غمزة\",\n \"🌷\":\"حب\",\n \"🙃\":\"مبتسم\",\n \"😫\":\"حزين\",\n \"😨\":\"مصدوم\",\n \"🎼 \":\"موسيقي\",\n \"🍁\":\"مرح\",\n \"🍂\":\"مرح\",\n \"💟\":\"حب\",\n \"😪\":\"حزن\",\n \"😆\":\"يضحك\",\n \"😣\":\"استياء\",\n \"☺️\":\"حب\",\n \"😱\":\"كارثة\",\n \"😁\":\"يضحك\",\n \"😖\":\"استياء\",\n \"🏃🏼\":\"يجري\",\n \"😡\":\"غضب\",\n \"🚶\":\"يسير\",\n \"🤕\":\"مرض\",\n \"‼️\":\"تعجب\",\n \"🕊\":\"طائر\",\n \"👌🏻\":\"احسنت\",\n \"❣\":\"حب\",\n \"🙊\":\"مصدوم\",\n \"💃\":\"سعادة مرح\",\n \"💃🏼\":\"سعادة مرح\",\n \"😜\":\"مرح\",\n \"👊\":\"ضربة\",\n \"😟\":\"استياء\",\n \"💖\":\"حب\",\n \"😥\":\"حزن\",\n \"🎻\":\"موسيقي\",\n \"✒\":\"يكتب\",\n \"🚶🏻\":\"يسير\",\n \"💎\":\"الماس\",\n \"😷\":\"وباء مرض\",\n \"☝\":\"واحد\",\n \"🚬\":\"تدخين\",\n \"💐\" : \"ورد\",\n \"🌞\" : \"شمس\",\n \"👆\" : \"الاول\",\n \"⚠️\" :\"تحذير\",\n \"🤗\" : \"احتواء\",\n \"✖️\": \"غلط\",\n \"📍\" : \"مكان\", \n \"👸\" : \"ملكه\",\n \"👑\" : \"تاج\",\n \"✔️\" : \"صح\",\n \"💌\": \"قلب\",\n \"😲\" : \"مندهش\",\n \"💦\": \"ماء\",\n \"🚫\" : \"خطا\",\n \"👏🏻\" : \"برافو\",\n \"🏊\" :\"يسبح\",\n \"👍🏻\": \"تمام\",\n \"⭕️\" :\"دائره كبيره\",\n \"🎷\" : \"ساكسفون\",\n \"👋\": \"تلويح باليد\",\n \"✌🏼\": \"علامه النصر\",\n \"🌝\":\"مبتسم\",\n \"➿\" : \"عقده مزدوجه\",\n \"💪🏼\" : \"قوي\",\n \"📩\": \"تواصل معي\",\n \"☕️\": \"قهوه\",\n \"😧\" : \"قلق و صدمة\",\n \"🗨\": \"رسالة\", \n \"❗️\" :\"تعجب\",\n \"🙆🏻\": \"اشاره موافقه\",\n \"👯\" :\"اخوات\",\n \"©\" : \"رمز\",\n \"👵🏽\" :\"سيده عجوزه\",\n \"🐣\": \"كتكوت\", \n \"🙌\": \"تشجيع\",\n \"🙇\": \"شخص ينحني\",\n \"👐🏽\":\"ايدي مفتوحه\", \n \"👌🏽\": \"بالظبط\",\n \"⁉️\" : \"استنكار\",\n \"⚽️\": \"كوره\",\n \"🕶\" :\"حب\",\n \"🎈\" :\"بالون\", \n \"🎀\": \"ورده\",\n \"💵\": \"فلوس\", \n \"😋\": \"جائع\",\n \"😛\": \"يغيظ\",\n \"😠\": \"غاضب\",\n \"✍🏻\": \"يكتب\",\n \"🌾\": \"ارز\",\n \"👣\": \"اثر قدمين\",\n \"❌\":\"رفض\",\n \"🍟\":\"طعام\",\n \"👬\":\"صداقة\",\n \"🐰\":\"ارنب\", \n \"☂\":\"مطر\",\n \"⚜\":\"مملكة فرنسا\",\n \"🐑\":\"خروف\",\n \"🗣\":\"صوت مرتفع\",\n \"👌🏼\":\"احسنت\",\n \"☘\":\"مرح\",\n \"😮\":\"صدمة\",\n \"😦\":\"قلق\",\n \"⭕\":\"الحق\",\n \"✏️\":\"قلم\",\n \"ℹ\":\"معلومات\",\n \"🙍🏻\":\"رفض\",\n \"⚪️\":\"نضارة نقاء\",\n \"🐤\":\"حزن\",\n \"💫\":\"مرح\",\n \"💝\":\"حب\",\n \"🍔\":\"طعام\",\n \"❤︎\":\"حب\",\n \"✈️\":\"سفر\",\n \"🏃🏻‍♀️\":\"يسير\",\n \"🍳\":\"ذكر\",\n \"🎤\":\"مايك غناء\",\n \"🎾\":\"كره\",\n \"🐔\":\"دجاجة\",\n \"🙋\":\"سؤال\",\n \"💉\":\"دواء\",\n \"🙏🏼\":\"رجاء طلب\",\n \"💂🏿 \":\"حارس\",\n \"🎬\":\"سينما\",\n \"♦️\":\"مرح\",\n \"💡\":\"قكرة\",\n \"‼\":\"تعجب\",\n \"👼\":\"طفل\",\n \"🔑\":\"مفتاح\",\n \"♥️\":\"حب\",\n \"🕋\":\"كعبة\",\n \"🐓\":\"دجاجة\",\n \"💩\":\"معترض\",\n \"👽\":\"فضائي\",\n \"☔️\":\"مطر\",\n \"🍷\":\"عصير\",\n \"🌟\":\"نجمة\",\n \"☁️\":\"سحب\",\n \"👃\":\"معترض\",\n \"🌺\":\"مرح\",\n \"🔪\":\"سكينة\",\n \"♨\":\"سخونية\",\n \"👊🏼\":\"ضرب\",\n \"✏\":\"قلم\",\n \"🚶🏾‍♀️\":\"يسير\",\n \"👊\":\"ضربة\",\n \"◾️\":\"وقف\",\n \"😚\":\"حب\",\n \"🔸\":\"مرح\",\n \"👎🏻\":\"لا يعجبني\",\n \"👊🏽\":\"ضربة\",\n \"😙\":\"حب\",\n \"🎥\":\"تصوير\",\n \"👉\":\"جذب انتباه\",\n \"👏🏽\":\"يصفق\",\n \"💪🏻\":\"عضلات\",\n \"🏴\":\"اسود\",\n \"🔥\":\"حريق\", \n \"😬\":\"عدم الراحة\", \n \"👊🏿\":\"يضرب\", \n \"🌿\":\"ورقه شجره\", \n \"✋🏼\":\"كف ايد\", \n \"👐\":\"ايدي مفتوحه\", \n \"☠️\":\"وجه مرعب\", \n \"🎉\":\"يهنئ\", \n \"🔕\" :\"صامت\",\n \"😿\":\"وجه حزين\", \n \"☹️\":\"وجه يائس\",\n \"😘\" :\"حب\", \n \"😰\" :\"خوف و حزن\",\n \"🌼\":\"ورده\", \n \"💋\": \"بوسه\",\n \"👇\":\"لاسفل\", \n \"❣️\":\"حب\", \n \"🎧\":\"سماعات\",\n \"📝\":\"يكتب\", \n \"😇\":\"ملاك\", \n \"😈\":\"رعب\", \n \"🏃\":\"يجري\", \n \"✌🏻\":\"علامه النصر\", \n \"🔫\":\"يضرب\", \n \"❗️\":\"تعجب\",\n \"👎\":\"غير موافق\", \n \"🔐\":\"قفل\", \n \"👈\":\"لليمين\",\n \"™\":\"رمز\", \n \"🚶🏽\":\"يتمشي\", \n \"😯\":\"متفاجأ\", \n \"✊\":\"يد مغلقه\", \n \"😻\":\"اعجاب\", \n \"🙉\" :\"قرد\", \n \"👧\":\"طفله صغيره\", \n \"🔴\":\"دائره حمراء\", \n \"🏽\":\"قوه\", \n \"💤\":\"ينام\", \n \"👀\":\"ينظر\", \n \"✍🏻\":\"يكتب\", \n \"❄️\":\"تلج\",\n \"💀\":\"رعب\", \n \"😤\":\"وجه عابس\", \n \"🖋\":\"قلم\", \n \"🎩\":\"كاب\", \n \"☕️\":\"قهوه\", \n \"😹\":\"ضحك\", \n \"💓\":\"حب\", \n \"☄️ \":\"نار\", \n \"👻\":\"رعب\",\n \"🤣\":\"ضحك\",\n }", "metadata": {}, "outputs": [], "execution_count": 122 }, { "id": "59ae2dee", "cell_type": "code", "source": "# Remove special characters\narabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:\"؟.,'{}~¦+|!”…“–ـ'''\nenglish_punctuations = string.punctuation\npunctuations_list = arabic_punctuations + english_punctuations\n\ndef remove_punctuations(text):\n if isinstance(text, str):\n # Replace underscore (_) with a space before removing other punctuations\n text = text.replace('_', ' ')\n translator = str.maketrans('', '', punctuations_list)\n text = re.sub(r'[!\\\"#$%&\\'()*+,-./:;<=>?@[\\\\]^`{|}~]', ' ', text)\n return text.strip() # Remove leading and trailing spaces\n else:\n return text # Return the input value if it's not a string\n# Tokenization\n\ndef tokenize_tweet(tweet):\n if isinstance(tweet, str):\n # Tokenize the tweet into individual words\n tokens = nltk.word_tokenize(tweet)\n return tokens\n else:\n return []\n\ndef remove_stop_words(text):\n stop_words = set(stopwords.words('arabic'))\n words = word_tokenize(text)\n words = [word for word in words if word not in stop_words and word.isalpha()]\n return \" \".join(words)\n\ndef remove_non_arabic(text):\n return re.sub(r'[^\\u0600-\\u06FF\\s]', '', text)\n\n# stemming\ndef stem_tokens(tokens):\n stemmer = ArabicLightStemmer()\n stemmed = [stemmer.light_stem(token) for token in tokens]\n return ' '.join(stemmed)\n\n# Normalization\ndef normalize_arabic(text):\n text = text.strip()\n# text = re.sub('[إأآ]', 'ا', text) # Normalize Alef characters\n# text = re.sub('[ٱٲٳإ]', 'ا', text) # Normalize special Alef characters\n\n# text = re.sub('[ة]', 'ه', text) # Normalize Ta Marbuta characters to Ha\n\n\n\n\n # Normalize Arabic text by removing diacritics and normalizing characters\n text = unicodedata.normalize('NFKD', text)\n text = re.sub(r'[\\u0610-\\u061A\\u064B-\\u0652\\u06D6-\\u06DC\\u06DF\\u06E0\\u06E4-\\u06E7\\u06E9\\u06EA\\u06ED]', '', text)\n \n\n\n\n \n # Remove longation\n text = re.sub(r'(.)\\1+', r\"\\1\\1\", text) \n \n #Strip vowels from a text, include Shadda.\n text = araby.strip_tashkeel(text)\n \n #Strip diacritics from a text, include harakats and small lettres The striped marks are\n text = araby.strip_diacritics(text)\n text=''.join([i for i in text if not i.isdigit()])\n \n # remove repeated letters\n text = re.sub(r'(\\w)\\1{2,}', r'\\1', text)\n \n return text.strip() # Remove leading and trailing spaces\nimport re\n\ndef convert_emojis(tweet):\n if isinstance(tweet, str):\n for emot in my_emojis:\n pattern = re.escape(emot) # Escape special characters in emot\n replacement = \" \".join(my_emojis[emot].replace(\",\", \"\").replace(\":\", \"\").split())\n tweet = re.sub(f'({pattern})(?!\\w)', rf' {replacement} ',tweet)\n return tweet\n else:\n return tweet if tweet is not None else \"\"\ndef processPost(tweet):\n # Convert emojis\n tweet = convert_emojis(tweet)\n \n # Remove punctuations from the tweet\n tweet = remove_punctuations(tweet)\n\n # Tokenize the cleaned tweet\n tokens = tokenize_tweet(tweet)\n \n # Remove non-Arabic characters\n tokens = remove_non_arabic(' '.join(tokens)).split()\n\n # Normalize Arabic text\n tokens = normalize_arabic(' '.join(tokens)).split()\n \n return ' '.join(tokens) # Join the processed words back into a single string\n\ncombined_df['clean_text'] = combined_df['Tweet'].apply(processPost)", "metadata": {}, "outputs": [], "execution_count": 123 }, { "id": "645df7a1", "cell_type": "code", "source": "combined_df.to_csv ('cleaned_50Tweet.csv',encoding='utf-8-sig')", "metadata": {}, "outputs": [ { "ename": "PermissionError", "evalue": "[Errno 13] Permission denied: 'cleaned_50Tweet.csv'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mPermissionError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_17220/4198889860.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcombined_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'cleaned_50Tweet.csv'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'utf-8-sig'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mto_csv\u001b[1;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001b[0m\n\u001b[0;32m 3464\u001b[0m )\n\u001b[0;32m 3465\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3466\u001b[1;33m return DataFrameRenderer(formatter).to_csv(\n\u001b[0m\u001b[0;32m 3467\u001b[0m \u001b[0mpath_or_buf\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3468\u001b[0m \u001b[0mline_terminator\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mline_terminator\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\formats\\format.py\u001b[0m in \u001b[0;36mto_csv\u001b[1;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001b[0m\n\u001b[0;32m 1103\u001b[0m \u001b[0mformatter\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfmt\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1104\u001b[0m )\n\u001b[1;32m-> 1105\u001b[1;33m \u001b[0mcsv_formatter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1106\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1107\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcreated_buffer\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\formats\\csvs.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 235\u001b[0m \"\"\"\n\u001b[0;32m 236\u001b[0m \u001b[1;31m# apply compression and byte/text conversion\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 237\u001b[1;33m with get_handle(\n\u001b[0m\u001b[0;32m 238\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 239\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 700\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;34m\"b\"\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 701\u001b[0m \u001b[1;31m# Encoding\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 702\u001b[1;33m handle = open(\n\u001b[0m\u001b[0;32m 703\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 704\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mPermissionError\u001b[0m: [Errno 13] Permission denied: 'cleaned_50Tweet.csv'" ] } ], "execution_count": 124 }, { "id": "69e08c99", "cell_type": "code", "source": "# Check for null values in the DataFrame\nnull_counts = combined_df.isnull().sum()\n\n# Display the number of null values for each column\nprint(null_counts)\n", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tweet 0\n", "LABEL 0\n", "P+D+PFN: 0\n", "P+SMN: 0\n", "C+P+D+SMN: 0\n", " ..\n", "P+DFN: 0\n", "NEGPART: 0\n", "C+IMPV+PRO: 0\n", "EMPH+PSTV: 0\n", "clean_text 0\n", "Length: 265, dtype: int64\n" ] } ], "execution_count": 125 }, { "id": "ac66c38f", "cell_type": "code", "source": "# remove the \"unwanted\" class\ncombined_df['LABEL'] = combined_df['LABEL'].astype(int)\ncombined_df = combined_df[(combined_df['LABEL'] != 3) & (data_df['LABEL'] != 1)]", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\moner\\AppData\\Local\\Temp/ipykernel_17220/3413781754.py:3: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", " combined_df = combined_df[(combined_df['LABEL'] != 3) & (data_df['LABEL'] != 1)]\n" ] } ], "execution_count": 150 }, { "id": "aefc9aa7", "cell_type": "code", "source": "combined_df", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TweetLABELP+D+PFN:P+SMN:C+P+D+SMN:B-PER:PRSV+PRO:Q:DMN:C+PRO:...DMN+PRO:VOC+ALLAH:P+FWN:NEG+PRSV:EMPH+NEG:P+DFN:NEGPART:C+IMPV+PRO:EMPH+PSTV:clean_text
0اولا الله يوفقهم ان شاءالله وثانيا الرياضة شي ...000001000...000000000اولا الله يوفقهم ان شاءالله وثانيا الرياضة شي ...
1الاحظ شكثر الحريم اللي يمشون على ممشى البحر وف...000000000...000000000الاحظ شكثر الحريم اللي يمشون على ممشى البحر وف...
2حلو بما اننه عجبتش الرياضة استمري فيها وما بس ...000010000...000000000حلو بما اننه عجبتش الرياضة استمري فيها وما بس ...
3طيب يا غبي مصارعة الحريم للحريم ؟؟ ولا فيها ام...000000000...000000000طيب يا غبي مصارعة الحريم للحريم ؟؟ ولا فيها ام...
4اذا مافيه الا هاالعلاج مالك الاتصبرين كم شهر ل...001011100...000000000اذا مافيه الا هاالعلاج مالك الاتصبرين كم شهر ل...
..................................................................
3816غصب ان الحريم موجودات ضحك سبق و قلتوا ان الحرم...200000000...000000000غصب ان الحريم موجودات ضحك سبق و قلتوا ان الحرم...
3817ابعدو الحريم عن كرة القدم ارجوكم اعيدو الحريم ...200002000...000000000ابعدو الحريم عن كرة القدم ارجوكم اعيدو الحريم ...
3818لن اتزوج بنت تتابع كوره انتهى200000000...000000000لن اتزوج بنت تتابع كوره انتهى
3819اخذ واعطي مع البنت بكل شي الا كرة القدم اطلعي ...200000000...000000000اخذ واعطي مع البنت بكل شي الا كرة القدم اطلعي ...
3820البنت و كرة القدم خطان متوازيان لا يلتقيان ف ا...200000000...000000000البنت و كرة القدم خطان متوازيان لا يلتقيان ف ا...
\n", "

3697 rows × 265 columns

\n", "
" ], "text/plain": [ " Tweet LABEL P+D+PFN: \\\n", "0 اولا الله يوفقهم ان شاءالله وثانيا الرياضة شي ... 0 0 \n", "1 الاحظ شكثر الحريم اللي يمشون على ممشى البحر وف... 0 0 \n", "2 حلو بما اننه عجبتش الرياضة استمري فيها وما بس ... 0 0 \n", "3 طيب يا غبي مصارعة الحريم للحريم ؟؟ ولا فيها ام... 0 0 \n", "4 اذا مافيه الا هاالعلاج مالك الاتصبرين كم شهر ل... 0 0 \n", "... ... ... ... \n", "3816 غصب ان الحريم موجودات ضحك سبق و قلتوا ان الحرم... 2 0 \n", "3817 ابعدو الحريم عن كرة القدم ارجوكم اعيدو الحريم ... 2 0 \n", "3818 لن اتزوج بنت تتابع كوره انتهى 2 0 \n", "3819 اخذ واعطي مع البنت بكل شي الا كرة القدم اطلعي ... 2 0 \n", "3820 البنت و كرة القدم خطان متوازيان لا يلتقيان ف ا... 2 0 \n", "\n", " P+SMN: C+P+D+SMN: B-PER: PRSV+PRO: Q: DMN: C+PRO: ... DMN+PRO: \\\n", "0 0 0 0 1 0 0 0 ... 0 \n", "1 0 0 0 0 0 0 0 ... 0 \n", "2 0 0 1 0 0 0 0 ... 0 \n", "3 0 0 0 0 0 0 0 ... 0 \n", "4 1 0 1 1 1 0 0 ... 0 \n", "... ... ... ... ... .. ... ... ... ... \n", "3816 0 0 0 0 0 0 0 ... 0 \n", "3817 0 0 0 2 0 0 0 ... 0 \n", "3818 0 0 0 0 0 0 0 ... 0 \n", "3819 0 0 0 0 0 0 0 ... 0 \n", "3820 0 0 0 0 0 0 0 ... 0 \n", "\n", " VOC+ALLAH: P+FWN: NEG+PRSV: EMPH+NEG: P+DFN: NEGPART: C+IMPV+PRO: \\\n", "0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... \n", "3816 0 0 0 0 0 0 0 \n", "3817 0 0 0 0 0 0 0 \n", "3818 0 0 0 0 0 0 0 \n", "3819 0 0 0 0 0 0 0 \n", "3820 0 0 0 0 0 0 0 \n", "\n", " EMPH+PSTV: clean_text \n", "0 0 اولا الله يوفقهم ان شاءالله وثانيا الرياضة شي ... \n", "1 0 الاحظ شكثر الحريم اللي يمشون على ممشى البحر وف... \n", "2 0 حلو بما اننه عجبتش الرياضة استمري فيها وما بس ... \n", "3 0 طيب يا غبي مصارعة الحريم للحريم ؟؟ ولا فيها ام... \n", "4 0 اذا مافيه الا هاالعلاج مالك الاتصبرين كم شهر ل... \n", "... ... ... \n", "3816 0 غصب ان الحريم موجودات ضحك سبق و قلتوا ان الحرم... \n", "3817 0 ابعدو الحريم عن كرة القدم ارجوكم اعيدو الحريم ... \n", "3818 0 لن اتزوج بنت تتابع كوره انتهى \n", "3819 0 اخذ واعطي مع البنت بكل شي الا كرة القدم اطلعي ... \n", "3820 0 البنت و كرة القدم خطان متوازيان لا يلتقيان ف ا... \n", "\n", "[3697 rows x 265 columns]" ] }, "execution_count": 151, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 151 }, { "id": "591ed234", "cell_type": "code", "source": "\n# Separate features and labels\nx = combined_df[[\"P+D+PFN:\", \"P+SMN:\", \"B-PER:\", \"PRSV+PRO:\", \"Q:\", \"DMN:\", \"C+PRO:\", \"B-PROPH:\", \"C+D+SMAJ:\", \"EXC+PSTV+ALLAH:\", \"C+SFAJ:\", \"C+D+PFN:\", \"C+T+PRO:\", \"B-MISC:\", \"FUT+PRSV+PRO:\", \"PMN:\", \"SFN:\", \"D+FWN:\", \"C+NEG:\", \"C+ACC+PRO:\", \"ACC+PREV:\", \"CERT:\", \"D+DFAJ:\", \"P+PIN:\", \"C+PRSV:\", \"D+PMN:\", \"T+PRO:\", \"C+SMN:\", \"P+DM:\", \"C:\", \"C+PRSV+PRO:\", \"AJNM:\", \"C+PMN:\", \"P+B-ORG:\", \"D+PMAJ:\", \"REL+PRO:\", \"P+SMN+PRO:\", \"NEG:\", \"EXC:\", \"PREV:\", \"DM+D+SFN:\", \"NM:\", \"NEG+SMN:\", \"B-AWARD:\", \"D+PIAJ:\", \"PRSV:\", \"D+DFN:\", \"AJCMP:\", \"DM:\", \"PFN+PRO:\", \"C+COND:\", \"D+DMN:\", \"LC+PRO:\", \"PIN+PRO:\", \"SMN+PRO:\", \"C+P+B-LOC:\", \"PPRSV:\", \"C+PPSTV:\", \"P:\", \"VOC:\", \"SFAJ:\", \"C+VOC:\", \"ACC+PRO:\", \"P+NQ+PRO:\", \"DMAJ:\", \"IMPV:\", \"PSTV+PRO:\", \"C+IMPV:\", \"PRP:\", \"C+B-LOC:\", \"P+PIN+PRO:\", \"C+P+PRO:\", \"C+D+SFAJ:\", \"AJCMP+PRO:\", \"I-LOC:\", \"B-EVENT:\", \"P+D+NQ:\", \"C+Q+P:\", \"P+PMN+PRO:\", \"I-PER:\", \"NPART+PRO:\", \"Q+P+PRO:\", \"PIAJ:\", \"C+P:\", \"C+PFN+PRO:\", \"P+D+SMN:\", \"C+P+SMN:\", \"C+LC:\", \"EXH:\", \"AV:\", \"C+PIN:\", \"INDEF:\", \"ALLAH+VOC:\", \"C+C:\", \"B-MONTH:\", \"LC:\", \"DFAJ:\", \"B-TIME:\", \"PIN:\", \"D+PFN:\", \"P+ACC+PRO:\", \"FUT:\", \"REL+PSTV:\", \"D+SFAJ:\", \"C+P+REL:\", \"Q+PRO:\", \"I-EVENT:\", \"D+SMN:\", \"NPART:\", \"PMAJ:\", \"REL:\", \"DMN+PRO:\", \"P+PFN+PRO:\", \"C+D+NQ:\", \"C+FWN:\", \"VOC+ALLAH:\", \"D+PIN:\", \"C+DM:\", \"AB:\", \"B-TITLE:\", \"C+CERT:\", \"P+D+SFN:\", \"P+FWN:\", \"P+PRO:\", \"VOC+SMN:\", \"IJ:\", \"P+AJCMP:\", \"NEG+PRSV:\", \"C+Q:\", \"P+B-PER:\", \"PRO:\", \"D+SMAJ:\", \"P+B-LOC:\", \"C+D+FWN:\", \"IMPV+PRO:\", \"D+AJCMP:\", \"C+P+DM:\", \"EMPH+NEG:\", \"C+SFN+PRO:\", \"D+SMN+P+ALLAH:\", \"EXL:\", \"C+PSTV:\", \"PPSTV:\", \"SMN:\", \"C+PSTV+PRO:\", \"VPART+PRSV:\", \"OATH+ALLAH:\", \"C+PIN+PRO:\", \"ALLAH:\", \"P+PFN:\", \"RET:\", \"C+NQ+PRO:\", \"PURP+PRSV:\", \"C+NM:\", \"COND:\", \"NEG+PSTV:\", \"P+NEG:\", \"D+AJNM:\", \"C+D+SMN:\", \"NQ:\", \"P+SUB:\", \"NQ+PRO:\", \"P+DFN:\", \"P+AJNM:\", \"PSTV:\", \"PX:\", \"P+SFN+PRO:\", \"P+SFN:\", \"C+B-PER:\", \"C+AV:\", \"C+P+NQ:\", \"C+D+PIN:\", \"D+NQ:\", \"C+D+PMN:\", \"PFAJ:\", \"NEG+P:\", \"INTG+PRO:\", \"C+SMAJ:\", \"FUT+PRSV:\", \"PFN:\", \"P+ALLAH:\", \"NEGPART:\", \"C+ACC:\", \"C+T:\", \"P+AV:\", \"P+REL:\", \"C+PFN:\", \"C+REL:\", \"DFN:\", \"SUB:\", \"C+PPRSV:\", \"B-DAY:\", \"P+D+PIN:\", \"VOC+SFN+PRO:\", \"C+P+D+SFN:\", \"C+ALLAH:\", \"I-ORG:\", \"FWN:\", \"C+SFN:\", \"D+NM:\", \"D+PFAJ:\", \"P+ACC:\", \"T:\", \"P+D+PMN:\", \"D:\", \"DM+D+SMN:\", \"SMAJ+PRO:\", \"SFN+PRO:\", \"P+NQ:\", \"C+IMPV+PRO:\", \"C+SMN+PRO:\", \"C+SUB:\", \"SMAJ:\", \"C+P+SFN:\", \"C+P+ALLAH:\", \"B-ORG:\", \"B-NATIONALITY:\", \"C+B-ORG:\", \"C+NQ:\", \"EMPH+PSTV:\", \"B-LOC:\", \"RES:\", \"D+SFN:\", \"C+D+SFN:\", \"ACC:\", \"C+P+D+SMN:\", \"C+P+NEG:\", \"EXP:\", \"FUT+PPRSV:\", \"C+VPART+PRSV:\", \"P+DMN:\", \"P+D+FWN:\", \"PSTV+ALLAH:\", \"IMPV+P+PRO:\", \"SUB+NEG:\", \"C+D+NM:\", \"P+NM:\", \"D+DMAJ:\", \"P+PMN:\", \"C+D+AJCMP:\", \"C+D+AJNM:\", \"C+AJCMP+PRO:\", \"P+T:\", \"C+AJCMP:\", \"C+AJNM:\", \"C+B-NATIONALITY:\", \"C+NEG+SMN:\", \"C+B-EVENT:\", \"P+B-EVENT:\", \"C+NEG+PRO:\", \"C+P+SUB:\", \"VPART+PRSV+PRO:\", \"C+FUT+PRSV:\", \"B-CLAN:\", \"I-LANG:\", \"REM+NEG:\", \"NM+PRO:\", \"DFN+PRO:\", \"PMN+PRO:\", \"C+PMN+PRO:\", \"P+B-LANG:\", \"NEG+P+PRO:\" ]]\ny = combined_df['LABEL']\n\n\n# Split data into train and test sets\nX_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=42)", "metadata": {}, "outputs": [], "execution_count": 152 }, { "id": "5c939928", "cell_type": "code", "source": "clf = DecisionTreeClassifier()\n\n# Fit the classifier on the training data\nclf.fit(X_train, y_train)\n\n# Make predictions on the test data\ny_pred = clf.predict(X_test)\n\n\n\nprint(classification_report(y_test, y_pred))\n\naccuracy = accuracy_score(y_test, y_pred)\nprint(\"Accuracy:\", accuracy)", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.76 0.75 0.75 534\n", " 2 0.77 0.78 0.77 576\n", "\n", " accuracy 0.76 1110\n", " macro avg 0.76 0.76 0.76 1110\n", "weighted avg 0.76 0.76 0.76 1110\n", "\n", "Accuracy: 0.7648648648648648\n" ] } ], "execution_count": 153 }, { "id": "7cb53abc", "cell_type": "code", "source": "from sklearn.metrics import classification_report\nfrom sklearn.preprocessing import LabelEncoder\nimport xgboost as xgb\nfrom imblearn.under_sampling import RandomUnderSampler\n\ndef balance__classes(X, y):\n sampler = RandomUnderSampler(random_state=40)\n X_balanced, y_balanced = sampler.fit_resample(X, y)\n return X_balanced, y_balanced\n\n# Balance the classes by undersampling the majority class\nX_train_balanced, y_train_balanced = balance__classes(X_train, y_train)\n\n# Encode the target variable as binary values\nlabel_encoder = LabelEncoder()\ny_train_binary = label_encoder.fit_transform(y_train_balanced)\n\n# Train XGBoost classifier with balanced classes\nclf = xgb.XGBClassifier(scale_pos_weight=(y_train_binary == 0).sum() / (y_train_binary == 1).sum())\nclf.fit(X_train_balanced, y_train_binary)\n\n# Encode the test set labels as binary values\ny_test_binary = label_encoder.transform(y_test)\n\n# Make predictions on test set\ny_pred = clf.predict(X_test)\n\n# Print classification report\nprint(classification_report(y_test_binary, y_pred))\naccuracy = accuracy_score(y_test_binary, y_pred)\nprint(\"Test Accuracy:\", accuracy)", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.86 0.75 0.80 534\n", " 1 0.79 0.89 0.84 576\n", "\n", " accuracy 0.82 1110\n", " macro avg 0.83 0.82 0.82 1110\n", "weighted avg 0.82 0.82 0.82 1110\n", "\n", "Test Accuracy: 0.8198198198198198\n" ] } ], "execution_count": 154 }, { "id": "44a8cea3", "cell_type": "code", "source": "# Train Support Vector Machine classifier\nclf = SVC(kernel='linear', C=3)\nclf.fit(X_train, y_train)\n\n# Make predictions on test set\ny_pred = clf.predict(X_test)\n\n# Print classification report\nprint(classification_report(y_test, y_pred))\naccuracy = accuracy_score(y_test , y_pred)\nprint(\"Test Accuracy:\", accuracy)", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.90 0.71 0.79 534\n", " 2 0.77 0.93 0.84 576\n", "\n", " accuracy 0.82 1110\n", " macro avg 0.84 0.82 0.82 1110\n", "weighted avg 0.83 0.82 0.82 1110\n", "\n", "Test Accuracy: 0.8207207207207208\n" ] } ], "execution_count": 155 }, { "id": "1190b35d", "cell_type": "code", "source": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import classification_report\nfrom sklearn.model_selection import train_test_split\n\n\n# Train a Random Forest classifier\nclf = RandomForestClassifier(n_estimators=100, random_state=42)\nclf.fit(X_train, y_train)\n\n# Make predictions on the test set\ny_pred = clf.predict(X_test)\n\n# Print classification report\nprint(classification_report(y_test, y_pred))\n\naccuracy = accuracy_score(y_test , y_pred)\nprint(\"Test Accuracy:\", accuracy)", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.91 0.73 0.81 534\n", " 2 0.79 0.94 0.86 576\n", "\n", " accuracy 0.84 1110\n", " macro avg 0.85 0.83 0.83 1110\n", "weighted avg 0.85 0.84 0.83 1110\n", "\n", "Test Accuracy: 0.836036036036036\n" ] } ], "execution_count": 160 }, { "id": "7a4917f7", "cell_type": "code", "source": "import seaborn as sns\nimport matplotlib.pyplot as plt\nfrom sklearn.metrics import confusion_matrix\n\n# Compute the confusion matrix\ncm = confusion_matrix(y_test , y_pred)\n\n# Create a heatmap using Seaborn\nsns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\")\n\n# Create a confusion matrix with the correct labels\ncm = confusion_matrix(y_test , y_pred)\n\n# Create a heatmap using Seaborn and specify the tick labels\nplt.figure(figsize=(10, 10))\nsns.set(font_scale=1.8)\nsns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, annot_kws={\"size\": 18, \"weight\": \"bold\"}, linewidths=0.5,\n xticklabels=['0', '1'], yticklabels=['0', '1'])\n\nplt.xlabel('Predicted', fontsize=20)\nplt.ylabel('Actual', fontsize=20)\nplt.tick_params(axis='both', which='major', labelsize=18)\nplt.show()\n\n", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "execution_count": 163 }, { "id": "edf7a1c7", "cell_type": "code", "source": "", "metadata": {}, "outputs": [], "execution_count": null } ] }