# Dependencies for the notebook.
import csv
import pandas as pd
import numpy as np

# Read the training table and discard the two 'fluid_overload' columns in a
# single expression. `train` and `df_samples` are bound to the same DataFrame
# object, so both names see the frame without those columns.
# NOTE(review): 'fluid_overload.1' is presumably the name pandas assigned to a
# repeated 'fluid_overload' header -- verify against the CSV.
train = df_samples = pd.read_csv('training_data.csv').drop(
    columns=['fluid_overload', 'fluid_overload.1']
)
# --- Prior probability of every disease --------------------------------------
# `train` is the DataFrame loaded in an earlier cell. Its 'prognosis' column
# holds the disease name; the other columns are one per symptom
# (presumably 0/1 flags -- confirm against the CSV).

# Number of training records: the denominator of every prior.
total_count = len(train)

# Distinct disease names, in order of first appearance.
unique_diseases = train['prognosis'].unique()

# Count all diseases in one pass instead of re-filtering the frame per disease.
disease_counts = train['prognosis'].value_counts(sort=False)

# Maps disease name -> count(disease) / total_count.
disease_prior_probabilities = {}
for disease in unique_diseases:
    # A label absent from the counts (value_counts skips NaN) falls back to 0,
    # the same count an equality filter on that label would give.
    count_disease = int(disease_counts.get(disease, 0))
    prob_disease = count_disease / total_count
    disease_prior_probabilities[disease] = prob_disease

# Report the prior of each disease.
for disease, probability in disease_prior_probabilities.items():
    print(f"Prior Probability of {disease}: {probability}")


# --- Per-disease symptom totals ----------------------------------------------
def sum_col(x):
    """Sum a column's values, or return None when every value is falsy.

    Parameters
    ----------
    x : pandas.Series
        One column of one group, as passed in by ``groupby(...).agg``.

    Returns
    -------
    The result of ``x.values.sum()`` when at least one element of ``x`` is
    truthy; otherwise ``None``, which appears as NaN in the aggregated frame.
    """
    # `any` already returns a bool, so a comparison against 1 adds nothing;
    # the "no truthy value" case is returned explicitly rather than by
    # falling off the end of the function.
    if not any(x):
        return None
    return x.values.sum()


# One row per disease; each symptom column holds that disease's total, or NaN
# when the symptom never occurs for it.
ndf = train.groupby('prognosis', as_index=False).agg(sum_col)
\n", " | prognosis | \n", "itching | \n", "skin_rash | \n", "nodal_skin_eruptions | \n", "continuous_sneezing | \n", "shivering | \n", "chills | \n", "joint_pain | \n", "stomach_pain | \n", "acidity | \n", "... | \n", "pus_filled_pimples | \n", "blackheads | \n", "scurring | \n", "skin_peeling | \n", "silver_like_dusting | \n", "small_dents_in_nails | \n", "inflammatory_nails | \n", "blister | \n", "red_sore_around_nose | \n", "yellow_crust_ooze | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "(vertigo) Paroymsal Positional Vertigo | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "AIDS | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "Acne | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "108.0 | \n", "108.0 | \n", "108.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "Alcoholic hepatitis | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "Allergy | \n", "NaN | \n", "NaN | \n", "NaN | \n", "108.0 | \n", "108.0 | \n", "108.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 | \n", "Arthritis | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
6 | \n", "Bronchial Asthma | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
7 | \n", "Cervical spondylosis | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
8 | \n", "Chicken pox | \n", "114.0 | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
9 | \n", "Chronic cholestasis | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
10 | \n", "Common Cold | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
11 | \n", "Dengue | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
12 | \n", "Diabetes | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
13 | \n", "Dimorphic hemmorhoids(piles) | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
14 | \n", "Drug Reaction | \n", "114.0 | \n", "108.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "108.0 | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
15 | \n", "Fungal infection | \n", "108.0 | \n", "108.0 | \n", "108.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
16 | \n", "GERD | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "108.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
17 | \n", "Gastroenteritis | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
18 | \n", "Heart attack | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
19 | \n", "Hepatitis B | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
20 | \n", "Hepatitis C | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
21 | \n", "Hepatitis D | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
22 | \n", "Hepatitis E | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
23 | \n", "Hypertension | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
24 | \n", "Hyperthyroidism | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
25 | \n", "Hypoglycemia | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
26 | \n", "Hypothyroidism | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
27 | \n", "Impetigo | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "114.0 | \n", "114.0 | \n", "
28 | \n", "Jaundice | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
29 | \n", "Malaria | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
30 | \n", "Migraine | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
31 | \n", "Osteoarthristis | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
32 | \n", "Paralysis (brain hemorrhage) | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
33 | \n", "Peptic ulcer diseae | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
34 | \n", "Pneumonia | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
35 | \n", "Psoriasis | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "114.0 | \n", "114.0 | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
36 | \n", "Tuberculosis | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
37 | \n", "Typhoid | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "120.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
38 | \n", "Urinary tract infection | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
39 | \n", "Varicose veins | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
40 | \n", "hepatitis A | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "114.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
41 rows × 131 columns
\n", "