{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "Gkmj6mA3ZyVq", "metadata": { "id": "Gkmj6mA3ZyVq" }, "outputs": [], "source": [ "import sys\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "%matplotlib inline\n", "plt.style.use(\"ggplot\")\n", "plt.rcParams['figure.figsize'] = (12, 8)\n", "import seaborn as sns\n", "sns.set(style='whitegrid', color_codes=True)\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "import sklearn.model_selection" ] }, { "cell_type": "code", "execution_count": 2, "id": "6nvmS-3ZZ5XZ", "metadata": { "id": "6nvmS-3ZZ5XZ" }, "outputs": [], "source": [ "# from google.colab import files\n", "\n", "# uploaded = files.upload()\n", "df_f = pd.read_csv('dataset.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "e2a23ebd", "metadata": { "id": "e2a23ebd" }, "outputs": [], "source": [ "df_f = pd.read_csv('dataset.csv')\n", "df_f.dropna(axis=0, inplace=True)\n", "\n", "\n", "df_f.loc[:,\"BMI\"] = (df_f.loc[:,\"Weight (Kg)\"]) / np.square(df_f.loc[:,\"Height(Cm) \"]/100); # Calculate BMI(Body Mass Index)\n", "df_f.loc[:,\"BMI\"] = df_f.loc[:,\"BMI\"].round(2); # Round to two decimal places\n", "df_f.loc[:,\"FSH/LH\"] = df_f.loc[:,\"FSH(mIU/mL)\"] / df_f.loc[:,\"LH(mIU/mL)\"];\n", "df_f.loc[:,\"FSH/LH\"] = df_f.loc[:,\"FSH/LH\"].round(2);\n", "df_f.loc[:,\"Waist:Hip Ratio\"] = df_f.loc[:,\"Waist(inch)\"] / df_f.loc[:,\"Hip(inch)\"]\n", "df_f.loc[:,\"Waist:Hip Ratio\"] = df_f.loc[:,\"Waist:Hip Ratio\"].round(2)\n", "# df[df[\"Cycle(R/I)\"] == 5]\n", "df_f[\"Cycle(R/I)\"].replace({5: 4}, inplace=True)\n", "df_f[\"Cycle(R/I)\"].replace({2: 0, 4: 1}, inplace=True)\n", "df_f[\"II beta-HCG(mIU/mL)\"].replace({\"1.99.\": 1.99}, inplace=True)\n", "\n", "df_f[\"II beta-HCG(mIU/mL)\"] = df_f[\"II beta-HCG(mIU/mL)\"].astype(float)\n", "df_f[df_f[\"AMH(ng/mL)\"]== \"a\"].T\n", "# df_f.drop(df_f[\"AMH(ng/mL)\"]== \"a\", inplace=True)\n", "df_f.drop(df_f.loc[df_f[\"AMH(ng/mL)\"]== \"a\"].index, inplace=True);\n", "df_f[df_f[\"AMH(ng/mL)\"]== \"a\"]\n", "df_f[\"AMH(ng/mL)\"] = df_f[\"AMH(ng/mL)\"].astype(float)\n", "df_f[\"BP _Systolic (mmHg)\"].replace({12: 120}, inplace=True)\n", "df_f[\"BP _Diastolic (mmHg)\"].replace({8: 80}, inplace=True)\n", "df_f.to_csv('PCOS_clean_data_without_infertility.csv', index=False)\n", "\n", "from sklearn.feature_selection import chi2,f_classif, mutual_info_classif, SelectKBest\n", "from sklearn.model_selection import train_test_split, cross_val_score\n", "from sklearn.model_selection import RepeatedStratifiedKFold\n", "\n", "df = pd.read_csv('PCOS_clean_data_without_infertility.csv')\n", "df.head(12).T\n", "#df.columns\n", "\n", "df_cat = df_f[[\" Age (yrs)\",\n", " \"Pregnant(Y/N)\",\n", " \"Cycle(R/I)\",\n", " \"Blood Group\",\n", " \"Cycle length(days)\",\n", " \"No. 
of aborptions\",\n", " \"Weight gain(Y/N)\",\n", " \"hair growth(Y/N)\",\n", " \"Skin darkening (Y/N)\",\n", " \"Hair loss(Y/N)\",\n", " \"Pimples(Y/N)\",\n", " \"Fast food (Y/N)\",\n", " \"Reg.Exercise(Y/N)\",\n", "\n", "]]\n", "df_cat.columns\n", "\n", "df_target = df_f[[\"PCOS (Y/N)\"]]\n", "df_corr_num = df_f.drop(df_cat.columns,axis=1,inplace = False)\n", "df_num = df_f.drop(df_cat.columns,axis=1)\n", "df_num.drop([\"PCOS (Y/N)\"], axis=1, inplace= True)\n", "df_corr_num = pd.concat([df_target, df_num], axis=1, sort = False)\n", "df_corr_num.head()\n", "from sklearn.model_selection import StratifiedKFold\n", "from yellowbrick.model_selection import CVScores\n", "from sklearn.metrics import confusion_matrix\n", "\n", "\n", "def test_results(model, X_test, y_test):\n", " from sklearn.metrics import confusion_matrix\n", " y_pred = model.predict(X_test)\n", " tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n", "\n", " accuracy = (tp + tn)/(tp + fp + tn + fn)\n", " print(\"Accuracy: \", '{:.2f}'.format(accuracy * 100))\n", " print(\"True Negative:\", tn)\n", " print(\"True Positve:\", tp)\n", " print(\"False Positive:\", fp)\n", " print(\"False Negative:\", fn)\n", " print()\n", " print(\"-------------------------------------------------------\")\n", " print(\"Negative Class Results\")\n", " precision = (tp / (tp + fp))\n", " recall = (tp / (tp + fn))\n", " f1_score = (2 * (precision * recall) / (precision + recall))\n", " print(\"Precision (N): \", '{:.2f}'.format(precision * 100))\n", " print(\"Recall (N): \", '{:.2f}'.format(recall * 100))\n", " print(\"F1 Score (N):\" , '{:.2f}'.format(f1_score * 100))\n", " print()\n", " print(\"-------------------------------------------------------\")\n", " print(\"Positive Class Results\")\n", " precision = (tn / (tn + fn))\n", " recall = (tn / (tn + fp))\n", " f1_score = (2 * (precision * recall) / (precision + recall))\n", " print(\"Precision (P): \", '{:.2f}'.format(precision * 100))\n", " print(\"Recall (P): \", '{:.2f}'.format(recall * 100))\n", " print(\"F1 Score (P):\" , '{:.2f}'.format(f1_score * 100))\n", "\n", "from yellowbrick.classifier import confusion_matrix\n", "\n", "def vis_conf(model, X_test, y_test):\n", " plt.figure(figsize=(6, 5))\n", " visualizer = confusion_matrix(\n", " model,\n", " X_test, y_test,\n", " is_fitted=True,\n", " classes=['Negative', 'Positive']\n", " )\n", " visualizer.show();" ] }, { "cell_type": "code", "execution_count": 4, "id": "9Yf9lrn1asJz", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9Yf9lrn1asJz", "outputId": "edce4331-3cee-4b48-a51a-4482c0919dba" }, "outputs": [ { "data": { "text/plain": [ "Index([' Age (yrs)', 'Weight (Kg)', 'BMI', 'Pulse rate(bpm) ', 'Hb(g/dl)',\n", " 'Cycle(R/I)', 'Cycle length(days)', 'Marraige Status (Yrs)',\n", " 'Hip(inch)', 'Waist(inch)', 'AMH(ng/mL)', 'Vit D3 (ng/mL)',\n", " 'Weight gain(Y/N)', 'hair growth(Y/N)', 'Skin darkening (Y/N)',\n", " 'Hair loss(Y/N)', 'Pimples(Y/N)', 'Fast food (Y/N)', 'Follicle No. (L)',\n", " 'Follicle No. (R)', 'Avg. F size (L) (mm)', 'Avg. F size (R) (mm)',\n", " 'Endometrium (mm)'],\n", " dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = df_f.drop([\"PCOS (Y/N)\",\n", " \"Sl. No\", \"Patient File No.\",\n", " \"Blood Group\",\n", " \"Height(Cm) \",\n", " \"Pregnant(Y/N)\",\n", " \"PRG(ng/mL)\",\n", " \"RR (breaths/min)\",\n", " \"No. 
of aborptions\",\n", " \"FSH/LH\",\n", " \" I beta-HCG(mIU/mL)\",\n", " \"II beta-HCG(mIU/mL)\",\n", " \"TSH (mIU/L)\",\n", " \"FSH(mIU/mL)\",\n", " \"LH(mIU/mL)\",\n", " \"Waist:Hip Ratio\",\n", " \"PRL(ng/mL)\",\n", " \"BP _Diastolic (mmHg)\",\n", " \"BP _Systolic (mmHg)\",\n", " \"Reg.Exercise(Y/N)\",\n", " \"RBS(mg/dl)\"\n", " ],axis=1)\n", "X.columns" ] }, { "cell_type": "code", "execution_count": 5, "id": "9rHUklm8a3kq", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9rHUklm8a3kq", "outputId": "65c4776c-39fb-4b4b-a183-bb1e9045f5e9" }, "outputs": [ { "data": { "text/plain": [ "(538, 1)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y = df_f[[\"PCOS (Y/N)\"]]\n", "y.shape" ] }, { "cell_type": "code", "execution_count": 6, "id": "lMxlkbIqa4VV", "metadata": { "id": "lMxlkbIqa4VV" }, "outputs": [], "source": [ "from imblearn.combine import SMOTEENN\n", "\n", "resample = SMOTEENN(sampling_strategy=1/1, random_state =0)\n", "X, y = resample.fit_resample(X, y)" ] }, { "cell_type": "code", "execution_count": 7, "id": "tgHQ2RjIa932", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tgHQ2RjIa932", "outputId": "fcb84fea-c3c2-4ed3-9110-d5ac8d74e853" }, "outputs": [ { "data": { "text/plain": [ "(103, 23)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "\n", "from imblearn.combine import SMOTEENN\n", "\n", "resample = SMOTEENN(sampling_strategy=1/1, random_state =0)\n", "X, y = resample.fit_resample(X, y)\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify= y)\n", "\n", "\n", "scaler = MinMaxScaler().fit(X_train)\n", "#range 0-1\n", "# scaled using scaled_value = (original_value - min) / (max - min)\n", "\n", "\n", "X_train = scaler.transform(X_train)\n", "X_train = pd.DataFrame(X_train)\n", "\n", "X_test = scaler.transform(X_test)\n", "X_test = pd.DataFrame(X_test)\n", "\n", "\n", "# Setting Column Names from dataset\n", "X_train.columns = X.columns\n", "X_test.columns = X.columns\n", "X_train.shape\n", "X_test.shape\n", "\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "pYXYflk5a8bT", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pYXYflk5a8bT", "outputId": "92b5e321-7756-4662-85fe-ffb2fb9734ed" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(409, 23)\n" ] } ], "source": [ "print(X_train.shape)" ] }, { "cell_type": "code", "execution_count": 9, "id": "TiMnBO2tNzkb", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TiMnBO2tNzkb", "outputId": "9635ae91-6b93-4089-d651-8436a24e75e7" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 97.09\n", "True Negative: 48\n", "True Positve: 52\n", "False Positive: 3\n", "False Negative: 0\n", "\n", "-------------------------------------------------------\n", "Negative Class Results\n", "Precision (N): 94.55\n", "Recall (N): 100.00\n", "F1 Score (N): 97.20\n", "\n", "-------------------------------------------------------\n", "Positive Class Results\n", "Precision (P): 100.00\n", "Recall (P): 94.12\n", "F1 Score (P): 96.97\n" ] } ], "source": [ "\n", "#XGB\n", "\n", "import xgboost as xgb\n", "xgb = xgb.XGBClassifier(max_depth=5, n_estimators=1500, learning_rate=0.3,scale_pos_weight=10,\n", " random_state= 0, n_jobs=-1)\n", "sum(cross_val_score(xgb, X, y, cv=10))/10\n", "xgb = xgb.fit(X_train, 
y_train)\n", "test_results(xgb, X_test, y_test)\n", "\n" ] }, { "cell_type": "code", "execution_count": 10, "id": "CY6pdF61NwwA", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CY6pdF61NwwA", "outputId": "3909ccea-cd21-44d7-a5be-3d747983a17a", "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 93.20\n", "True Negative: 48\n", "True Positve: 48\n", "False Positive: 3\n", "False Negative: 4\n", "\n", "-------------------------------------------------------\n", "Negative Class Results\n", "Precision (N): 94.12\n", "Recall (N): 92.31\n", "F1 Score (N): 93.20\n", "\n", "-------------------------------------------------------\n", "Positive Class Results\n", "Precision (P): 92.31\n", "Recall (P): 94.12\n", "F1 Score (P): 93.20\n" ] } ], "source": [ "#SVM\n", "from sklearn.svm import SVC\n", "svm = SVC(kernel=\"linear\", gamma='auto', probability= True, random_state=0)\n", "svm.fit(X_train, y_train)\n", "test_results(svm, X_test, y_test)" ] }, { "cell_type": "code", "execution_count": 11, "id": "3c84db81", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best parameters: {'C': 0.1, 'gamma': 1, 'kernel': 'poly'}\n", "Accuracy: 95.15\n", "True Negative: 47\n", "True Positve: 51\n", "False Positive: 4\n", "False Negative: 1\n", "\n", "-------------------------------------------------------\n", "Negative Class Results\n", "Precision (N): 92.73\n", "Recall (N): 98.08\n", "F1 Score (N): 95.33\n", "\n", "-------------------------------------------------------\n", "Positive Class Results\n", "Precision (P): 97.92\n", "Recall (P): 92.16\n", "F1 Score (P): 94.95\n" ] } ], "source": [ "from sklearn.model_selection import GridSearchCV\n", "\n", "param_grid = {\n", " 'C': [0.1, 1, 10, 100],\n", " 'gamma': ['scale', 'auto', 0.1, 1, 10],\n", " 'kernel': ['linear', 'rbf', 'poly']\n", "}\n", "\n", "grid_search = GridSearchCV(SVC(probability=True, random_state=0), param_grid, cv=5)\n", "grid_search.fit(X_train, y_train)\n", "print(\"Best parameters:\", grid_search.best_params_)\n", "\n", "best_svm = grid_search.best_estimator_\n", "test_results(best_svm, X_test, y_test)\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "4d7f36fc", "metadata": {}, "outputs": [], "source": [ "#ENSEMBLE HARD" ] }, { "cell_type": "code", "execution_count": 13, "id": "7de6aaf7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training Time: 1.25 seconds\n", "Testing Time: 0.04 seconds\n", "Test Accuracy: 0.95\n", "Cross-Validation Mean Accuracy: 0.975610\n", "Cross-Validation Standard Deviation: 0.015426\n" ] } ], "source": [ "import time\n", "from sklearn.ensemble import VotingClassifier\n", "from sklearn.model_selection import cross_val_score\n", "import numpy as np\n", "from xgboost import XGBClassifier\n", "from sklearn.svm import SVC\n", "\n", "# # Create base models\n", "# xgb = XGBClassifier()\n", "# svm = SVC(probability=True) # Ensure SVM supports probabilities if needed\n", "\n", "# Create a dictionary of estimators\n", "estimators = [('svm', svm), ('xgb', xgb)]\n", "\n", "# Create our Voting Classifier\n", "ensemble1 = VotingClassifier(estimators=estimators, voting='hard')\n", "\n", "# Measure computational time for training\n", "start_train = time.time()\n", "ensemble1.fit(X_train, y_train)\n", "\n", "end_train = time.time()\n", "train_time = end_train - start_train\n", "\n", "# Measure computational time for testing\n", "start_test = time.time()\n", "test_score = 
ensemble1.score(X_test, y_test)\n", "end_test = time.time()\n", "test_time = end_test - start_test\n", "\n", "print(f\"Training Time: {train_time:.2f} seconds\")\n", "print(f\"Testing Time: {test_time:.2f} seconds\")\n", "print(f\"Test Accuracy: {test_score:.2f}\")\n", "\n", "# Perform 10-fold cross-validation to calculate standard deviation\n", "cv_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring='accuracy')\n", "\n", "# Calculate mean and standard deviation of cross-validation scores\n", "mean_accuracy = np.mean(cv_scores)\n", "std_dev_accuracy = np.std(cv_scores)\n", "\n", "print(f\"Cross-Validation Mean Accuracy: {mean_accuracy:2f}\")\n", "print(f\"Cross-Validation Standard Deviation: {std_dev_accuracy:2f}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "a4ffcc67", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cross-Validated Accuracy: Mean = 0.9756, Std Dev = 0.02\n", "Cross-Validated Precision: Mean = 0.9861, Std Dev = 0.02\n", "Cross-Validated Recall: Mean = 0.965952, Std Dev = 0.03\n", "Cross-Validated F1-Score: Mean = 0.975443, Std Dev = 0.02\n" ] } ], "source": [ "from sklearn.model_selection import cross_val_score\n", "from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score\n", "import numpy as np\n", "\n", "# Define scoring metrics\n", "precision = make_scorer(precision_score, average='binary') # Change 'binary' to 'weighted' for multi-class\n", "recall = make_scorer(recall_score, average='binary')\n", "f1 = make_scorer(f1_score, average='binary')\n", "\n", "# Perform cross-validation for each metric\n", "accuracy_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring='accuracy') # 'accuracy' is a built-in scorer\n", "precision_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring=precision)\n", "recall_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring=recall)\n", "f1_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring=f1)\n", "\n", "# Calculate mean and standard deviation for each metric\n", "mean_accuracy = np.mean(accuracy_scores)\n", "std_dev_accuracy = np.std(accuracy_scores)\n", "\n", "mean_precision = np.mean(precision_scores)\n", "std_dev_precision = np.std(precision_scores)\n", "\n", "mean_recall = np.mean(recall_scores)\n", "std_dev_recall = np.std(recall_scores)\n", "\n", "mean_f1 = np.mean(f1_scores)\n", "std_dev_f1 = np.std(f1_scores)\n", "\n", "# Print results\n", "print(f\"Cross-Validated Accuracy: Mean = {mean_accuracy:.4f}, Std Dev = {std_dev_accuracy:.2f}\")\n", "print(f\"Cross-Validated Precision: Mean = {mean_precision:.4f}, Std Dev = {std_dev_precision:.2f}\")\n", "print(f\"Cross-Validated Recall: Mean = {mean_recall:f}, Std Dev = {std_dev_recall:.2f}\")\n", "print(f\"Cross-Validated F1-Score: Mean = {mean_f1:f}, Std Dev = {std_dev_f1:.2f}\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "d8d5a5f4", "metadata": {}, "outputs": [], "source": [ "#Ensemble Soft" ] }, { "cell_type": "code", "execution_count": 31, "id": "44e12feb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cross-Validation Mean Accuracy: 0.97\n", "Cross-Validation Standard Deviation: 0.02\n", "Training Time: 1.66 seconds\n", "Testing Time: 0.01 seconds\n" ] } ], "source": [ "import time\n", "from sklearn.ensemble import VotingClassifier\n", "from sklearn.model_selection import cross_val_score\n", "import numpy as np\n", "from xgboost import XGBClassifier\n", "from 
sklearn.svm import SVC\n", "\n", " # Ensure SVM supports probabilities if needed\n", "\n", "# Create a dictionary of estimators\n", "estimators = [('svm', best_svm), ('xgb', xgb)]\n", "\n", "# Create our Voting Classifier\n", "ensemble2 = VotingClassifier(estimators=estimators, voting='soft')\n", "\n", "# Measure computational time for training\n", "start_train = time.time()\n", "ensemble2.fit(X_train, y_train)\n", "end_train = time.time()\n", "train_time = end_train - start_train\n", "\n", "# Measure computational time for testing\n", "start_test = time.time()\n", "test_score = ensemble2.score(X_test, y_test)\n", "end_test = time.time()\n", "test_time = end_test - start_test\n", "\n", "# Perform 10-fold cross-validation to calculate standard deviation\n", "cv_scores = cross_val_score(ensemble2, X_train, y_train, cv=10, scoring='accuracy')\n", "\n", "# Calculate mean and standard deviation of cross-validation scores\n", "mean_accuracy = np.mean(cv_scores)\n", "std_dev_accuracy = np.std(cv_scores)\n", "\n", "print(f\"Cross-Validation Mean Accuracy: {mean_accuracy:.2f}\")\n", "print(f\"Cross-Validation Standard Deviation: {std_dev_accuracy:.2f}\")\n", "\n", "print(f\"Training Time: {train_time:.2f} seconds\")\n", "print(f\"Testing Time: {test_time:.2f} seconds\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "a40cbb57", "metadata": {}, "outputs": [], "source": [ "#ENSEMBLE HARD SVM_GRID" ] }, { "cell_type": "code", "execution_count": 16, "id": "f65339a7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training Time: 1.38 seconds\n", "Testing Time: 0.01 seconds\n", "Test Accuracy: 0.97\n", "Cross-Validation Mean Accuracy: 0.980427\n", "Cross-Validation Standard Deviation: 0.014656\n" ] } ], "source": [ "import time\n", "from sklearn.ensemble import VotingClassifier\n", "from sklearn.model_selection import cross_val_score\n", "import numpy as np\n", "from xgboost import XGBClassifier\n", "from sklearn.svm import SVC\n", "\n", "# Create base models\n", "#xgb = XGBClassifier()\n", "#svm = SVC(probability=True) # Ensure SVM supports probabilities if needed\n", "\n", "# Create a dictionary of estimators\n", "estimators = [('svm', best_svm), ('xgb', xgb)]\n", "\n", "# Create our Voting Classifier\n", "ensemble1 = VotingClassifier(estimators=estimators, voting='hard')\n", "\n", "# Measure computational time for training\n", "start_train = time.time()\n", "ensemble1.fit(X_train, y_train)\n", "\n", "end_train = time.time()\n", "train_time = end_train - start_train\n", "\n", "# Measure computational time for testing\n", "start_test = time.time()\n", "test_score = ensemble1.score(X_test, y_test)\n", "end_test = time.time()\n", "test_time = end_test - start_test\n", "\n", "print(f\"Training Time: {train_time:.2f} seconds\")\n", "print(f\"Testing Time: {test_time:.2f} seconds\")\n", "print(f\"Test Accuracy: {test_score:.2f}\")\n", "\n", "# Perform 10-fold cross-validation to calculate standard deviation\n", "cv_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring='accuracy')\n", "\n", "# Calculate mean and standard deviation of cross-validation scores\n", "mean_accuracy = np.mean(cv_scores)\n", "std_dev_accuracy = np.std(cv_scores)\n", "\n", "print(f\"Cross-Validation Mean Accuracy: {mean_accuracy:f}\")\n", "print(f\"Cross-Validation Standard Deviation: {std_dev_accuracy:f}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 17, "id": "5a6770cf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"Cross-Validated Accuracy: Mean = 0.9804, Std Dev = 0.01\n", "Cross-Validated Precision: Mean = 0.9955, Std Dev = 0.01\n", "Cross-Validated Recall: Mean = 0.965714, Std Dev = 0.03\n", "Cross-Validated F1-Score: Mean = 0.979966, Std Dev = 0.02\n" ] } ], "source": [ "from sklearn.model_selection import cross_val_score\n", "from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score\n", "import numpy as np\n", "\n", "# Define scoring metrics\n", "precision = make_scorer(precision_score, average='binary') # Change 'binary' to 'weighted' for multi-class\n", "recall = make_scorer(recall_score, average='binary')\n", "f1 = make_scorer(f1_score, average='binary')\n", "\n", "# Perform cross-validation for each metric\n", "accuracy_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring='accuracy') # 'accuracy' is a built-in scorer\n", "precision_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring=precision)\n", "recall_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring=recall)\n", "f1_scores = cross_val_score(ensemble1, X_train, y_train, cv=10, scoring=f1)\n", "\n", "# Calculate mean and standard deviation for each metric\n", "mean_accuracy = np.mean(accuracy_scores)\n", "std_dev_accuracy = np.std(accuracy_scores)\n", "\n", "mean_precision = np.mean(precision_scores)\n", "std_dev_precision = np.std(precision_scores)\n", "\n", "mean_recall = np.mean(recall_scores)\n", "std_dev_recall = np.std(recall_scores)\n", "\n", "mean_f1 = np.mean(f1_scores)\n", "std_dev_f1 = np.std(f1_scores)\n", "\n", "# Print results\n", "print(f\"Cross-Validated Accuracy: Mean = {mean_accuracy:.4f}, Std Dev = {std_dev_accuracy:.2f}\")\n", "print(f\"Cross-Validated Precision: Mean = {mean_precision:.4f}, Std Dev = {std_dev_precision:.2f}\")\n", "print(f\"Cross-Validated Recall: Mean = {mean_recall:f}, Std Dev = {std_dev_recall:.2f}\")\n", "print(f\"Cross-Validated F1-Score: Mean = {mean_f1:f}, Std Dev = {std_dev_f1:.2f}\")\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "31564605", "metadata": {}, "outputs": [], "source": [ "#Explainable AI" ] }, { "cell_type": "code", "execution_count": 20, "id": "d20f61b9", "metadata": {}, "outputs": [], "source": [ "#LIME" ] }, { "cell_type": "code", "execution_count": 23, "id": "98db29bd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Collecting lime\n", " Downloading lime-0.2.0.1.tar.gz (275 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m275.7/275.7 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", "\u001b[?25hBuilding wheels for collected packages: lime\n", " Building wheel for lime (setup.py) ... 
\u001b[?25ldone\n", "\u001b[?25h Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283839 sha256=64ed670f4986da3fa78e3fc8fd1dd5b97fcc7bd240ed8b31219a309d94a55936\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-feidwlvb/wheels/ca/cb/e5/ac701e12d365a08917bf4c6171c0961bc880a8181359c66aa7\n", "Successfully built lime\n", "Installing collected packages: lime\n", "Successfully installed lime-0.2.0.1\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip3.7 install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install lime" ] }, { "cell_type": "code", "execution_count": 24, "id": "cf8e1840", "metadata": {}, "outputs": [], "source": [ "from lime import lime_tabular\n", "#feature_names = X.columns\n", "\n", "inter= lime_tabular.LimeTabularExplainer(training_data=np.array(X_train),\n", "\t\t\t\t\t\t\t\t\t\t\t\tfeature_names =X_train.columns,\n", " mode= 'classification')" ] }, { "cell_type": "code", "execution_count": 32, "id": "ef24e17f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "\n", " | Age (yrs) | \n", "Weight (Kg) | \n", "BMI | \n", "Pulse rate(bpm) | \n", "Hb(g/dl) | \n", "Cycle(R/I) | \n", "Cycle length(days) | \n", "Marraige Status (Yrs) | \n", "Hip(inch) | \n", "Waist(inch) | \n", "... | \n", "hair growth(Y/N) | \n", "Skin darkening (Y/N) | \n", "Hair loss(Y/N) | \n", "Pimples(Y/N) | \n", "Fast food (Y/N) | \n", "Follicle No. (L) | \n", "Follicle No. (R) | \n", "Avg. F size (L) (mm) | \n", "Avg. F size (R) (mm) | \n", "Endometrium (mm) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20 | \n", "0.555556 | \n", "0.231362 | \n", "0.294513 | \n", "0.855072 | \n", "0.698654 | \n", "0.0 | \n", "0.3 | \n", "0.739797 | \n", "0.318182 | \n", "0.304348 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.045455 | \n", "0.15 | \n", "0.747054 | \n", "0.496519 | \n", "0.236298 | \n", "
1 rows × 23 columns
\n", "