{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "1d56aa6c", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import os\n", "import math\n", "from tabulate import tabulate\n", "from difflib import SequenceMatcher\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "from collections import defaultdict\n", "from itertools import chain, combinations\n", "from fpgrowth_py import fpgrowth\n", "import time\n", "\n", "from dna import dna\n", "from scov import numpy_image_dict\n", "from helper import *\n", "import zlib\n", "import lzma" ] }, { "cell_type": "code", "execution_count": 2, "id": "1d03a554", "metadata": {}, "outputs": [], "source": [ "input_file1 = r'./input/China_Seq.txt'" ] }, { "cell_type": "code", "execution_count": 3, "id": "ddb79f61", "metadata": {}, "outputs": [], "source": [ "input_file2= r'./input/USA_Seq.txt'" ] }, { "cell_type": "code", "execution_count": 4, "id": "07294521", "metadata": {}, "outputs": [], "source": [ "df_sequence = pd.DataFrame(columns=[\"line no\",\"sequence ratios\"])" ] }, { "cell_type": "code", "execution_count": 5, "id": "ae2da989", "metadata": {}, "outputs": [], "source": [ "count_lines=0" ] }, { "cell_type": "code", "execution_count": 6, "id": "adf7ace4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
line nosequence ratios
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [line no, sequence ratios]\n", "Index: []" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sequence" ] }, { "cell_type": "code", "execution_count": 7, "id": "d32d1210", "metadata": {}, "outputs": [], "source": [ "current_start_time = time.time()" ] }, { "cell_type": "code", "execution_count": 8, "id": "a48476fe", "metadata": {}, "outputs": [], "source": [ "with open(input_file1) as file1, open(input_file2) as file2:\n", " for lineno, (sequence1, sequence2) in enumerate(zip(file1, file2), 1):\n", " sequence = SequenceMatcher(a=sequence1 , b=sequence2) #comparing both the strings\n", " #print(lineno,\" - \",sequence.ratio())\n", " df_sequence.loc[len(df_sequence.index)] = [lineno,sequence.ratio()]\n", " df_sequence['line no'] = df_sequence['line no'].astype(int)\n", " count_lines+=1" ] }, { "cell_type": "code", "execution_count": 9, "id": "761e7493", "metadata": {}, "outputs": [], "source": [ "current_end_time = time.time()" ] }, { "cell_type": "code", "execution_count": 10, "id": "54c99034", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total Time : 1.7870872020721436\n" ] } ], "source": [ "print(\"Total Time :\",current_end_time-current_start_time)" ] }, { "cell_type": "code", "execution_count": 11, "id": "24d5ea72", "metadata": {}, "outputs": [], "source": [ "df_sequence_missings = df_sequence[df_sequence['sequence ratios']<1]" ] }, { "cell_type": "code", "execution_count": 12, "id": "f74e49b1", "metadata": {}, "outputs": [], "source": [ "length_changed_genome = len(df_sequence_missings)" ] }, { "cell_type": "code", "execution_count": 13, "id": "d7393abd", "metadata": {}, "outputs": [], "source": [ "perc_missing_values = (length_changed_genome/count_lines)*100" ] }, { "cell_type": "code", "execution_count": 14, "id": "63ddc7c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Percentage of changed Genome Sequence 1.31 %\n" ] } ], "source": [ "print(\"Percentage of changed Genome Sequence \",round(perc_missing_values,2),\"%\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "64ef5d9d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
line nosequence ratios
1211220.985915
2492500.985915
2512520.985915
2542550.985915
4264270.985915
5895900.985915
6016020.985915
6116120.989474
\n", "
" ], "text/plain": [ " line no sequence ratios\n", "121 122 0.985915\n", "249 250 0.985915\n", "251 252 0.985915\n", "254 255 0.985915\n", "426 427 0.985915\n", "589 590 0.985915\n", "601 602 0.985915\n", "611 612 0.989474" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sequence_missings" ] }, { "cell_type": "code", "execution_count": 16, "id": "5b7fab09", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEPCAYAAABP1MOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4kklEQVR4nO3dd3gU1frA8e+b0JEOAkoJTRSlGiIgKEUpKqCACtgQBcSLoFfx4lUxihflp1e9XLkqWMACCKIISBGRIgiS0LsiRQJKCx1Dy/v7YyZhk2ySDW5J2PfzPHmyO3N25t3JZN+dc86cI6qKMcaY8BUR6gCMMcaEliUCY4wJc5YIjDEmzFkiMMaYMGeJwBhjwly+UAeQU2XLltWoqKhQh2GMMXnKihUrDqhqOW/r8lwiiIqKIj4+PtRhGGNMniIiOzNbZ1VDxhgT5iwRGGNMmLNEYIwxYS5giUBEPhSRfSKyPpP1IiIjRWSriKwVkUaBisUYY0zmAnlFMBZon8X6DkAt96cv8E4AYzHGGJOJgCUCVV0EJGZRpDPwsTqWASVFpGKg4jHGGONdKNsILgd2eTxPcJdlICJ9RSReROL3798flOCMMSZc5InGYlUdrarRqhpdrpzX+yGMMcZcIPH3fAQrVqyIioyM7BsREdHhzJkz5Q4dOnRpuXLl9qQvd/jw4TIFCxZMKly48AmAffv2XV6mTJk/IiMjz2W1/YMHD1atWNFqkIwx4SsyMpJixYpRunRpChYs6NNrRGSFqkZ7W+fXO4tXrFgRlT9//i/Lly9fsmTJkseSk5MTt27dWqZu3boH0pdNTEw8s2/fvktr166989ixY0Xz5ct3+uqrr96b3T42btxY9aqrrvIpnrUJh3P+JrJQr1JJv24P/B8jWJz+ZnH6V7jG6a8YVRWSz3H82EmOH/+NKlWq+JwMMuPXRBAZGdm3fPnyJcuXL5/4yy+/VDtx4kSxc+fO5Vu9enW9ihUr7lFVAahQocL+UqVKHTly5EiJdevWXSMiyVFRUTv8GYsxxlyMRAQi85GvSHFKFTpLYmIif7WWxK+JICIiokPJkiWPAdSqVWt7VmVFhGrVqv3mz/0bY0w4KV68ODt27PjLicCvjcWqWqpAgQJn/LlNY4wx3uXPn59z57JsVvWJ33sNiYi/N2mMMcYLf33e5onuo8YYYwLHEoExxoQ5SwTGGBPmgjpDWdSQb671z5ay7JB0waYNuD4g2w2E3bt+45Zm9enUrQfD3vxfqMO5YCNHjuTdd99l+/btJCUl8eabb/L4448HdJ9xSxfz8F0deeSJf9D/70MCui8R4cYbb2TBggUB3Y8xf4VdEQSAiFijuQ8mTpzIoEGDKFSoEI8//jgvvPACTZo0yfI1sbGxqcf3/vvvz7Rc/NIl1K9civqVS9GhaT1/h57nbdmyhT59+lCzZk0KFSpE0aJFqVatGm3btuWll15i795s7+00F5E8N2exuXjMmDEj9fdll12Wo9fmy5ePL774gpEjR1KyZMkM66dMGEe+fPk4e/ZshnXXNGjE1Pk/UbJ0mQuKOyc2bdpEkSJFAr6fnPj++++59dZbSUpKomnTprRv357ixYuzZ88efvzxR+bOnUuzZs0oX758qEM1QWKJwITMnj3OEFQ5TQIAt912G1OnTuWzzz7jb3/7W5p1Rw8fZt7M6dxwU3u+nz0jw2sLFy5CtZpXXFjQOXTllVcGZT850a9fP5KSkhg7diwPPPBAhvVr166lVKlSIYjMhIpVDQXJjh07EBF69erFjh076N69O2XLlqVxzQr0uKUVC7+b7fV1J44f47UXn+XmxlfTuGYFOreM4ePRo9Dk5Ez3dfLkSV555RUaNGhA0aJFueSSS2jatCkTJkxIU+6nn36iQIECVK9enSNHjqRZ9/vvv1O+fHkuueQSNm/e7NN7PH3qFB+MepOuNzXjulqX0eyqKvTq0oE5079KU+6dN16lfuVSzJ8/HzhflZaT6rT27dtTqVIl3n///Qzrpn/5OadOJdG1p/eqo7ili6lfuRTvvPFqmuUJO3fw0j8e57bmjYipWZEW11Sj603NeOSRRzh48OD593n6NCNHjqRRo0aUKlWKIkWKEBUVRefOnfnuu+/SbFNEaNmyZZplsbGx1K9cirili5n7zdf0vK0N19W6jBbXVOPpR3uz9/cMYzQCsH71Svr17ELTKyvT7Koq9O1xO2tWLE89nr60Q+zbt4+tW7dSokQJr0kAoF69elSuXDnD8r2/72b4c4O55foGRNcozw11qzPwwR6sX73S63YO7t/HC089RquGVxBTsyJ3tWvBtMkTWLBgASJCbGxsmvJRUVFERUV53VZKlaC397h582aef+JR2sZczbXVL6VVwysYMuBhdvz6S4ayzz/xKPUrl2L3rt+Y/OlHdL2pGY1rVqBVwyt46R+Pc+zokQyvAUhISGDgwIHUqlWLwoULU7p0aWJiYhg2bJjXsgMGDKB69eoULFiQMmXK0KlTJ6/H6cTxY7z31mt0adOUZldVoemVlbn1+oYM7t+bjWtXe40lEOyKIMh27txJTEwM1atX57777uPXhD+YM/0rHn/oHt6bMJWYZi1Sy54+dYo+3W9nw5qV1K5zDbfc0Y1jR48weuRrrFi2xOv2jx45QvNOd7Bq1SoaNWpE7969SU5OZs6cOfTs2ZMNGzbw8ssvA3DdddcxfPhwBg8eTJ8+fZg0aRIAycnJ3HPPPezbt4+xY8dy5ZVXZjtg1pnTp+l/b1fily2hWs0ruPuBh0j680/mzpzG04/2ZsuGdQwcMhSA6KbNeQSY9eVEdu7cyQsvvJDj4xgZGUnv3r156aWXiI+PJzr6/KCKX44fx+VVqnJd85Y+b2//3j/oeVtrThw/RvNWN9Pmlo6cPnWK3b/t5JNPPmHAgAGUKeNUJfXq1YsJEyZwzTXXcP/991O4cGH27NnD4sWLmT17NjfddJNP+5z08QcsmDuLljd3ILpJM9atWsGc6V/x86YNTJq9iAIeA4mtWLaER+7tSvK5c7Ru35HKVaP4ZctGHr67U5pzJjslSpQgX758HD9+nN9//93noQk2rVvDI/d04cjhQzS7sTVt2nfkcOJB5n/7Db26duDNMZ/QonXb1PKHEg9y/+3tSPhtBw0bN6FhTBMO7N3Ly8/8nXbt2maxp5yZPXs2Xbp04cyZM9xwU3uqRFVj7+97mDd7Bj98/y3vfz6dq+rWz/C6t4a/wI8L53HDTe1pekNr4pb+wJTx4/htxzbe/3xamrLx8fG0a9eOxMREbrjhBrp06cLJkyfZuHEjsbGxPP/886llV65cSdu2bUlMTKRdu3Z06dKFAwcOMHXqVGbPmZPmOKkqj97XjdXxy6l/bWPu6H4f+fJFsvf3PcQtXUyj65pQp14Dvx2rrFgiCLIFCxYQGxub+uG3NuEwHTp349H7ujHu3ZFp/qk/Hv02G9aspE2Hjrz+7lgiIpwLuN6PPkGPW1p63f5rsc+watUqRowYwdNPP526PCkpidtvv53hw4fTrVs3GjRoAMCTTz7J/PnzmTx5Mu+99x79+vVj2LBhzJ8/n/vvvz/Tb43pfTz6beKXLaF5q5v4z4cTyJfPObX6PfEP7u3Yhg9GvckNN7WjQfR1NG7anMZNm7Np5TJ27tyZ4Zuhr3r37s3LL7/MmDFjUhPB2pVxbN2yiQGDn83RFcbcmdM4cvgQT8e+wj0PPZJmXY1S+VOP/ZEjR5g4cSLXXnstP/30E5GRkWnKel45ZGfJgnmMnz6PWlddnbpsyICHmfX1FOZ/O5N2He8AnMQcO3ggp0+dYtTHk2je6ubU8pM++ZB//fNJn/dZsGBBOnfuzJQpU2jevDn9+/enRYsW1K1bN9O2jLNnzzK4/4OcPHmC9z+fTnTT873r9v3xOz1va0Ps4IHM+nFNavL674hhJPy2g3sf6s/g2OGp5bv36sP9t/snERw6dIgePXpQpEgRxkyaQY0rzlfD/bJ5I/d1bkvs0wP5fNbCDK9duzKOL+YuoeLllVPfY5+7OxH34w+sW7WCug2dDo6nT5/mzjvvJDExkc8++4yePXum2U5CQkLq47Nnz3LXXXdx/Phx5s+fz4033pi6bs+ePTRoFJ3mOG3dvJHV8ctp1e5W3nr/0zTbTU5O5vjRo3/9IPnIqoaCrGrVqjz33HNpll3fsg0VL6+U4dLx60njiYiI4Il/vpj6QQRQqUpVevbul2Hbhw8l8s1Xk4iOjk6TBAAKFSrEiBEjUFXGjx+fulxEGDduHJdffjmPP/44o0aNYtiwYdSuXZv//c/3bqlTP/8MEeGpof9KTQIAZcqWo8+gwQB8OeETn7fni6pVq9K2bVsmTJjAiRMnAJgy/mMiIyPpfNc9F7TNgoUKZVhWtGhRChcuDDjHS1UpWLBgmr9JipSrBl/0fLBvmiQA0KWnk3g9z4XV8T/x245tNG7WIk0SAOh2Ty+qVq/p8z4BxowZQ5cuXdi+fTuDBw+mSZMmFCtWjPr16/Pcc89l6DH0w7xv2bVzOz169UmTBAAurVCRB/sP5MC+vfy0xPnAPXPmDN98NZmilxTjkb//I035q+s35J57Luxvk97HH3/M4cOHefHFF9MkAYBaV9aha4/72bx+Lb/+nLFqs9/jT6cmAXA6H6ScM+vXrEhdPn36dHbs2EGnTp0yJAGASpUqpT7+5ptv+PXXX3nsscfSJAFw2sHSH6cUhbyccxERERT30gkiUOyKIMgaNGiQ4VskQPnLLmftirjU5yeOH+O3HduocNnlVI6qlqF8dNPm8OaINMs2rF7JuXPnvNa/gvMPCk5PFk9ly5Zl/PjxtG7dmgEDBlCoUCE+//xzihYt6tN7Son10gqXeW2EjWl2AwCbN6z1aXs50adPH2bPns3EiRO58847mTP9K1q0bsulFSp67TGUmZY3t+e/I4bxynOD+XHh9zS7sTUNoq/L8AFTvHhxOnbsyPTp02nQoAFdu3alRYsWXHfddTnuHVSnXsMMyypUdGZrPXrkcOqyzRvWAdCwccautREREdS/Noad27b6vN9SpUoxZcoUduzYwZw5c4iPjycuLo61a9eydu1a3nnnHWbPnk3jxo0BWLNyOQC/707I0K4C8Nv2XwHY9svPtGjdlh1bfybpz5M0imlKseIlMpRv2bIl48aN8znezCxdutSJb80a1v+6K8P6ndudY7J9688Z/o5ej/1l7rE/fL6dYNmyZQB06NDB53gyu8pdv2YDcP44Vb/iSmpfXZdZX0/h9927aNn2Fho2bsLV9RqSv0CBbPfnT5YIgsxbV0eAfJH5SPZoAD5+zLksLFP2Uq/ly5bLuPzw4UQA4uLiiIuLy7A+ddvHj2dYFhMTQ5UqVdi+fTutWrWifv2M9aqZbs+Ntdyl3rsbpizPrCHur+jYsSPly5fn/fff58yZM/x58gRdMmkkzspllarw2fTveOeNEfy48DvmzZoOOB8Oz/zjaQYOHJha9vPPP2fEiBGMHz8+tYqvUKFCdOvWjddff93nbpfFSmT8kIx0r6aSPUaUTKkiKFPW+zStZS5w+taoqCj69etHv37O1WVCQgKPPvoo06dPp0+fPqxevRqAI4cOAfDtjKlZbu/Pk85V2bGUc9fLOQpQoUKFC4o3vZRquDFjxmRZ7qR7tejJW4JKPfbJ54/94cOHAbj8cq/TqXuNZ/LkyVmWSzlOkZGRvD9xGu+99X/Mnfk1bw2PBaDoJcXo2K07g4YMpUjRS7Ldrz9YIsilLilWHICDB/Z5XX9gf8blKa954okneOONN3K0v0GDBrF9+3bKli3LrFmz+Oyzz3y+hE/Z74H93m9C2r/PWV7MLedP+fPn58EHH+TVV18lISGB8hUvy1B94qvqtWrz2jsfcvbsWX7euJ5lixcw4aMxDBo0iKJFi/LQQw8BULhwYWJjY4mNjWXXrl0sWrSIsWPH8umnn7Jjxw5++OEHf75FihYrBsDBA/u9rj+43/vynKpUqRITJ06kVKlSrFmzhsTERCAi9e/7nw8+o2XbW7LdTsrf+aCXcxTgjz/+8Lo8IiKC06dPe12X8oHsqYSbSNesWQOlq2Qb14VI+eK2e/fubMumxPP111/TqVOnDOu9dbgoXrIkg2OHMzh2OL9t30b8T0v44tOxTBw7hmNHjzD8P+/9pfh9ZW0EuVTRS4pRJao6+/74nV07Mg6pEb90cYZl1zS4loiIiBx/EE2aNInRo0dzww03sHLlSsqVK8cjjzzCL79k7H6XWayVq1Zj3x+/s9OtJvAU96MTz5XX+H6VkRMPP/wwIkJCQgK3332v16q3nMiXLx916jWg96OPM+Jt59vm1KlTvZatXLky99xzD3PmzKFmzZosXrw4Rw3Gvrjq6roArIpblmFdcnIya1Ys99u+ChYsSAG3WiJlPvN6jZyG+JXLl/q0jaiaV1CocBG2bFzv9Sows26upUqVYu/evalVmJ7i4+MzLEu5C93fidfbPmbNmuVz2QuNp0q16nTpfh8fTp5BkaKXsODb7PfpL5YIcrHOd/UkOTmZt16JTVNtlPDbTsZ/mPGbQpmy5bjljjuJj49n2LBhXies+PXXX9m+/Xxi2bZtG3369KFMmTKMHz+eypUrM27cOE6cOMHdd9/NqVOnfIr19rvvQVV58+WhafZ7KPEgo0e+llomEGrUqMHs2bP56quvvDai+2Lj2tVeP7RSvoWn1P/v37+fdevWZSh34sQJjh8/Tr58+VI/SP2lQeMmVK5ajbgff2Dx/Llp1n3x2dgctQ+cOHGCYcOGZTqExFtvvcXx48epU6dOasN3y7a3ULlqNT4f9wE/fP+t19etWbGcP/88CThXabfecScnjh/j3TfStWOtWcVnn33mdRsxMTGcPXuWjz76KM3ysWPHsmRJxu7SDz74ICVLluTFF19k3aoVGdYnJycT5+ULU0507NiRqKgopk2bluE+HEjba6hz587UqFGDUaNGMXPmTK/b8zxOCb/tJGHnjgxljh45zOnTp7x2XAgUqxrKxe7vO4Dv58zku5nT6N7hRpre2JpjR4/w7YypXBvTjAVzM35jeGbY/3Fg906GDh3KJ598QvPmzSlfvjx79uxh06ZNxMXFMWHCBKpVq8aZM2fo3r07R48eZdq0aan1oB06dODJJ5/k9ddf56mnnuK///1vtrE+0O8xFs//jvnfzuTOts1p0fpm/vzzT+Z+8zWJB/bTq/9AGsU09fsxStG2rdMl8UInCJ/x5ed88elYGsY0oVLVKIqXKEnCzh0snDubggULpg6Et3v3bho2bEjdunVTb7w6evQoM2bM4I8//mDgwIEUc6ty/CUiIoIXXhvJo/d1Y1DvnrTp0JHKVavx8+YNLPthAc1b3cTi+d957cWU3pkzZxg6dCgvvvgiMTExNGjQgFKlSpGYmMiSJUtYt24dRYsW5d133019Tf78+XljzMf0v7cbAx64mwbRMdSuU5dChQvzx57dbFizioTfdjBvxWYKF3YS5mP/eJ6fFi/k0w/eYcPaVan3EcyZ/hW33HIL06ZNyxDbY489xkcffUT//v2ZN28elStXZvXq1SxdupTbbrstdUiSFGXKlOGLL77gjjvu4L7ON3Nd8xupccWVCMIfv+9mzYo4jhxOJG6r96ooXxQoUIDJkyfTtm1bevbsyXvvvUeTJk1ISkpi06ZNzJs3L7VTQv78+fnyyy9p164dt956K82aNaNBgwYUKVKEXbt2sWTpT2mO088b1/P3vvdxdf1GVK95BeXKV+BQ4kHmfzuTs2fO8GD/QRccd04FNRHsePXWjGk7hzZu3HhtnTp1fCp7oR8KuUWBggUZPeEr3nljBHOmf8X4D9/jskpV6PPYU7Rpf5vXRHBJseIsXLiQ0aNHM378eKZMmUJSUhLly5enVq1avPnmm9x8s1OHPmTIEOLi4hg4cCAdO3ZMs53hw4ezaNEi3n77bVq3bk2Nxq2yjDV/gQK8N/4rPhkziplTv2DCR2OIzJePK+pczdMvDKfD7d38d2ACoH3nrpw+dYo1K5azce1qTiUlcWmFirTr1IV/DX2Ga665BnAaWF988UUWLFjA/PnzOXDgAKVLl6Z27dq8+uqrdO/ePSDxNW7anA8mz2DUa//ih++dq4K6Da/l/c+n8c1XTuNk8eLZt8EUL16cWbNmMXfuXBYvXszUqVPZv38/hQoVolq1agwaNIjHH388wx2+V1x1DZPm/MAnY0ax6Ls5fD1pPBIRQblLy3PlNfXo/+SQNGM3lSpdhnFfzWbkiGEs+m42G9euJqpGTZ4d/m9aNKrjNRHUqVOH7777jn/+859Mnz6dfPny0aJFC5YuXcqXX36ZIREAtGnThrVr1zIk9l8sXfg9K5cvJX/+ApQrX4GY61twU4eMdfU5FR0dzerVq3n11VeZNWsWP/74I8WKFaNmzZq89NJLacrWq1ePNWvW8MYbbzBjxgw++ugjIiIiqFixYobjdHV9p/pxxU9LWLJwHkePHKZU6bLUqVufnr37XXBb14WQlHpAf1izZs2O+vXrH/DbBr0IZSKoV6mkX7cHgUlWFqd/5fY4H7ijHetWreDIkSM+d/n1VSCOZ+LW1bRq1YoXXnjhgm8mTC8v/N0DFeOmTZu46qqrsi0rIitUNdrbOmsjMCYP+PPPkxw9krEN4+tJ41kdv5ymN7TyexIw4cPaCIzJA/7YncDd7W+kSYuWVI6qzrlzZ9m8fi2r4pZRrEQJnnz+5VCHaPIwSwTG5AFlyl7KLXfcyYplS4hbupjTp09RttyldL7rHvo89qTXu8+N8ZUlAmPygOIlSxL72shQh+EXLVu2xJ9tk+avszYCY4wJc5YIjDEmzPk9EdglnzHGBIe/Pm/9mghE5NDp06fz+3ObxhhjvDtz5sxfHlsL/JwIkpOTZx0+fNi/99cbY4zx6ujRo34Z0sSvieDcuXOj9+7de3jv3r2lT506ld+qiYwxxr9UFT13lrMnj3Lo0CFKly79l7fp1yEmAFasWBEVGRnZNyIiooOqlvLrxoGDBw9W9XXC7YRDf/p135VKFfbr9sD/MYLF6W8Wp3+Fa5z+jPHEmWTid//J811jKOjOE52drIaY8HsiCLTo6Gj1Nja5N1FDvvHrvne8eqtftwf+jxEsTn+zOP0rXOMMdYwhG2tIRNqLyBYR2SoiQ7ysryIi80VklYisFZHspz8yxhjjVwFLBCISCYwCOgB1gB4ikn7Y0OeASaraEOgO/C9Q8RhjjPEukFcEMcBWVd2mqqeBiUDndGUUSBlEvQSwJ4DxGGOM8SKQieByYJfH8wR3madY4F4RSQBmAo9525CI9BWReBGJ3++nibqNMcY4Qj3ERA9grKpWAm4BPhGRDDGp6mhVjVbV6HLlygU9SGOMuZgFMhHsBip7PK/kLvP0EDAJQFWXAoWAsgGMyRhjTDqBTARxQC0RqSYiBXAag9NPVPob0AZARK7CSQRW92OMMUEUsESgqmeBAcAcYBNO76ANIvKSiKTMKP0k0EdE1gATgF6a125sMMaYPC6gE9Oo6kycRmDPZUM9Hm8Erg9kDMYYY7IW6sZiY4wxIWaJwBhjwtwFJQIRudLfgRhjjAmNC70i+NavURhjjAmZTBuLRWRkZquAkgGJxhhjTNBl1WvoQZzunae8rOsRmHCMMcYEW1aJIA5Yr6o/pl8hIrEBi8gYY0xQZZUIugFJ3laoarXAhGOMMSbYMk0EqpoYzECMMcaEht1HYIwxYc4SgTHGhDlLBMYYE+ayTAQiUkpEXk63rIeINAtsWMYYY4Ily0SgqoeAm0WkpsfiocDPAY3KGGNM0PhSNfQB0BtARFoCG1X1QABjMsYYE0S+JIIJQFcREaAXMCagERljjAmqbBOBqh4DfgTuBq7DmXHMGGPMRcLXGcreB6YDb9tUksYYc3Hxqfuoqi4BPsFJCMYYYy4iPs9ZrKqDAhmIMcaY0LAbyowxJsxZIjDGmDBnicAYY8KcT20E7pASUZ7lVfXjAMVkjDEmiLJNBCLyCVADWA2ccxcrYInAGGMuAr5cEUQDdez+AWOMuTj50kawHqgQ6ECMMcaEhi9XBGWBjSKyHDiVslBVOwUsKmOMMUHjSyKIDXQQxhhjQifbRKCqC0WkPNDYXbRcVfcFNixjjDHBkm0bgYjcBSwH7gTuAn4SkW6BDswYY0xw+FI19CzQOOUqQETKAd8BXwQyMGOMMcHhS6+hiHRVQQd9fJ0xxpg8wJcP9NkiMkdEeolIL+AbYKYvGxeR9iKyRUS2isiQTMrcJSIbRWSDiIz3PXRjjDH+4Etj8WAR6Qpc7y4arapfZfc6EYkERgE3AwlAnIhMU9WNHmVqAc8A16vqIRG59ELehDHGmAvn01hDqjoFmJLDbccAW1V1G4CITAQ6Axs9yvQBRqnqIXc/1hvJGGOCLNOqIRFZ7P4+JiJHPX6OichRH7Z9ObDL43mCu8zTFcAVIrJERJaJSPtMYukrIvEiEr9//34fdm2MMcZXmV4RqGpz93exAO+/FtASqAQsEpG6qno4XSyjgdEA0dHRNuaRMcb4kS/3EXziyzIvdgOVPZ5Xcpd5SgCmqeoZVd0O/IyTGIwxxgSJL72GrvZ8IiL5gGt9eF0cUEtEqolIAaA7MC1dmak4VwOISFmcqqJtPmzbGGOMn2TVRvCMiBwD6nm2DwB7ga+z27CqngUGAHOATcAkVd0gIi+JSMqAdXOAgyKyEZgPDFbVg3/xPRljjMmBrNoIXgFeEZFXVPWZC9m4qs4k3T0HqjrU47ECf3d/jDHGhIAv9xE8IyKlcOruC3ksXxTIwIwxxgSHL1NVPgwMwmnsXQ00AZYCrQMamTHGmKDwpbF4EM4Q1DtVtRXQEDgcyKCMMcYEjy+JIElVkwBEpKCqbgZqBzYsY4wxweLLEBMJIlISp6vnXBE5BOwMZFDGGGOCx5fG4jvch7EiMh8oAcwKaFTGGGOCJkfzCqjqQiAJH4ehNsYYk/tldUNZaxH5WUSOi8inIlJXROKBV4B3gheiMcaYQMrqiuDfQF+gDM60lEuBsap6rap+GYzgjDHGBF5WbQSqqgvcx1NFZLeqvh2EmIwxxgRRVomgpIh08Szr+dyuCowx5uKQVSJYCHT0eL7I47kClgiMMeYikNWgcw8GMxBjjDGhkaPuo8YYYy4+lgiMMSbMWSIwxpgw58ucxUVE5HkRGeM+ryUitwU+NGOMMcHgyxXBR8ApoKn7fDfwcsAiMsYYE1S+JIIaqvp/wBkAVT0JSECjMsYYEzS+JILTIlIY594BRKQGzhWCMcaYi4Av8xG8AMwGKovIZ8D1QK9ABmWMMSZ4fJmPYK6IrMSZq1iAQap6IOCRGWOMCQpfeg3dAZxV1W9UdQZwVkRuD3hkxhhjgsKXNoIXVPVIyhNVPYxTXWSMMeYi4Esi8FbGl7YFY4wxeYAviSBeRN4QkRruzxvAikAHZowxJjh8SQSPAaeBz92fU8DfAhmUMcaY4PGl19AJYEgQYjHGGBMC2SYCEbkCeAqI8iyvqq0DF5Yxxphg8aXRdzLwLvA+cC6w4RhjjAk2XxLBWVV9J+CRGGOMCQlfGouni8ijIlJRREqn/AQ8MmOMMUHhyxXBA+7vwR7LFKju/3CMMcYEW7ZXBKpazcuPT0lARNqLyBYR2SoimfY8EpGuIqIiEp2T4I0xxvx1vs5Q9pyIjHaf+zRDmYhEAqOADkAdoIeI1PFSrhgwCPgpp8EbY4z563ydoew00Mx97usMZTHAVlXdpqqngYlAZy/lhgEjgCQftmmMMcbPAjlD2eXALo/nCe6yVCLSCKisqt9ktSER6Ssi8SISv3//fh92bYwxxlchm6FMRCKAN4AnsyurqqNVNVpVo8uVK/dXd22MMcZDIGco2w1U9nheyV2WohhwDbBARAAqANNEpJOqxvuwfWOMMX4QyBnK4oBaIlINJwF0B3p6bPcIUDbluYgsAJ6yJGCMMcHly1hDN7gPj7m/64gIqrooq9ep6lkRGQDMASKBD1V1g4i8BMSr6rS/Ergxxhj/8KVqyPNGskI4vYFWANkOOqeqM4GZ6ZYNzaRsSx9iMcYY42e+VA119HwuIpWBtwIVkDHGmODypddQegnAVf4OxBhjTGj40kbwX9yuoziJowGwMoAxGWOMCSJf2gg8e/GcBSao6pIAxWOMMSbIfGkjGBeMQIwxxoSGL1VD6zhfNZRmFaCqWs/vURljjAkaX6qGZrm/P3F/3+P+tlnLjDHmIuBLIrhZVRt6PB8iIitVNdP5BYwxxuQdvnQfFRG53uNJMx9fZ4wxJg/w5YrgIeBDESnhPj8M9A5YRMYYY4LKl15DK4D6KYnAHSzOGGPMRcKXqSrLi8gHwERVPSIidUTkoSDEZowxJgh8qesfizOC6GXu85+BxwMUjzHGmCDzJRGUVdVJQDI4w0sD5wIalTHGmKDxJRGcEJEynJ+qsglg7QTGGHOR8KXX0N+BaUANEVkClAO6BTQqY4wxQeNLr6GVInIjUBtnWIktqnom4JEZY4wJikyrhkSksYhUgNR2gWuBfwH/FpHSQYrPGGNMgGXVRvAecBpS5y1+FfgYp31gdOBDM8YYEwxZVQ1Fqmqi+/huYLSqTgGmiMjqgEdmjDEmKLK6IogUkZRE0Qb43mOdL43Mxhhj8oCsPtAnAAtF5ADwJ/ADgIjUxLqPGmPMRSPTRKCq/xKReUBF4FtV9Zy3+LFgBGeMMSbwsqziUdVlXpb9HLhwjDHGBJvNK2CMMWHOEoExxoQ5SwTGGBPmLBEYY0yYs0RgjDFhzhKBMcaEOUsExhgT5iwRGGNMmLNEYIwxYS6giUBE2ovIFhHZKiJDvKz/u4hsFJG1IjJPRKoGMh5jjDEZBSwRiEgkMAroANQBeohInXTFVgHRqloP+AL4v0DFY4wxxrtAXhHEAFtVdZuqngYmAp09C6jqfFU96T5dBlQKYDzGGGO8CGQiuBzY5fE8wV2WmYeAWd5WiEhfEYkXkfj9+/f7MURjjDG5orFYRO4FooHXvK1X1dGqGq2q0eXKlQtucMYYc5EL5Exju4HKHs8rucvSEJGbgGeBG1X1VADjMcYY40UgrwjigFoiUk1ECgDdgWmeBUSkIfAe0ElV9wUwFmOMMZkIWCJQ1bPAAGAOsAmYpKobROQlEenkFnsNuASYLCKrRWRaJpszxhgTIAGdhF5VZwIz0y0b6vH4pkDu3xhjTPZyRWOxMcaY0LFEYIwxYc4SgTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwTGGBPmLBEYY0yYs0RgjDFhzhKBMcaEOUsExhgT5iwRGGNMmLNEYIwxYc4SgTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwTGGBPmLBEYY0yYs0RgjDFhzhKBMcaEOUsExhgT5iwRGGNMmLNEYIwxYc4SgTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwTGGBPmLBEYY0yYC2giEJH2IrJFRLaKyBAv6wuKyOfu+p9EJCqQ8RhjjMkoYIlARCKBUUAHoA7QQ0TqpCv2EHBIVWsCbwIjAhWPMcYY7wJ5RRADbFXVbap6GpgIdE5XpjMwzn38BdBGRCSAMRljjElHVDUwGxbpBrRX1Yfd5/cB16nqAI8y690yCe7zX90yB9Jtqy/Q131aG9ji53DLAgeyLRV6Fqd/5YU480KMYHH6WyDirKqq5bytyOfnHQWEqo4GRgdq+yISr6rRgdq+v1ic/pUX4swLMYLF6W/BjjOQVUO7gcoezyu5y7yWEZF8QAngYABjMsYYk04gE0EcUEtEqolIAaA7MC1dmWnAA+7jbsD3Gqi6KmOMMV4FrGpIVc+KyABgDhAJfKiqG0TkJSBeVacBHwCfiMhWIBEnWYRCwKqd/Mzi9K+8EGdeiBEsTn8LapwBayw2xhiTN9idxcYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwS5nIiUE5GGIlJPRC4JdTy+EpErQx1DVkTk0VDHkB0RqSkiXb2M0ZUr5NVzEyCvxBus/yPrNeRBRK5U1c2hjgPA/ecfCUQBVYBVwKXAQmCQqh4JXXTZE5HfVLVKqOMAEJG/p18EPAMMB1DVN4IelBciMh+4U1UPuEOyPA8sAq4DRqvqf0MaoCuvn5uQu87PrAQrzjwxxEQQfYtzYucGHwIPqOoWEYkB/qaq14lIH5z7L7qFNjwQkZGZrQJKBjGU7LwIzAQ24MQGzr0txUIWkXflPMbZGgg0VdWDIlIEWAbkikRAHjg3wesXgNRVQK65IsgN/0dhd0WQzUF/QFWLBzOezIjIGlWt7/F8pao2ch9vUtWrQhddakzHgCeBU15W/1tVywY5JK9EpArwb2Ab8KKqnhSRbapaPcShpSEiq4DbVHW3e3XQQVWT3CHd16rq1SEOEcgb5yaAiCQBrwFnvax+QlVLBjci73LD/1E4XhE8SOYHvUeQY8nKryLyPPA90AVYDSAi+ck9bTtxwHpV/TH9ChGJDX443qnqb8CdItIZmCsib4Y6pkw8AXwrIlNwrl6+F5E5QHPgo5BGllZeODcBVgJTVXVF+hUi8nAI4slMyP+PwvGK4HvguUwO+nZVrRaCsDIQkZLAP3Em9VkDvKqqx0SkBHCVqi4LZXwAIlIaSFLVk6GOxVciUhSIxRnu/IYQh5OB+/ftCVyB80UtAfg6t7RdQd44NwFEpDaQqKr7vawrr6p7QxBWBrnh/ygcE0HID/qFEpFLVXVfqOMwxlxcctNlXFCoamJeSAIiUjrdTxlguYiUcpNZyIlIe4/HJUTkAxFZKyLjRaR8KGPzlC7OkiLyfi6NM1JE+onIMBFplm7dc6GKKz0RiRaR+SLyqYhUFpG5InJEROJEpGGo40vhnpOvishmEUkUkYMissldVjLU8flCRGYFYz9hlwhEpLiIvCIin4hIz3Tr/hequLw4AKzw+IkHLsep94wPYVyehns8/jfwO9ARp87zvZBE5J1nnK8Df5A743wPuBFnTo7/iohnt9YuoQnJq/8B/wd8A/wIvKeqJYAh7rrcYhJwCGipqqVVtQzQyl02KaSReRCRRpn8XAs0CEoMYVg1NAX4Bac7Xm/gDNBTVU959n4INRF5ErgZGKyq69xluaYNAzL0Flmtqg081qV5Hkp5KM61qlrPfZwP50O1LE4nhmWqmiu+bYvIqpRY0vdz91wXaiKyRVVr53RdsInIOZx7MLzN195EVQsHOoZw7DVUQ1W7uo+nisizOL0zOoUyqPRU9d8i8jnwpojsAl4AclvWvtTtqy1AcRERj4mFctPVZl6Js0DKA1U9C/QVkRdweufkmn7vQJKItMWZUVBF5HZVnSoiNwLnQhybp50i8jQwLqVh2K0K7AXsCmVg6WwC+qnqL+lXuP/7AZeb/gmCpaCIpL5vVf0XMAbnDs4yIYvKC1VNUNU7gQXAXKBIaCPKYAzOTVmXAONwvr0iIhVwuxTmEnklznjP9gwAVX0Rp+toVEgi8q4/Thfs3kA7oJWIHMK5ghkUysDSuRvnf3qhiBwSkUSc/6XSwF2hDCydWDL/LH4sGAGEY9XQ/wHfqup36Za3B/6rqrVCE1lG4owzcjnwE843rRqqul5E2qvq7NBG5/CMUVWPeyzPNTFC3okzPRH5WFXvD3UcWRGRFkAMsE5Vvw11PClE5Dpgs6oeEefu7CFAI5x7NIbnlqEw3Dg3qepRESmMM/xJQ2AjQYoz7BJBVkTkQVXNFTfuiMhA4G84l40NcMZw+dpdlyvaMkTkMWAAuThGyFNxpp/TW3AaN78HUNVcUX0pIstVNcZ93Ad4FJgKtAWmq+qrIQwvlYhsAOqrM23uaOAEMAVo4y7PFQ3wXuI8CXxBEOMMxzaCrKRchucGfYBrVfW4iEQBX4hIlKr+B++NSqHQl9wfI+SdOCvjfFt9H6c9SIBonB5ZuUl+j8d9gbaqul9EXsfphJErEgEQ4ba1AER7JPzFIrI6RDF5E/I4wy4RiMjazFYBuaZPOc7JcRxAVXeISEucD7Cq5J4Pr7wQI+SdOK/FqWN/Fqe32GoR+VNVF4Y4rvQiRKQUTr22pNy5q6onRMTbuD6hst7jKn+NiESraryIXIHTWzC3CHmcYZcIcD7s2+H0JfYkOH2ic4u9ItJAVVcDuN9mb8MZ+bFuSCM7Ly/ECHkkTlVNxuklNtn9vZfc+T9aAufeFsHpNVRRVX8XZ4z/3JRYHwb+I87NeAeApW4vnF3uutwi5HGGXRuBiHwAfKSqi72sG6+qPb28LOhEpBJwVlX/8LLuelVdEoKw0seR62OEvBNneiJyK3C9qv4z1LH4wm2QLa+q20MdiycRKQ5Uwx27KbeMMZReKOMMu0RgjDEmrXC8j8AYY4wHSwTGGBPmLBHkkIgcz75UmvItRWSGn/adX0RWelneW0TWiTOi5npxJmAx6fjzb+Hj/mJF5Klg7c/kjIjMlByOQioij4hIrr7B70Lkxh4JJnPNgTQNm25D6LNAI/cOykuAcqEI7mInIpGqGrSxdIK9v9xORPJ59Lf/y1T1lgt4zbv+2n9uYlcEF8j9drlARL4QZ7zzz0RE3HXt3WUr8Rg+WESKisiHIrJcRFalfHMXkf+IyFD3cTsRWSQe4yF5aA+kH5/8UuAYkNJP/nhKrw0RqSEis0VkhYj84A6zgIhUE5Gl7lXEyylXOem/MYvI2yLSy318rYgsdLc1R0QqussXiMgI9z39LM5wAylj67/uXqGsFefu3ky3k+7YjhWRkSLyo4hsE5FuPsS3Q5zhxVeLSLw4w/jOEZFfReQRj80XF5FvRGSLiLybcpxFpK17TFaKyGQ3oaZsd4T7t7zTY9+RIrJdHCVF5JyI3OCuWyQiKUOV1HGP0TZx7hZPef297jFbLSLviTMvMSJyXET+LSJrgKaZlUt3vG5xz7cV7nGb4S7P7HzrJSJfuufGL+IMu5KyrR7uebFeREZ4LD8uIq+JyAYR+U5EYjzeVyePY/KaOPMSrBWRfu7yiu4xWe1ut4WX9zDUfd16ERktkvq/tEBE3hKReGBQDs6fd0RkmRtfS/c4bBKRsR7ldohIWfc4fSMia9z93+2uf1VENrrv5XV3WepVnmR+7hcRkUnua78SkZ9EJDp9nLmKqtpPDn6A4+7vlsARoBJOQl2K8429EE7/31o4faonATPc1wwH7nUflwR+BoriDCa3AWc4gS04Ywp52/dyoEi6ZZHAHOA3nLuiO3qsmwfUch9fB3zvPp4G3O8+/lu69zTD4/Vv44zUmB/nHoty7vK7gQ/dxwtwJtgGuAX4zn3cH+c2+Xzu89JZbSfdexoLTHaPax1ga1bxuY93AP3dx28Ca3EGmisH7PV4fRJQ3T1uc4FuOIPQLQKKuuX+AQz12O7Tmfw9ZgNXA7fhzG3wLFAQ2O6uj3Xfb0F3HwfdY3AVMB3I75b7n8ffQ4G73MeZlvOIIeV8q+Y+n0D251svYBvO/QCFgJ04dzVfhnMelcOpLfgeuN0jrg7u46+Ab933Uh9Y7S7vizMNLO57jsfpDvkk8KzH+VrMy7Es7fH4E9zzGOf8+p/7OCfnz0Sc/7/OwFGc+0UicO5/aODxty0LdAXGeLy+BM5gdVs437OypMff9Klszv2ncOZoALgGOItzx3DIP78y+7Gqob9muaomgDOuPc4IkcdxPgh+cZd/ivMPAs5YLJ3kfL1xIaCKqm4SZ8yWRcATqvpr+h2JyOU486+mmV1NVc+JM2BeY5yxSd4UZ0KL14FmwGT3yxU4/5wA1+Oc/OD8040ga7VxTui57rYicSahSfGl+3sF50fJvAl4V91LeVVNFJFrstmOp6nq3GC1UXyfRSxlrJ51wCWqegw4JiKn5Hxd8HJV3QYgIhNwkncSTsJZ4sZVACexp/g8k/39ANyA82H3Cs6wIAtxkkKKb1T1FHBKRPbh3NDYBucu4jh3f4WBlClIz+GMh0M25VJcCWzT8333J5DN+eY+nqfuYGYishGoivPht0DdO4VF5DP3/U0FTuMkPnCO7ylVPSMi6zj/N28L1BP3Cg7nA7WWezw+FGdy+6nq3tiXTitxhowugvOlYQNOEoTzxz+789DTdFVVN769en5Ojw1uvJ4xrAP+7V4BzVDVH8SZDyIJ+MC9wsqsbcnbud8c+A+AOoNEZjaaQa5hieCvOeXx+BzZH08BuqrqFi/r6uJ8Y7wsk9e2x/nmn4E6Xz2W40xlORfnyuAN4LBmPumKtxtIzpK2urCQR9wbVLVpJttKOQ7ZHYPstuNtmymvyyq+9K9JTvf6ZI+40r/vlDF95qpqj0xiOZHJ8kU4Vz6XAUOBwThXHT94iQnOHx/BGSP/GS/bTNLz7QJZlfOF1/NNnNEuc3runnHPM/A4vqqa7H5opuzvMVXNcJ6KU212KzBWRN5Q1Y891hXCudqJVtVdIhJL2r9tyvG/kPMnq3MB9z38LCKNcL7Vvywi81T1JRGJwUnG3XAGLWydxX58OYa5lrUR+N9mIEpEarjPPT9c5gCPedR/pszyVBXn8rkh0MH9R03PW/sAInKZexKnaADsVNWjwHYRudMtJyJS3y2zBOjuPr7H47U7ceq0C7rfoNu4y7cA5USkqbut/CJyddaHgblAv5QPCXHmWb6Q7XjKLL6ciBGnjSQCp2phMc5AadeLSE03rqLijPOSneU4V13JqpqE8y2zH06CyMo8oJuIXOrur7R7DlxIuS1AdXEG08N9Tym8nm/ZvJ8b3XrzSJxzNyfjHM0B+rvf/BGRK9xjWRXnW/kYnAH10o/2mvKhf0CctpluePdXzx+vROQy4KSqfgq8BjRy4yihqjOBJ3CqwHy1BHe+AxGpQy4axiQzeTaD5VaqmiQifYFvROQkzrfDYu7qYcBbwFr3g2i7iHQEPsCpd9wjIg/hfGtq7H644P5T1lTVzV52mR943T2Zk4D9QErj6D3AO+KMYZIfp950Dc7AZuNF5B/A1x6x7xKRScB6YDuwyl1+2r3cHykiJXDOm7dwLt8z8z5whftez+DUwb59AdtJlVl8ORSH07ZQE5gPfOV+q+0FTBCRlOqz53Dq1LOK55Q4Y8Iscxf9gPPhuS6b1210/ybfuufBGZy2mp05Laeqf4rIo8BsETlB2mqpDOcbTntGZnH9LiJDcI6L4FRrfZ1ZeS/ex6keWekmn/3A7ThXSYPd8+A4kKb7paoeFpExOH/XP9K9B89yF3Ie+qIu8JqIJOMc4/44/7Nfu1crAvw9B9v7HzDOrXLb7MaXK+Y+yIwNMZEHiEhznEa/R7ItfGHbP66quWkqRJMDInKJOgPpCTAK+EVV3wx1XOHK/eKW3/1SWAP4DqitqqdDHFqm7IogD1BngLwMg+QZ4+ojIg/gNHKvAt4LcTzhrggw360iE+DR3JwEwK4IjDEm7FljsTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoS5/wfqfBFAp44dGQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df_sequence_missings['sequence ratios'].plot(kind='bar')\n", "\n", "plt.ylabel(\"Sequence Ration < 1\")\n", "plt.xlabel(\"Index / Sequence number where genomes are missing\")\n", "\n", "#add custom legend to bar chart\n", "plt.legend(['Index of Missing Sequences'], prop={'size': 20})" ] }, { "cell_type": "code", "execution_count": 17, "id": "d12ca912", "metadata": {}, "outputs": [], "source": [ "sequence1_All=''\n", "sequence2_All=''" ] }, { "cell_type": "code", "execution_count": 18, "id": "e5789852", "metadata": {}, "outputs": [], "source": [ "with open(input_file1) as file1:\n", " for lineno in file1:\n", " lineno=lineno.strip('\\n')\n", " sequence1_All+=lineno" ] }, { "cell_type": "code", "execution_count": 19, "id": "b6064bd9", "metadata": {}, "outputs": [], "source": [ "with open(input_file2) as file2:\n", " for lineno in file2:\n", " lineno=lineno.strip('\\n')\n", " sequence2_All+=lineno" ] }, { "cell_type": "code", "execution_count": 20, "id": "f8b08d7a", "metadata": {}, "outputs": [], "source": [ "def basic_Analysis(DNAseq):\n", " total_base = len(DNAseq)\n", " num_Adenine = DNAseq.count('A')\n", " num_Guanine = DNAseq.count('G')\n", " num_Thymine = DNAseq.count('T')\n", " num_Cytosine = DNAseq.count('C')\n", " \n", " if total_base != num_Adenine + num_Guanine + num_Thymine + num_Cytosine:\n", " print('Something is not right')\n", " else : pass\n", " \n", " A_percent = num_Adenine / total_base\n", " G_percent = num_Guanine / total_base\n", " T_percent = num_Thymine / total_base\n", " C_percent = num_Cytosine / total_base\n", " \n", " #visualization\n", " x = np.arange(4)\n", " bases = ['Adenine', 'Guanine', 'Thymine' ,'Cytosine']\n", " values = [num_Adenine, num_Guanine, num_Thymine, num_Cytosine]\n", " plt.bar(x,values)\n", " plt.xticks(x, bases)\n", " plt.show()\n", " table = [['total base',total_base,'Percentage',str('100%')],\n", " ['Adenine:',num_Adenine, 'Percentage:',str(round(A_percent*100,2))+'%'],\n", " ['Guanine:',num_Guanine, 'Percentage:',str(round(G_percent*100,2))+'%'],\n", " ['Thynime:',num_Thymine, 'Percentage:',str(round(T_percent*100,2))+'%'],\n", " ['Cytosine:',num_Cytosine, 'Percentage:',str(round(C_percent*100,2))+'%']]\n", " print(tabulate(table))\n", " print('GC content:', round((((num_Guanine + num_Cytosine) / total_base)*100),2),'%')" ] }, { "cell_type": "code", "execution_count": 21, "id": "61655d2a", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVUUlEQVR4nO3df5TddX3n8eerZFERISBTikmOyZGs3UAphVnIrv1hxRMCeky6pQrrlkCz5myLu3Wtq2B3mxbLEXd7ykqrdFNJCR7Kj0UsOYLGHMDVtYBMAIEQKSM/JDkgIwnolhUMvveP+5nlMsyQmbmTmYQ8H+fcc7/f9/fz+X4/98vNvO73x72kqpAk7dt+ZqYHIEmaeYaBJMkwkCQZBpIkDANJEjBrpgcwWYcddljNnz9/pochSXuVTZs2/aCq+kbW99owmD9/PgMDAzM9DEnaqyR5dLS6p4kkSbsOgyRrkzyZ5L5Rlv1BkkpyWJtPkouTDCa5J8lxXW1XJHmwPVZ01Y9Pcm/rc3GSTNWLkySNz3iODC4Dlo4sJpkHLAG+11U+BVjYHquAS1rbQ4HVwInACcDqJIe0PpcAH+jq97JtSZJ2r12GQVV9Hdg+yqKLgI8C3b9nsQy4vDpuA2YnOQI4GdhYVduragewEVjalh1UVbdV53cxLgeW9/SKJEkTNqlrBkmWAduq6tsjFs0BHuua39pqr1TfOkp9rO2uSjKQZGBoaGgyQ5ckjWLCYZDkAODjwB9N/XBeWVWtqar+qurv63vZnVGSpEmazJHBW4AFwLeTPALMBe5M8nPANmBeV9u5rfZK9bmj1CVJ02jCYVBV91bVz1bV/KqaT+fUznFV9QSwHjiz3VW0GHimqh4HNgBLkhzSLhwvATa0ZT9MsrjdRXQmcP0UvTZJ0jiN59bSK4Fbgbcm2Zpk5Ss0vxF4CBgE/hr4PYCq2g58ArijPc5vNVqbz7U+3wW+PLmXIkmarOyt/3Ob/v7+8hvI2hvNP/eGmR7CjHrkwnfN9BD2aUk2VVX/yLrfQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJKAWTM9gJngTwj7E8KSXsojA0mSYSBJMgwkSRgGkiQMA0kShoEkiXGEQZK1SZ5Mcl9X7b8l+U6Se5J8McnsrmXnJRlM8kCSk7vqS1ttMMm5XfUFSW5v9auT7D+Fr0+SNA7jOTK4DFg6orYROLqqjgH+ATgPIMki4HTgqNbns0n2S7If8BngFGARcEZrC/Ap4KKqOhLYAazs6RVJkiZsl2FQVV8Hto+ofbWqdrbZ24C5bXoZcFVVPVdVDwODwAntMVhVD1XV88BVwLIkAd4BXNv6rwOW9/aSJEkTNRXXDH4H+HKbngM81rVsa6uNVX8j8HRXsAzXJUnTqKcwSPKHwE7giqkZzi63tyrJQJKBoaGh6dikJO0TJh0GSc4C3g28v6qqlbcB87qazW21sepPAbOTzBpRH1VVramq/qrq7+vrm+zQJUkjTCoMkiwFPgq8p6qe7Vq0Hjg9yWuSLAAWAt8C7gAWtjuH9qdzkXl9C5FbgNNa/xXA9ZN7KZKkyRrPraVXArcCb02yNclK4C+BNwAbk9yd5K8AqmozcA1wP/AV4JyqeqFdE/ggsAHYAlzT2gJ8DPhwkkE61xAundJXKEnapV3+hHVVnTFKecw/2FV1AXDBKPUbgRtHqT9E524jSdIM8RvIkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRLjCIMka5M8meS+rtqhSTYmebA9H9LqSXJxksEk9yQ5rqvPitb+wSQruurHJ7m39bk4Sab6RUqSXtl4jgwuA5aOqJ0L3FRVC4Gb2jzAKcDC9lgFXAKd8ABWAycCJwCrhwOktflAV7+R25Ik7Wa7DIOq+jqwfUR5GbCuTa8DlnfVL6+O24DZSY4ATgY2VtX2qtoBbASWtmUHVdVtVVXA5V3rkiRNk8leMzi8qh5v008Ah7fpOcBjXe22ttor1beOUh9VklVJBpIMDA0NTXLokqSRer6A3D7R1xSMZTzbWlNV/VXV39fXNx2blKR9wmTD4PvtFA/t+clW3wbM62o3t9VeqT53lLokaRpNNgzWA8N3BK0Aru+qn9nuKloMPNNOJ20AliQ5pF04XgJsaMt+mGRxu4vozK51SZKmyaxdNUhyJfB24LAkW+ncFXQhcE2SlcCjwHtb8xuBU4FB4FngbICq2p7kE8Adrd35VTV8Ufr36Nyx9Drgy+0hSZpGuwyDqjpjjEUnjdK2gHPGWM9aYO0o9QHg6F2NQ5K0+/gNZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIYx//2Uhpp/rk3zPQQZtQjF75rpocgTTmPDCRJhoEkqccwSPIfk2xOcl+SK5O8NsmCJLcnGUxydZL9W9vXtPnBtnx+13rOa/UHkpzc42uSJE3QpMMgyRzgPwD9VXU0sB9wOvAp4KKqOhLYAaxsXVYCO1r9otaOJItav6OApcBnk+w32XFJkiau19NEs4DXJZkFHAA8DrwDuLYtXwcsb9PL2jxt+UlJ0upXVdVzVfUwMAic0OO4JEkTMOkwqKptwJ8B36MTAs8Am4Cnq2pna7YVmNOm5wCPtb47W/s3dtdH6fMSSVYlGUgyMDQ0NNmhS5JG6OU00SF0PtUvAN4EvJ7OaZ7dpqrWVFV/VfX39fXtzk1J0j6ll9NE7wQerqqhqvoJcB3wNmB2O20EMBfY1qa3AfMA2vKDgae666P0kSRNg17C4HvA4iQHtHP/JwH3A7cAp7U2K4Dr2/T6Nk9bfnNVVauf3u42WgAsBL7Vw7gkSRM06W8gV9XtSa4F7gR2AncBa4AbgKuS/GmrXdq6XAp8PskgsJ3OHURU1eYk19AJkp3AOVX1wmTHJUmauJ5+jqKqVgOrR5QfYpS7garqx8BvjbGeC4ALehmLpH2DP4eye34OxW8gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkugxDJLMTnJtku8k2ZLkXyQ5NMnGJA+250Na2yS5OMlgknuSHNe1nhWt/YNJVvT6oiRJE9PrkcGnga9U1c8DvwhsAc4FbqqqhcBNbR7gFGBhe6wCLgFIciiwGjgROAFYPRwgkqTpMekwSHIw8KvApQBV9XxVPQ0sA9a1ZuuA5W16GXB5ddwGzE5yBHAysLGqtlfVDmAjsHSy45IkTVwvRwYLgCHgb5LcleRzSV4PHF5Vj7c2TwCHt+k5wGNd/be22lj1l0myKslAkoGhoaEehi5J6tZLGMwCjgMuqapfAv6RF08JAVBVBVQP23iJqlpTVf1V1d/X1zdVq5WkfV4vYbAV2FpVt7f5a+mEw/fb6R/a85Nt+TZgXlf/ua02Vl2SNE0mHQZV9QTwWJK3ttJJwP3AemD4jqAVwPVtej1wZruraDHwTDudtAFYkuSQduF4SatJkqbJrB77/3vgiiT7Aw8BZ9MJmGuSrAQeBd7b2t4InAoMAs+2tlTV9iSfAO5o7c6vqu09jkuSNAE9hUFV3Q30j7LopFHaFnDOGOtZC6ztZSySpMnzG8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkpiAMkuyX5K4kX2rzC5LcnmQwydVJ9m/117T5wbZ8ftc6zmv1B5Kc3OuYJEkTMxVHBr8PbOma/xRwUVUdCewAVrb6SmBHq1/U2pFkEXA6cBSwFPhskv2mYFySpHHqKQySzAXeBXyuzQd4B3Bta7IOWN6ml7V52vKTWvtlwFVV9VxVPQwMAif0Mi5J0sT0emTw34GPAj9t828Enq6qnW1+KzCnTc8BHgNoy59p7f9/fZQ+L5FkVZKBJANDQ0M9Dl2SNGzSYZDk3cCTVbVpCsfziqpqTVX1V1V/X1/fdG1Wkl71ZvXQ923Ae5KcCrwWOAj4NDA7yaz26X8usK213wbMA7YmmQUcDDzVVR/W3UeSNA0mfWRQVedV1dyqmk/nAvDNVfV+4BbgtNZsBXB9m17f5mnLb66qavXT291GC4CFwLcmOy5J0sT1cmQwlo8BVyX5U+Au4NJWvxT4fJJBYDudAKGqNie5Brgf2AmcU1Uv7IZxSZLGMCVhUFVfA77Wph9ilLuBqurHwG+N0f8C4IKpGIskaeL8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJHsIgybwktyS5P8nmJL/f6ocm2ZjkwfZ8SKsnycVJBpPck+S4rnWtaO0fTLKi95clSZqIXo4MdgJ/UFWLgMXAOUkWAecCN1XVQuCmNg9wCrCwPVYBl0AnPIDVwInACcDq4QCRJE2PSYdBVT1eVXe26R8BW4A5wDJgXWu2DljeppcBl1fHbcDsJEcAJwMbq2p7Ve0ANgJLJzsuSdLETck1gyTzgV8CbgcOr6rH26IngMPb9Bzgsa5uW1ttrLokaZr0HAZJDgS+AHyoqn7YvayqCqhet9G1rVVJBpIMDA0NTdVqJWmf11MYJPkndILgiqq6rpW/307/0J6fbPVtwLyu7nNbbaz6y1TVmqrqr6r+vr6+XoYuSerSy91EAS4FtlTVn3ctWg8M3xG0Ari+q35mu6toMfBMO520AViS5JB24XhJq0mSpsmsHvq+Dfht4N4kd7fax4ELgWuSrAQeBd7blt0InAoMAs8CZwNU1fYknwDuaO3Or6rtPYxLkjRBkw6DqvrfQMZYfNIo7Qs4Z4x1rQXWTnYskqTe+A1kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIk9KAySLE3yQJLBJOfO9HgkaV+yR4RBkv2AzwCnAIuAM5IsmtlRSdK+Y48IA+AEYLCqHqqq54GrgGUzPCZJ2mekqmZ6DCQ5DVhaVf+2zf82cGJVfXBEu1XAqjb7VuCBaR3o1DkM+MFMD2Iv5v7rjfuvN3v7/ntzVfWNLM6aiZFMVlWtAdbM9Dh6lWSgqvpnehx7K/dfb9x/vXm17r895TTRNmBe1/zcVpMkTYM9JQzuABYmWZBkf+B0YP0Mj0mS9hl7xGmiqtqZ5IPABmA/YG1VbZ7hYe1Oe/2prhnm/uuN+683r8r9t0dcQJYkzaw95TSRJGkGGQaSJMNgIpIsT1JJfn6M5V9LMqlbzpK859X8MxxJDk/yt0keSrIpya1JfmOKt/Gq2odJ3pjk7vZ4Ism2Nv10kvt3w/belOTaqV7vTEvyc0muSvLd9t67Mck/HaPtsUlOneR29ur95zWDCUhyNfAm4OaqWj3K8q8BH6mqgeke254sSYC/B9ZV1V+12puB91TVX8zo4PYSSf4Y+D9V9WdJ5gNfqqqjZ3ZUe74x3nu/CBxUVd8Ypf1ZQP/IL7zuCzwyGKckBwK/DKykc+srSV7XPnFsSfJF4HVd7Ze0T793JvmfrT9JHknyJ61+7/BRRpKzkvxlm74sycVJ/r59kj6ta73/KckdSe5J8ifTtwd68g7g+eF/jABV9WhV/UX36wZI8qUkb2/TlyQZSLK5+7Xuo/twpP2S/HXbN19t78W3JLlzuEGShcPzbZ99sh1ZDCQ5LsmG9mn537U285Pc16bPSnJdkq8keTDJf+1a76jv7T3UrwM/GfHe+zbwgSTLh2tJrkiyDDgfeF/bT+9LcmiSv2vvlduSHNPa/1rXUdtdSd6wt+8/w2D8lgFfqap/AJ5Kcjzwu8CzVfXPgNXA8QBJDgP+M/DOqjoOGAA+3LWuH7T6JcBHxtjeEXTC593AhW29S4CFdH7L6Vjg+CS/OpUvcjc5Crhzl61e7g/bNz2PAX5t+B9is6/tw5EWAp+pqqOAp4HfrKrvAs8kOba1ORv4m64+36uqY4FvAJcBpwGLgbEC8VjgfcAv0PkDOW8c7+09zdHAplHqlwJnASQ5GPiXwA3AHwFXV9WxVXU1nX1zV1UdA3wcuLz1/whwTtufvwL831G2cSx70f7bI75nsJc4A/h0m76qzR8JXAxQVfckuactX0zn11e/2TlKZX/g1q51XdeeNwH/aozt/V1V/RS4P8nhrbakPe5q8wfS+aPw9cm/rOmX5DN0/kg/T+fXasfy3nR+j2oWnT/si4DhfbxP70Pg4aq6u01vAua36c8BZyf5MJ0/RCd09Rn+Iue9wIFV9SPgR0meSzJ7lG3cVFXPAKRzjeLNwGxe+b29V6iq/5Xks0n6gN8EvtC+7zSy6S+35VTVzelcxzkI+Cbw50muAK6rqq2j9N2r9p9hMA5JDqVzquMXkhSdL8YVL/5BeVkXYGNVnTHG8ufa8wuM/d/gua7pdD1/sqr+x3jHvofYTPsHBVBV57RPSAPATl56hPpagCQL6Hz6+udVtSPJZcPLmn1tH47U/dpe4MVTlF+gc5R6M7Cpqp4apc9PR/T/KaPvw5HbmMWu39t7ms10joBGcznwb+ic9j17IiutqguT3ACcSucP+8nAj0c026v2n6eJxuc04PNV9eaqml9V84CH6Xwi+9cASY6mczoD4DbgbUmObMtenzHuXpigDcDv5MXrD3OS/OwUrHd3uxl4bZLf7aod0J4fAY5N8jNJ5vHiJ9mDgH+kc9rjcDr/r4upsLfuw3Gpqh/TeY2X8NJTRFNld723d5ebgde0I0wAkhyT5FfonCr7EEBVDd+d9SPgDV39vwG8v/V7O53Tkz9M8paqureqPkXn53RGvcNwFHvs/jMMxucM4Isjal8AFgAHJtlC58LTJoCqGqJzPvLKduroVsb/ZhlTVX0V+Fvg1iT3Atfy0jfuHqk6t6wtp3Pe/+Ek3wLWAR+jc7j9MHA/nVNud7Y+36Zz5PUdOq/5m1M0lr1yH07QFXQ+7X91qle8u97bu0t77/0G8M52sXwz8Engiar6PrCFl4bmLcCi4QvIwB/Tua50D53rTitauw8lua/VfwJ8eZzj2WP3n7eWSq8yST4CHFxV/2Wmx7InS3IAnesnxw2f29+Xec1AehVJ5xbnt9C5xqUxJHknnTuKLjIIOjwykCR5zUCSZBhIkjAMJEkYBpIkDANJEvD/AG3vovyAnWp7AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "---------- ----- ----------- ------\n", "total base 42483 Percentage 100%\n", "Adenine: 12688 Percentage: 29.87%\n", "Guanine: 8393 Percentage: 19.76%\n", "Thynime: 13709 Percentage: 32.27%\n", "Cytosine: 7693 Percentage: 18.11%\n", "---------- ----- ----------- ------\n", "GC content: 37.86 %\n" ] } ], "source": [ "basic_Analysis(sequence1_All)" ] }, { "cell_type": "code", "execution_count": 22, "id": "99fef932", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVUUlEQVR4nO3df5TddX3n8eerZFERISBTikmOyZGs3UAphVnIrv1hxRMCeky6pQrrlkCz5myLu3Wtq2B3mxbLEXd7ykqrdFNJCR7Kj0UsOYLGHMDVtYBMAIEQKSM/JDkgIwnolhUMvveP+5nlMsyQmbmTmYQ8H+fcc7/f9/fz+X4/98vNvO73x72kqpAk7dt+ZqYHIEmaeYaBJMkwkCQZBpIkDANJEjBrpgcwWYcddljNnz9/pochSXuVTZs2/aCq+kbW99owmD9/PgMDAzM9DEnaqyR5dLS6p4kkSbsOgyRrkzyZ5L5Rlv1BkkpyWJtPkouTDCa5J8lxXW1XJHmwPVZ01Y9Pcm/rc3GSTNWLkySNz3iODC4Dlo4sJpkHLAG+11U+BVjYHquAS1rbQ4HVwInACcDqJIe0PpcAH+jq97JtSZJ2r12GQVV9Hdg+yqKLgI8C3b9nsQy4vDpuA2YnOQI4GdhYVduragewEVjalh1UVbdV53cxLgeW9/SKJEkTNqlrBkmWAduq6tsjFs0BHuua39pqr1TfOkp9rO2uSjKQZGBoaGgyQ5ckjWLCYZDkAODjwB9N/XBeWVWtqar+qurv63vZnVGSpEmazJHBW4AFwLeTPALMBe5M8nPANmBeV9u5rfZK9bmj1CVJ02jCYVBV91bVz1bV/KqaT+fUznFV9QSwHjiz3VW0GHimqh4HNgBLkhzSLhwvATa0ZT9MsrjdRXQmcP0UvTZJ0jiN59bSK4Fbgbcm2Zpk5Ss0vxF4CBgE/hr4PYCq2g58ArijPc5vNVqbz7U+3wW+PLmXIkmarOyt/3Ob/v7+8hvI2hvNP/eGmR7CjHrkwnfN9BD2aUk2VVX/yLrfQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJKAWTM9gJngTwj7E8KSXsojA0mSYSBJMgwkSRgGkiQMA0kShoEkiXGEQZK1SZ5Mcl9X7b8l+U6Se5J8McnsrmXnJRlM8kCSk7vqS1ttMMm5XfUFSW5v9auT7D+Fr0+SNA7jOTK4DFg6orYROLqqjgH+ATgPIMki4HTgqNbns0n2S7If8BngFGARcEZrC/Ap4KKqOhLYAazs6RVJkiZsl2FQVV8Hto+ofbWqdrbZ24C5bXoZcFVVPVdVDwODwAntMVhVD1XV88BVwLIkAd4BXNv6rwOW9/aSJEkTNRXXDH4H+HKbngM81rVsa6uNVX8j8HRXsAzXJUnTqKcwSPKHwE7giqkZzi63tyrJQJKBoaGh6dikJO0TJh0GSc4C3g28v6qqlbcB87qazW21sepPAbOTzBpRH1VVramq/qrq7+vrm+zQJUkjTCoMkiwFPgq8p6qe7Vq0Hjg9yWuSLAAWAt8C7gAWtjuH9qdzkXl9C5FbgNNa/xXA9ZN7KZKkyRrPraVXArcCb02yNclK4C+BNwAbk9yd5K8AqmozcA1wP/AV4JyqeqFdE/ggsAHYAlzT2gJ8DPhwkkE61xAundJXKEnapV3+hHVVnTFKecw/2FV1AXDBKPUbgRtHqT9E524jSdIM8RvIkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRLjCIMka5M8meS+rtqhSTYmebA9H9LqSXJxksEk9yQ5rqvPitb+wSQruurHJ7m39bk4Sab6RUqSXtl4jgwuA5aOqJ0L3FRVC4Gb2jzAKcDC9lgFXAKd8ABWAycCJwCrhwOktflAV7+R25Ik7Wa7DIOq+jqwfUR5GbCuTa8DlnfVL6+O24DZSY4ATgY2VtX2qtoBbASWtmUHVdVtVVXA5V3rkiRNk8leMzi8qh5v008Ah7fpOcBjXe22ttor1beOUh9VklVJBpIMDA0NTXLokqSRer6A3D7R1xSMZTzbWlNV/VXV39fXNx2blKR9wmTD4PvtFA/t+clW3wbM62o3t9VeqT53lLokaRpNNgzWA8N3BK0Aru+qn9nuKloMPNNOJ20AliQ5pF04XgJsaMt+mGRxu4vozK51SZKmyaxdNUhyJfB24LAkW+ncFXQhcE2SlcCjwHtb8xuBU4FB4FngbICq2p7kE8Adrd35VTV8Ufr36Nyx9Drgy+0hSZpGuwyDqjpjjEUnjdK2gHPGWM9aYO0o9QHg6F2NQ5K0+/gNZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIYx//2Uhpp/rk3zPQQZtQjF75rpocgTTmPDCRJhoEkqccwSPIfk2xOcl+SK5O8NsmCJLcnGUxydZL9W9vXtPnBtnx+13rOa/UHkpzc42uSJE3QpMMgyRzgPwD9VXU0sB9wOvAp4KKqOhLYAaxsXVYCO1r9otaOJItav6OApcBnk+w32XFJkiau19NEs4DXJZkFHAA8DrwDuLYtXwcsb9PL2jxt+UlJ0upXVdVzVfUwMAic0OO4JEkTMOkwqKptwJ8B36MTAs8Am4Cnq2pna7YVmNOm5wCPtb47W/s3dtdH6fMSSVYlGUgyMDQ0NNmhS5JG6OU00SF0PtUvAN4EvJ7OaZ7dpqrWVFV/VfX39fXtzk1J0j6ll9NE7wQerqqhqvoJcB3wNmB2O20EMBfY1qa3AfMA2vKDgae666P0kSRNg17C4HvA4iQHtHP/JwH3A7cAp7U2K4Dr2/T6Nk9bfnNVVauf3u42WgAsBL7Vw7gkSRM06W8gV9XtSa4F7gR2AncBa4AbgKuS/GmrXdq6XAp8PskgsJ3OHURU1eYk19AJkp3AOVX1wmTHJUmauJ5+jqKqVgOrR5QfYpS7garqx8BvjbGeC4ALehmLpH2DP4eye34OxW8gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkugxDJLMTnJtku8k2ZLkXyQ5NMnGJA+250Na2yS5OMlgknuSHNe1nhWt/YNJVvT6oiRJE9PrkcGnga9U1c8DvwhsAc4FbqqqhcBNbR7gFGBhe6wCLgFIciiwGjgROAFYPRwgkqTpMekwSHIw8KvApQBV9XxVPQ0sA9a1ZuuA5W16GXB5ddwGzE5yBHAysLGqtlfVDmAjsHSy45IkTVwvRwYLgCHgb5LcleRzSV4PHF5Vj7c2TwCHt+k5wGNd/be22lj1l0myKslAkoGhoaEehi5J6tZLGMwCjgMuqapfAv6RF08JAVBVBVQP23iJqlpTVf1V1d/X1zdVq5WkfV4vYbAV2FpVt7f5a+mEw/fb6R/a85Nt+TZgXlf/ua02Vl2SNE0mHQZV9QTwWJK3ttJJwP3AemD4jqAVwPVtej1wZruraDHwTDudtAFYkuSQduF4SatJkqbJrB77/3vgiiT7Aw8BZ9MJmGuSrAQeBd7b2t4InAoMAs+2tlTV9iSfAO5o7c6vqu09jkuSNAE9hUFV3Q30j7LopFHaFnDOGOtZC6ztZSySpMnzG8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkpiAMkuyX5K4kX2rzC5LcnmQwydVJ9m/117T5wbZ8ftc6zmv1B5Kc3OuYJEkTMxVHBr8PbOma/xRwUVUdCewAVrb6SmBHq1/U2pFkEXA6cBSwFPhskv2mYFySpHHqKQySzAXeBXyuzQd4B3Bta7IOWN6ml7V52vKTWvtlwFVV9VxVPQwMAif0Mi5J0sT0emTw34GPAj9t828Enq6qnW1+KzCnTc8BHgNoy59p7f9/fZQ+L5FkVZKBJANDQ0M9Dl2SNGzSYZDk3cCTVbVpCsfziqpqTVX1V1V/X1/fdG1Wkl71ZvXQ923Ae5KcCrwWOAj4NDA7yaz26X8usK213wbMA7YmmQUcDDzVVR/W3UeSNA0mfWRQVedV1dyqmk/nAvDNVfV+4BbgtNZsBXB9m17f5mnLb66qavXT291GC4CFwLcmOy5J0sT1cmQwlo8BVyX5U+Au4NJWvxT4fJJBYDudAKGqNie5Brgf2AmcU1Uv7IZxSZLGMCVhUFVfA77Wph9ilLuBqurHwG+N0f8C4IKpGIskaeL8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJHsIgybwktyS5P8nmJL/f6ocm2ZjkwfZ8SKsnycVJBpPck+S4rnWtaO0fTLKi95clSZqIXo4MdgJ/UFWLgMXAOUkWAecCN1XVQuCmNg9wCrCwPVYBl0AnPIDVwInACcDq4QCRJE2PSYdBVT1eVXe26R8BW4A5wDJgXWu2DljeppcBl1fHbcDsJEcAJwMbq2p7Ve0ANgJLJzsuSdLETck1gyTzgV8CbgcOr6rH26IngMPb9Bzgsa5uW1ttrLokaZr0HAZJDgS+AHyoqn7YvayqCqhet9G1rVVJBpIMDA0NTdVqJWmf11MYJPkndILgiqq6rpW/307/0J6fbPVtwLyu7nNbbaz6y1TVmqrqr6r+vr6+XoYuSerSy91EAS4FtlTVn3ctWg8M3xG0Ari+q35mu6toMfBMO520AViS5JB24XhJq0mSpsmsHvq+Dfht4N4kd7fax4ELgWuSrAQeBd7blt0InAoMAs8CZwNU1fYknwDuaO3Or6rtPYxLkjRBkw6DqvrfQMZYfNIo7Qs4Z4x1rQXWTnYskqTe+A1kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIk9KAySLE3yQJLBJOfO9HgkaV+yR4RBkv2AzwCnAIuAM5IsmtlRSdK+Y48IA+AEYLCqHqqq54GrgGUzPCZJ2mekqmZ6DCQ5DVhaVf+2zf82cGJVfXBEu1XAqjb7VuCBaR3o1DkM+MFMD2Iv5v7rjfuvN3v7/ntzVfWNLM6aiZFMVlWtAdbM9Dh6lWSgqvpnehx7K/dfb9x/vXm17r895TTRNmBe1/zcVpMkTYM9JQzuABYmWZBkf+B0YP0Mj0mS9hl7xGmiqtqZ5IPABmA/YG1VbZ7hYe1Oe/2prhnm/uuN+683r8r9t0dcQJYkzaw95TSRJGkGGQaSJMNgIpIsT1JJfn6M5V9LMqlbzpK859X8MxxJDk/yt0keSrIpya1JfmOKt/Gq2odJ3pjk7vZ4Ism2Nv10kvt3w/belOTaqV7vTEvyc0muSvLd9t67Mck/HaPtsUlOneR29ur95zWDCUhyNfAm4OaqWj3K8q8BH6mqgeke254sSYC/B9ZV1V+12puB91TVX8zo4PYSSf4Y+D9V9WdJ5gNfqqqjZ3ZUe74x3nu/CBxUVd8Ypf1ZQP/IL7zuCzwyGKckBwK/DKykc+srSV7XPnFsSfJF4HVd7Ze0T793JvmfrT9JHknyJ61+7/BRRpKzkvxlm74sycVJ/r59kj6ta73/KckdSe5J8ifTtwd68g7g+eF/jABV9WhV/UX36wZI8qUkb2/TlyQZSLK5+7Xuo/twpP2S/HXbN19t78W3JLlzuEGShcPzbZ99sh1ZDCQ5LsmG9mn537U285Pc16bPSnJdkq8keTDJf+1a76jv7T3UrwM/GfHe+zbwgSTLh2tJrkiyDDgfeF/bT+9LcmiSv2vvlduSHNPa/1rXUdtdSd6wt+8/w2D8lgFfqap/AJ5Kcjzwu8CzVfXPgNXA8QBJDgP+M/DOqjoOGAA+3LWuH7T6JcBHxtjeEXTC593AhW29S4CFdH7L6Vjg+CS/OpUvcjc5Crhzl61e7g/bNz2PAX5t+B9is6/tw5EWAp+pqqOAp4HfrKrvAs8kOba1ORv4m64+36uqY4FvAJcBpwGLgbEC8VjgfcAv0PkDOW8c7+09zdHAplHqlwJnASQ5GPiXwA3AHwFXV9WxVXU1nX1zV1UdA3wcuLz1/whwTtufvwL831G2cSx70f7bI75nsJc4A/h0m76qzR8JXAxQVfckuactX0zn11e/2TlKZX/g1q51XdeeNwH/aozt/V1V/RS4P8nhrbakPe5q8wfS+aPw9cm/rOmX5DN0/kg/T+fXasfy3nR+j2oWnT/si4DhfbxP70Pg4aq6u01vAua36c8BZyf5MJ0/RCd09Rn+Iue9wIFV9SPgR0meSzJ7lG3cVFXPAKRzjeLNwGxe+b29V6iq/5Xks0n6gN8EvtC+7zSy6S+35VTVzelcxzkI+Cbw50muAK6rqq2j9N2r9p9hMA5JDqVzquMXkhSdL8YVL/5BeVkXYGNVnTHG8ufa8wuM/d/gua7pdD1/sqr+x3jHvofYTPsHBVBV57RPSAPATl56hPpagCQL6Hz6+udVtSPJZcPLmn1tH47U/dpe4MVTlF+gc5R6M7Cpqp4apc9PR/T/KaPvw5HbmMWu39t7ms10joBGcznwb+ic9j17IiutqguT3ACcSucP+8nAj0c026v2n6eJxuc04PNV9eaqml9V84CH6Xwi+9cASY6mczoD4DbgbUmObMtenzHuXpigDcDv5MXrD3OS/OwUrHd3uxl4bZLf7aod0J4fAY5N8jNJ5vHiJ9mDgH+kc9rjcDr/r4upsLfuw3Gpqh/TeY2X8NJTRFNld723d5ebgde0I0wAkhyT5FfonCr7EEBVDd+d9SPgDV39vwG8v/V7O53Tkz9M8paqureqPkXn53RGvcNwFHvs/jMMxucM4Isjal8AFgAHJtlC58LTJoCqGqJzPvLKduroVsb/ZhlTVX0V+Fvg1iT3Atfy0jfuHqk6t6wtp3Pe/+Ek3wLWAR+jc7j9MHA/nVNud7Y+36Zz5PUdOq/5m1M0lr1yH07QFXQ+7X91qle8u97bu0t77/0G8M52sXwz8Engiar6PrCFl4bmLcCi4QvIwB/Tua50D53rTitauw8lua/VfwJ8eZzj2WP3n7eWSq8yST4CHFxV/2Wmx7InS3IAnesnxw2f29+Xec1AehVJ5xbnt9C5xqUxJHknnTuKLjIIOjwykCR5zUCSZBhIkjAMJEkYBpIkDANJEvD/AG3vovyAnWp7AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "---------- ----- ----------- ------\n", "total base 42483 Percentage 100%\n", "Adenine: 12686 Percentage: 29.86%\n", "Guanine: 8394 Percentage: 19.76%\n", "Thynime: 13713 Percentage: 32.28%\n", "Cytosine: 7690 Percentage: 18.1%\n", "---------- ----- ----------- ------\n", "GC content: 37.86 %\n" ] } ], "source": [ "basic_Analysis(sequence2_All)" ] }, { "cell_type": "code", "execution_count": 23, "id": "2b164b49", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
line nosequence ratios
011.000000
121.000000
231.000000
341.000000
451.000000
.........
6076081.000000
6086091.000000
6096101.000000
6106111.000000
6116120.989474
\n", "

612 rows × 2 columns

\n", "
" ], "text/plain": [ " line no sequence ratios\n", "0 1 1.000000\n", "1 2 1.000000\n", "2 3 1.000000\n", "3 4 1.000000\n", "4 5 1.000000\n", ".. ... ...\n", "607 608 1.000000\n", "608 609 1.000000\n", "609 610 1.000000\n", "610 611 1.000000\n", "611 612 0.989474\n", "\n", "[612 rows x 2 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sequence" ] }, { "cell_type": "code", "execution_count": 24, "id": "5e3758a9", "metadata": {}, "outputs": [], "source": [ "def Fapriori(itemSetList, minSup, minConf):\n", " C1ItemSet = getItemSetFromList(itemSetList)\n", " # Final result global frequent itemset\n", " globalFreqItemSet = dict()\n", " # Storing global itemset with support count\n", " globalItemSetWithSup = defaultdict(int)\n", "\n", " L1ItemSet = getAboveMinSup(\n", " C1ItemSet, itemSetList, minSup, globalItemSetWithSup)\n", " currentLSet = L1ItemSet\n", " k = 2\n", "\n", " # Calculating frequent item set\n", " while(currentLSet):\n", " # Storing frequent itemset\n", " globalFreqItemSet[k-1] = currentLSet\n", " # Self-joining Lk\n", " candidateSet = getUnion(currentLSet, k)\n", " # Perform subset testing and remove pruned supersets\n", " candidateSet = pruning(candidateSet, currentLSet, k-1)\n", " # Scanning itemSet for counting support\n", " currentLSet = getAboveMinSup(\n", " candidateSet, itemSetList, minSup, globalItemSetWithSup)\n", " k += 1\n", "\n", " rules = FassociationRule(globalFreqItemSet, globalItemSetWithSup, minConf)\n", " rules.sort(key=lambda x: x[2])\n", " return globalFreqItemSet, rules" ] }, { "cell_type": "code", "execution_count": 25, "id": "88d8387a", "metadata": {}, "outputs": [], "source": [ "def FaprioriFromFile(fname, minSup, minConf):\n", " C1ItemSet, itemSetList = getFromFile(fname)\n", "\n", " # Final result global frequent itemset\n", " globalFreqItemSet = dict()\n", " # Storing global itemset with support count\n", " globalItemSetWithSup = defaultdict(int)\n", "\n", " L1ItemSet = getAboveMinSup(\n", " C1ItemSet, itemSetList, minSup, globalItemSetWithSup)\n", " currentLSet = L1ItemSet\n", " k = 2\n", "\n", " # Calculating frequent item set\n", " while(currentLSet):\n", " # Storing frequent itemset\n", " globalFreqItemSet[k-1] = currentLSet\n", " # Self-joining Lk\n", " candidateSet = getUnion(currentLSet, k)\n", " # Perform subset testing and remove pruned supersets\n", " candidateSet = pruning(candidateSet, currentLSet, k-1)\n", " # Scanning itemSet for counting support\n", " currentLSet = getAboveMinSup(\n", " candidateSet, itemSetList, minSup, globalItemSetWithSup)\n", " k += 1\n", "\n", " rules = associationRule(globalFreqItemSet, globalItemSetWithSup, minConf)\n", " rules.sort(key=lambda x: x[2])\n", "\n", " return globalFreqItemSet, rules" ] }, { "cell_type": "code", "execution_count": 26, "id": "c8bd8453", "metadata": {}, "outputs": [], "source": [ "def powerset(s):\n", " return chain.from_iterable(combinations(s, r) for r in range(1, len(s)))" ] }, { "cell_type": "code", "execution_count": 27, "id": "04a9b357", "metadata": {}, "outputs": [], "source": [ "def getFromFile(fname):\n", " itemSets = []\n", " itemSet = set()\n", "\n", " with open(fname, 'r') as file:\n", " csv_reader = reader(file)\n", " for line in csv_reader:\n", " line = list(filter(None, line))\n", " record = set(line)\n", " for item in record:\n", " itemSet.add(frozenset([item]))\n", " itemSets.append(record)\n", " return itemSet, itemSets " ] }, { "cell_type": "code", "execution_count": 28, "id": "e61fb8d0", "metadata": {}, "outputs": [], "source": [ "def getAboveMinSup(itemSet, itemSetList, minSup, globalItemSetWithSup):\n", " freqItemSet = set()\n", " localItemSetWithSup = defaultdict(int)\n", "\n", " for item in itemSet:\n", " for itemSet in itemSetList:\n", " if item.issubset(itemSet):\n", " globalItemSetWithSup[item] += 1\n", " localItemSetWithSup[item] += 1\n", "\n", " for item, supCount in localItemSetWithSup.items():\n", " support = float(supCount / len(itemSetList))\n", " if(support >= minSup):\n", " freqItemSet.add(item)\n", "\n", " return freqItemSet" ] }, { "cell_type": "code", "execution_count": 29, "id": "4d46b1b7", "metadata": {}, "outputs": [], "source": [ "def getUnion(itemSet, length):\n", " return set([i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length])" ] }, { "cell_type": "code", "execution_count": 30, "id": "c4288f6d", "metadata": {}, "outputs": [], "source": [ "def pruning(candidateSet, prevFreqSet, length):\n", " tempCandidateSet = candidateSet.copy()\n", " for item in candidateSet:\n", " subsets = combinations(item, length)\n", " for subset in subsets:\n", " # if the subset is not in previous K-frequent get, then remove the set\n", " if(frozenset(subset) not in prevFreqSet):\n", " tempCandidateSet.remove(item)\n", " break\n", " return tempCandidateSet" ] }, { "cell_type": "code", "execution_count": 31, "id": "4152e510", "metadata": {}, "outputs": [], "source": [ "def FassociationRule(freqItemSet, itemSetWithSup, minConf):\n", " rules = []\n", " for k, itemSet in freqItemSet.items():\n", " for item in itemSet:\n", " subsets = powerset(item)\n", " for s in subsets:\n", " confidence = float(\n", " itemSetWithSup[item] / itemSetWithSup[frozenset(s)])\n", " if(confidence > minConf):\n", " rules.append([set(s), set(item.difference(s)), confidence])\n", " return rules" ] }, { "cell_type": "code", "execution_count": 32, "id": "1b2c0b08", "metadata": {}, "outputs": [], "source": [ "def getItemSetFromList(itemSetList):\n", " tempItemSet = set()\n", "\n", " for itemSet in itemSetList:\n", " for item in itemSet:\n", " tempItemSet.add(frozenset([item]))\n", "\n", " return tempItemSet" ] }, { "cell_type": "code", "execution_count": 33, "id": "e3e50e57", "metadata": {}, "outputs": [], "source": [ "g_sequence1=list()\n", "g_sequence2=list()" ] }, { "cell_type": "code", "execution_count": 34, "id": "c3b0cf11", "metadata": {}, "outputs": [], "source": [ "count=1" ] }, { "cell_type": "code", "execution_count": 35, "id": "fe191c97", "metadata": {}, "outputs": [], "source": [ "with open(input_file1) as file1:\n", " for lineno in file1:\n", " if count==3 or count==70 or count ==94 or count==115 or count==130 or count==139 or count==328 or count==415:\n", " lineno = list(lineno)\n", " lineno.remove(\"\\n\")\n", " g_sequence1.append(lineno)\n", " count+=1\n", " else:\n", " count+=1" ] }, { "cell_type": "code", "execution_count": 36, "id": "375b500c", "metadata": {}, "outputs": [], "source": [ "count1=1" ] }, { "cell_type": "code", "execution_count": 37, "id": "d3275fdb", "metadata": {}, "outputs": [], "source": [ "with open(input_file2) as file2:\n", " for lineno in file2:\n", " if count1==3 or count1==70 or count1 ==94 or count1==115 or count1==130 or count1==139 or count1==328 or count1==415:\n", " lineno = list(lineno)\n", " lineno.remove(\"\\n\")\n", " g_sequence2.append(lineno)\n", " count1+=1\n", " else:\n", " count1+=1" ] }, { "cell_type": "code", "execution_count": 38, "id": "880356a8", "metadata": {}, "outputs": [], "source": [ "freqItemSet, rules = Fapriori(g_sequence1, minSup=.5, minConf=.5)" ] }, { "cell_type": "code", "execution_count": 39, "id": "4bfa662f", "metadata": {}, "outputs": [], "source": [ "freqItemSet1, rules1 = Fapriori(g_sequence2, minSup=.5, minConf=.5)" ] }, { "cell_type": "code", "execution_count": 40, "id": "beecdaae", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{1: {frozenset({'T'}), frozenset({'C'}), frozenset({'G'}), frozenset({'A'})}, 2: {frozenset({'G', 'C'}), frozenset({'G', 'A'}), frozenset({'C', 'A'}), frozenset({'T', 'C'}), frozenset({'G', 'T'}), frozenset({'T', 'A'})}, 3: {frozenset({'T', 'C', 'A'}), frozenset({'G', 'C', 'T'}), frozenset({'G', 'C', 'A'}), frozenset({'G', 'T', 'A'})}, 4: {frozenset({'G', 'C', 'A', 'T'})}} [[{'G'}, {'C'}, 1.0], [{'C'}, {'G'}, 1.0], [{'G'}, {'A'}, 1.0], [{'A'}, {'G'}, 1.0], [{'C'}, {'A'}, 1.0], [{'A'}, {'C'}, 1.0], [{'T'}, {'C'}, 1.0], [{'C'}, {'T'}, 1.0], [{'G'}, {'T'}, 1.0], [{'T'}, {'G'}, 1.0], [{'T'}, {'A'}, 1.0], [{'A'}, {'T'}, 1.0], [{'T'}, {'C', 'A'}, 1.0], [{'C'}, {'T', 'A'}, 1.0], [{'A'}, {'T', 'C'}, 1.0], [{'T', 'C'}, {'A'}, 1.0], [{'T', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'T'}, 1.0], [{'G'}, {'T', 'C'}, 1.0], [{'C'}, {'G', 'T'}, 1.0], [{'T'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'T'}, 1.0], [{'G', 'T'}, {'C'}, 1.0], [{'T', 'C'}, {'G'}, 1.0], [{'G'}, {'C', 'A'}, 1.0], [{'C'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'A'}, 1.0], [{'G', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'A'}, 1.0], [{'T'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'T'}, 1.0], [{'G', 'T'}, {'A'}, 1.0], [{'G', 'A'}, {'T'}, 1.0], [{'T', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'C', 'A'}, 1.0], [{'C'}, {'G', 'T', 'A'}, 1.0], [{'A'}, {'G', 'C', 'T'}, 1.0], [{'T'}, {'G', 'C', 'A'}, 1.0], [{'G', 'C'}, {'T', 'A'}, 1.0], [{'G', 'A'}, {'T', 'C'}, 1.0], [{'G', 'T'}, {'C', 'A'}, 1.0], [{'C', 'A'}, {'G', 'T'}, 1.0], [{'T', 'C'}, {'G', 'A'}, 1.0], [{'T', 'A'}, {'G', 'C'}, 1.0], [{'G', 'C', 'A'}, {'T'}, 1.0], [{'G', 'C', 'T'}, {'A'}, 1.0], [{'G', 'T', 'A'}, {'C'}, 1.0], [{'T', 'C', 'A'}, {'G'}, 1.0]]\n" ] } ], "source": [ "print(freqItemSet,rules)" ] }, { "cell_type": "code", "execution_count": 41, "id": "bd4004c1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{1: {frozenset({'T'}), frozenset({'C'}), frozenset({'G'}), frozenset({'A'})}, 2: {frozenset({'G', 'C'}), frozenset({'G', 'A'}), frozenset({'C', 'A'}), frozenset({'T', 'C'}), frozenset({'G', 'T'}), frozenset({'T', 'A'})}, 3: {frozenset({'T', 'C', 'A'}), frozenset({'G', 'C', 'T'}), frozenset({'G', 'C', 'A'}), frozenset({'G', 'T', 'A'})}, 4: {frozenset({'G', 'C', 'A', 'T'})}} [[{'G'}, {'C'}, 1.0], [{'C'}, {'G'}, 1.0], [{'G'}, {'A'}, 1.0], [{'A'}, {'G'}, 1.0], [{'C'}, {'A'}, 1.0], [{'A'}, {'C'}, 1.0], [{'T'}, {'C'}, 1.0], [{'C'}, {'T'}, 1.0], [{'G'}, {'T'}, 1.0], [{'T'}, {'G'}, 1.0], [{'T'}, {'A'}, 1.0], [{'A'}, {'T'}, 1.0], [{'T'}, {'C', 'A'}, 1.0], [{'C'}, {'T', 'A'}, 1.0], [{'A'}, {'T', 'C'}, 1.0], [{'T', 'C'}, {'A'}, 1.0], [{'T', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'T'}, 1.0], [{'G'}, {'T', 'C'}, 1.0], [{'C'}, {'G', 'T'}, 1.0], [{'T'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'T'}, 1.0], [{'G', 'T'}, {'C'}, 1.0], [{'T', 'C'}, {'G'}, 1.0], [{'G'}, {'C', 'A'}, 1.0], [{'C'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'A'}, 1.0], [{'G', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'A'}, 1.0], [{'T'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'T'}, 1.0], [{'G', 'T'}, {'A'}, 1.0], [{'G', 'A'}, {'T'}, 1.0], [{'T', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'C', 'A'}, 1.0], [{'C'}, {'G', 'T', 'A'}, 1.0], [{'A'}, {'G', 'C', 'T'}, 1.0], [{'T'}, {'G', 'C', 'A'}, 1.0], [{'G', 'C'}, {'T', 'A'}, 1.0], [{'G', 'A'}, {'T', 'C'}, 1.0], [{'G', 'T'}, {'C', 'A'}, 1.0], [{'C', 'A'}, {'G', 'T'}, 1.0], [{'T', 'C'}, {'G', 'A'}, 1.0], [{'T', 'A'}, {'G', 'C'}, 1.0], [{'G', 'C', 'A'}, {'T'}, 1.0], [{'G', 'C', 'T'}, {'A'}, 1.0], [{'G', 'T', 'A'}, {'C'}, 1.0], [{'T', 'C', 'A'}, {'G'}, 1.0]]\n" ] } ], "source": [ "print(freqItemSet1,rules1)" ] }, { "cell_type": "code", "execution_count": 42, "id": "5d6d2717", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(freqItemSet1)" ] }, { "cell_type": "code", "execution_count": 43, "id": "016bf481", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "IOPub data rate exceeded.\n", "The notebook server will temporarily stop sending output\n", "to the client in order to avoid crashing it.\n", "To change this limit, set the config variable\n", "`--NotebookApp.iopub_data_rate_limit`.\n", "\n", "Current values:\n", "NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n", "NotebookApp.rate_limit_window=3.0 (secs)\n", "\n" ] } ], "source": [ "freqItemSet, rules = fpgrowth(g_sequence1,minSupRatio=0.5,minConf=0.5)\n", "print(rules) " ] }, { "cell_type": "code", "execution_count": null, "id": "482b6678", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }