{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "1d56aa6c",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
"import math\n",
"from tabulate import tabulate\n",
"from difflib import SequenceMatcher\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"from collections import defaultdict\n",
"from itertools import chain, combinations\n",
"from fpgrowth_py import fpgrowth\n",
"import time\n",
"\n",
"from dna import dna\n",
"from scov import numpy_image_dict\n",
"from helper import *\n",
"import zlib\n",
"import lzma"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1d03a554",
"metadata": {},
"outputs": [],
"source": [
"input_file1 = r'./input/China_Seq.txt'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ddb79f61",
"metadata": {},
"outputs": [],
"source": [
"input_file2= r'./input/USA_Seq.txt'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "07294521",
"metadata": {},
"outputs": [],
"source": [
"df_sequence = pd.DataFrame(columns=[\"line no\",\"sequence ratios\"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ae2da989",
"metadata": {},
"outputs": [],
"source": [
"count_lines=0"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "adf7ace4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" line no | \n",
" sequence ratios | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [line no, sequence ratios]\n",
"Index: []"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sequence"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d32d1210",
"metadata": {},
"outputs": [],
"source": [
"current_start_time = time.time()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a48476fe",
"metadata": {},
"outputs": [],
"source": [
"with open(input_file1) as file1, open(input_file2) as file2:\n",
" for lineno, (sequence1, sequence2) in enumerate(zip(file1, file2), 1):\n",
" sequence = SequenceMatcher(a=sequence1 , b=sequence2) #comparing both the strings\n",
" #print(lineno,\" - \",sequence.ratio())\n",
" df_sequence.loc[len(df_sequence.index)] = [lineno,sequence.ratio()]\n",
" df_sequence['line no'] = df_sequence['line no'].astype(int)\n",
" count_lines+=1"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "761e7493",
"metadata": {},
"outputs": [],
"source": [
"current_end_time = time.time()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "54c99034",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Time : 1.7870872020721436\n"
]
}
],
"source": [
"print(\"Total Time :\",current_end_time-current_start_time)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "24d5ea72",
"metadata": {},
"outputs": [],
"source": [
"df_sequence_missings = df_sequence[df_sequence['sequence ratios']<1]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f74e49b1",
"metadata": {},
"outputs": [],
"source": [
"length_changed_genome = len(df_sequence_missings)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d7393abd",
"metadata": {},
"outputs": [],
"source": [
"perc_missing_values = (length_changed_genome/count_lines)*100"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "63ddc7c9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Percentage of changed Genome Sequence 1.31 %\n"
]
}
],
"source": [
"print(\"Percentage of changed Genome Sequence \",round(perc_missing_values,2),\"%\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "64ef5d9d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" line no | \n",
" sequence ratios | \n",
"
\n",
" \n",
" \n",
" \n",
" 121 | \n",
" 122 | \n",
" 0.985915 | \n",
"
\n",
" \n",
" 249 | \n",
" 250 | \n",
" 0.985915 | \n",
"
\n",
" \n",
" 251 | \n",
" 252 | \n",
" 0.985915 | \n",
"
\n",
" \n",
" 254 | \n",
" 255 | \n",
" 0.985915 | \n",
"
\n",
" \n",
" 426 | \n",
" 427 | \n",
" 0.985915 | \n",
"
\n",
" \n",
" 589 | \n",
" 590 | \n",
" 0.985915 | \n",
"
\n",
" \n",
" 601 | \n",
" 602 | \n",
" 0.985915 | \n",
"
\n",
" \n",
" 611 | \n",
" 612 | \n",
" 0.989474 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" line no sequence ratios\n",
"121 122 0.985915\n",
"249 250 0.985915\n",
"251 252 0.985915\n",
"254 255 0.985915\n",
"426 427 0.985915\n",
"589 590 0.985915\n",
"601 602 0.985915\n",
"611 612 0.989474"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sequence_missings"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "5b7fab09",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEPCAYAAABP1MOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4kklEQVR4nO3dd3gU1frA8e+b0JEOAkoJTRSlGiIgKEUpKqCACtgQBcSLoFfx4lUxihflp1e9XLkqWMACCKIISBGRIgiS0LsiRQJKCx1Dy/v7YyZhk2ySDW5J2PfzPHmyO3N25t3JZN+dc86cI6qKMcaY8BUR6gCMMcaEliUCY4wJc5YIjDEmzFkiMMaYMGeJwBhjwly+UAeQU2XLltWoqKhQh2GMMXnKihUrDqhqOW/r8lwiiIqKIj4+PtRhGGNMniIiOzNbZ1VDxhgT5iwRGGNMmLNEYIwxYS5giUBEPhSRfSKyPpP1IiIjRWSriKwVkUaBisUYY0zmAnlFMBZon8X6DkAt96cv8E4AYzHGGJOJgCUCVV0EJGZRpDPwsTqWASVFpGKg4jHGGONdKNsILgd2eTxPcJdlICJ9RSReROL3798flOCMMSZc5InGYlUdrarRqhpdrpzX+yGMMcZcIPH3fAQrVqyIioyM7BsREdHhzJkz5Q4dOnRpuXLl9qQvd/jw4TIFCxZMKly48AmAffv2XV6mTJk/IiMjz2W1/YMHD1atWNFqkIwx4SsyMpJixYpRunRpChYs6NNrRGSFqkZ7W+fXO4tXrFgRlT9//i/Lly9fsmTJkseSk5MTt27dWqZu3boH0pdNTEw8s2/fvktr166989ixY0Xz5ct3+uqrr96b3T42btxY9aqrrvIpnrUJh3P+JrJQr1JJv24P/B8jWJz+ZnH6V7jG6a8YVRWSz3H82EmOH/+NKlWq+JwMMuPXRBAZGdm3fPnyJcuXL5/4yy+/VDtx4kSxc+fO5Vu9enW9ihUr7lFVAahQocL+UqVKHTly5EiJdevWXSMiyVFRUTv8GYsxxlyMRAQi85GvSHFKFTpLYmIif7WWxK+JICIiokPJkiWPAdSqVWt7VmVFhGrVqv3mz/0bY0w4KV68ODt27PjLicCvjcWqWqpAgQJn/LlNY4wx3uXPn59z57JsVvWJ33sNiYi/N2mMMcYLf33e5onuo8YYYwLHEoExxoQ5SwTGGBPmgjpDWdSQb671z5ay7JB0waYNuD4g2w2E3bt+45Zm9enUrQfD3vxfqMO5YCNHjuTdd99l+/btJCUl8eabb/L4448HdJ9xSxfz8F0deeSJf9D/70MCui8R4cYbb2TBggUB3Y8xf4VdEQSAiFijuQ8mTpzIoEGDKFSoEI8//jgvvPACTZo0yfI1sbGxqcf3/vvvz7Rc/NIl1K9civqVS9GhaT1/h57nbdmyhT59+lCzZk0KFSpE0aJFqVatGm3btuWll15i795s7+00F5E8N2exuXjMmDEj9fdll12Wo9fmy5ePL774gpEjR1KyZMkM66dMGEe+fPk4e/ZshnXXNGjE1Pk/UbJ0mQuKOyc2bdpEkSJFAr6fnPj++++59dZbSUpKomnTprRv357ixYuzZ88efvzxR+bOnUuzZs0oX758qEM1QWKJwITMnj3OEFQ5TQIAt912G1OnTuWzzz7jb3/7W5p1Rw8fZt7M6dxwU3u+nz0jw2sLFy5CtZpXXFjQOXTllVcGZT850a9fP5KSkhg7diwPPPBAhvVr166lVKlSIYjMhIpVDQXJjh07EBF69erFjh076N69O2XLlqVxzQr0uKUVC7+b7fV1J44f47UXn+XmxlfTuGYFOreM4ePRo9Dk5Ez3dfLkSV555RUaNGhA0aJFueSSS2jatCkTJkxIU+6nn36iQIECVK9enSNHjqRZ9/vvv1O+fHkuueQSNm/e7NN7PH3qFB+MepOuNzXjulqX0eyqKvTq0oE5079KU+6dN16lfuVSzJ8/HzhflZaT6rT27dtTqVIl3n///Qzrpn/5OadOJdG1p/eqo7ili6lfuRTvvPFqmuUJO3fw0j8e57bmjYipWZEW11Sj603NeOSRRzh48OD593n6NCNHjqRRo0aUKlWKIkWKEBUVRefOnfnuu+/SbFNEaNmyZZplsbGx1K9cirili5n7zdf0vK0N19W6jBbXVOPpR3uz9/cMYzQCsH71Svr17ELTKyvT7Koq9O1xO2tWLE89nr60Q+zbt4+tW7dSokQJr0kAoF69elSuXDnD8r2/72b4c4O55foGRNcozw11qzPwwR6sX73S63YO7t/HC089RquGVxBTsyJ3tWvBtMkTWLBgASJCbGxsmvJRUVFERUV53VZKlaC397h582aef+JR2sZczbXVL6VVwysYMuBhdvz6S4ayzz/xKPUrl2L3rt+Y/OlHdL2pGY1rVqBVwyt46R+Pc+zokQyvAUhISGDgwIHUqlWLwoULU7p0aWJiYhg2bJjXsgMGDKB69eoULFiQMmXK0KlTJ6/H6cTxY7z31mt0adOUZldVoemVlbn1+oYM7t+bjWtXe40lEOyKIMh27txJTEwM1atX57777uPXhD+YM/0rHn/oHt6bMJWYZi1Sy54+dYo+3W9nw5qV1K5zDbfc0Y1jR48weuRrrFi2xOv2jx45QvNOd7Bq1SoaNWpE7969SU5OZs6cOfTs2ZMNGzbw8ssvA3DdddcxfPhwBg8eTJ8+fZg0aRIAycnJ3HPPPezbt4+xY8dy5ZVXZjtg1pnTp+l/b1fily2hWs0ruPuBh0j680/mzpzG04/2ZsuGdQwcMhSA6KbNeQSY9eVEdu7cyQsvvJDj4xgZGUnv3r156aWXiI+PJzr6/KCKX44fx+VVqnJd85Y+b2//3j/oeVtrThw/RvNWN9Pmlo6cPnWK3b/t5JNPPmHAgAGUKeNUJfXq1YsJEyZwzTXXcP/991O4cGH27NnD4sWLmT17NjfddJNP+5z08QcsmDuLljd3ILpJM9atWsGc6V/x86YNTJq9iAIeA4mtWLaER+7tSvK5c7Ru35HKVaP4ZctGHr67U5pzJjslSpQgX758HD9+nN9//93noQk2rVvDI/d04cjhQzS7sTVt2nfkcOJB5n/7Db26duDNMZ/QonXb1PKHEg9y/+3tSPhtBw0bN6FhTBMO7N3Ly8/8nXbt2maxp5yZPXs2Xbp04cyZM9xwU3uqRFVj7+97mDd7Bj98/y3vfz6dq+rWz/C6t4a/wI8L53HDTe1pekNr4pb+wJTx4/htxzbe/3xamrLx8fG0a9eOxMREbrjhBrp06cLJkyfZuHEjsbGxPP/886llV65cSdu2bUlMTKRdu3Z06dKFAwcOMHXqVGbPmZPmOKkqj97XjdXxy6l/bWPu6H4f+fJFsvf3PcQtXUyj65pQp14Dvx2rrFgiCLIFCxYQGxub+uG3NuEwHTp349H7ujHu3ZFp/qk/Hv02G9aspE2Hjrz+7lgiIpwLuN6PPkGPW1p63f5rsc+watUqRowYwdNPP526PCkpidtvv53hw4fTrVs3GjRoAMCTTz7J/PnzmTx5Mu+99x79+vVj2LBhzJ8/n/vvvz/Tb43pfTz6beKXLaF5q5v4z4cTyJfPObX6PfEP7u3Yhg9GvckNN7WjQfR1NG7anMZNm7Np5TJ27tyZ4Zuhr3r37s3LL7/MmDFjUhPB2pVxbN2yiQGDn83RFcbcmdM4cvgQT8e+wj0PPZJmXY1S+VOP/ZEjR5g4cSLXXnstP/30E5GRkWnKel45ZGfJgnmMnz6PWlddnbpsyICHmfX1FOZ/O5N2He8AnMQcO3ggp0+dYtTHk2je6ubU8pM++ZB//fNJn/dZsGBBOnfuzJQpU2jevDn9+/enRYsW1K1bN9O2jLNnzzK4/4OcPHmC9z+fTnTT873r9v3xOz1va0Ps4IHM+nFNavL674hhJPy2g3sf6s/g2OGp5bv36sP9t/snERw6dIgePXpQpEgRxkyaQY0rzlfD/bJ5I/d1bkvs0wP5fNbCDK9duzKOL+YuoeLllVPfY5+7OxH34w+sW7WCug2dDo6nT5/mzjvvJDExkc8++4yePXum2U5CQkLq47Nnz3LXXXdx/Phx5s+fz4033pi6bs+ePTRoFJ3mOG3dvJHV8ctp1e5W3nr/0zTbTU5O5vjRo3/9IPnIqoaCrGrVqjz33HNpll3fsg0VL6+U4dLx60njiYiI4Il/vpj6QQRQqUpVevbul2Hbhw8l8s1Xk4iOjk6TBAAKFSrEiBEjUFXGjx+fulxEGDduHJdffjmPP/44o0aNYtiwYdSuXZv//c/3bqlTP/8MEeGpof9KTQIAZcqWo8+gwQB8OeETn7fni6pVq9K2bVsmTJjAiRMnAJgy/mMiIyPpfNc9F7TNgoUKZVhWtGhRChcuDDjHS1UpWLBgmr9JipSrBl/0fLBvmiQA0KWnk3g9z4XV8T/x245tNG7WIk0SAOh2Ty+qVq/p8z4BxowZQ5cuXdi+fTuDBw+mSZMmFCtWjPr16/Pcc89l6DH0w7xv2bVzOz169UmTBAAurVCRB/sP5MC+vfy0xPnAPXPmDN98NZmilxTjkb//I035q+s35J57Luxvk97HH3/M4cOHefHFF9MkAYBaV9aha4/72bx+Lb/+nLFqs9/jT6cmAXA6H6ScM+vXrEhdPn36dHbs2EGnTp0yJAGASpUqpT7+5ptv+PXXX3nsscfSJAFw2sHSH6cUhbyccxERERT30gkiUOyKIMgaNGiQ4VskQPnLLmftirjU5yeOH+O3HduocNnlVI6qlqF8dNPm8OaINMs2rF7JuXPnvNa/gvMPCk5PFk9ly5Zl/PjxtG7dmgEDBlCoUCE+//xzihYt6tN7Son10gqXeW2EjWl2AwCbN6z1aXs50adPH2bPns3EiRO58847mTP9K1q0bsulFSp67TGUmZY3t+e/I4bxynOD+XHh9zS7sTUNoq/L8AFTvHhxOnbsyPTp02nQoAFdu3alRYsWXHfddTnuHVSnXsMMyypUdGZrPXrkcOqyzRvWAdCwccautREREdS/Noad27b6vN9SpUoxZcoUduzYwZw5c4iPjycuLo61a9eydu1a3nnnHWbPnk3jxo0BWLNyOQC/707I0K4C8Nv2XwHY9svPtGjdlh1bfybpz5M0imlKseIlMpRv2bIl48aN8znezCxdutSJb80a1v+6K8P6ndudY7J9688Z/o5ej/1l7rE/fL6dYNmyZQB06NDB53gyu8pdv2YDcP44Vb/iSmpfXZdZX0/h9927aNn2Fho2bsLV9RqSv0CBbPfnT5YIgsxbV0eAfJH5SPZoAD5+zLksLFP2Uq/ly5bLuPzw4UQA4uLiiIuLy7A+ddvHj2dYFhMTQ5UqVdi+fTutWrWifv2M9aqZbs+Ntdyl3rsbpizPrCHur+jYsSPly5fn/fff58yZM/x58gRdMmkkzspllarw2fTveOeNEfy48DvmzZoOOB8Oz/zjaQYOHJha9vPPP2fEiBGMHz8+tYqvUKFCdOvWjddff93nbpfFSmT8kIx0r6aSPUaUTKkiKFPW+zStZS5w+taoqCj69etHv37O1WVCQgKPPvoo06dPp0+fPqxevRqAI4cOAfDtjKlZbu/Pk85V2bGUc9fLOQpQoUKFC4o3vZRquDFjxmRZ7qR7tejJW4JKPfbJ54/94cOHAbj8cq/TqXuNZ/LkyVmWSzlOkZGRvD9xGu+99X/Mnfk1bw2PBaDoJcXo2K07g4YMpUjRS7Ldrz9YIsilLilWHICDB/Z5XX9gf8blKa954okneOONN3K0v0GDBrF9+3bKli3LrFmz+Oyzz3y+hE/Z74H93m9C2r/PWV7MLedP+fPn58EHH+TVV18lISGB8hUvy1B94qvqtWrz2jsfcvbsWX7euJ5lixcw4aMxDBo0iKJFi/LQQw8BULhwYWJjY4mNjWXXrl0sWrSIsWPH8umnn7Jjxw5++OEHf75FihYrBsDBA/u9rj+43/vynKpUqRITJ06kVKlSrFmzhsTERCAi9e/7nw8+o2XbW7LdTsrf+aCXcxTgjz/+8Lo8IiKC06dPe12X8oHsqYSbSNesWQOlq2Qb14VI+eK2e/fubMumxPP111/TqVOnDOu9dbgoXrIkg2OHMzh2OL9t30b8T0v44tOxTBw7hmNHjzD8P+/9pfh9ZW0EuVTRS4pRJao6+/74nV07Mg6pEb90cYZl1zS4loiIiBx/EE2aNInRo0dzww03sHLlSsqVK8cjjzzCL79k7H6XWayVq1Zj3x+/s9OtJvAU96MTz5XX+H6VkRMPP/wwIkJCQgK3332v16q3nMiXLx916jWg96OPM+Jt59vm1KlTvZatXLky99xzD3PmzKFmzZosXrw4Rw3Gvrjq6roArIpblmFdcnIya1Ys99u+ChYsSAG3WiJlPvN6jZyG+JXLl/q0jaiaV1CocBG2bFzv9Sows26upUqVYu/evalVmJ7i4+MzLEu5C93fidfbPmbNmuVz2QuNp0q16nTpfh8fTp5BkaKXsODb7PfpL5YIcrHOd/UkOTmZt16JTVNtlPDbTsZ/mPGbQpmy5bjljjuJj49n2LBhXies+PXXX9m+/Xxi2bZtG3369KFMmTKMHz+eypUrM27cOE6cOMHdd9/NqVOnfIr19rvvQVV58+WhafZ7KPEgo0e+llomEGrUqMHs2bP56quvvDai+2Lj2tVeP7RSvoWn1P/v37+fdevWZSh34sQJjh8/Tr58+VI/SP2lQeMmVK5ajbgff2Dx/Llp1n3x2dgctQ+cOHGCYcOGZTqExFtvvcXx48epU6dOasN3y7a3ULlqNT4f9wE/fP+t19etWbGcP/88CThXabfecScnjh/j3TfStWOtWcVnn33mdRsxMTGcPXuWjz76KM3ysWPHsmRJxu7SDz74ICVLluTFF19k3aoVGdYnJycT5+ULU0507NiRqKgopk2bluE+HEjba6hz587UqFGDUaNGMXPmTK/b8zxOCb/tJGHnjgxljh45zOnTp7x2XAgUqxrKxe7vO4Dv58zku5nT6N7hRpre2JpjR4/w7YypXBvTjAVzM35jeGbY/3Fg906GDh3KJ598QvPmzSlfvjx79uxh06ZNxMXFMWHCBKpVq8aZM2fo3r07R48eZdq0aan1oB06dODJJ5/k9ddf56mnnuK///1vtrE+0O8xFs//jvnfzuTOts1p0fpm/vzzT+Z+8zWJB/bTq/9AGsU09fsxStG2rdMl8UInCJ/x5ed88elYGsY0oVLVKIqXKEnCzh0snDubggULpg6Et3v3bho2bEjdunVTb7w6evQoM2bM4I8//mDgwIEUc6ty/CUiIoIXXhvJo/d1Y1DvnrTp0JHKVavx8+YNLPthAc1b3cTi+d957cWU3pkzZxg6dCgvvvgiMTExNGjQgFKlSpGYmMiSJUtYt24dRYsW5d133019Tf78+XljzMf0v7cbAx64mwbRMdSuU5dChQvzx57dbFizioTfdjBvxWYKF3YS5mP/eJ6fFi/k0w/eYcPaVan3EcyZ/hW33HIL06ZNyxDbY489xkcffUT//v2ZN28elStXZvXq1SxdupTbbrstdUiSFGXKlOGLL77gjjvu4L7ON3Nd8xupccWVCMIfv+9mzYo4jhxOJG6r96ooXxQoUIDJkyfTtm1bevbsyXvvvUeTJk1ISkpi06ZNzJs3L7VTQv78+fnyyy9p164dt956K82aNaNBgwYUKVKEXbt2sWTpT2mO088b1/P3vvdxdf1GVK95BeXKV+BQ4kHmfzuTs2fO8GD/QRccd04FNRHsePXWjGk7hzZu3HhtnTp1fCp7oR8KuUWBggUZPeEr3nljBHOmf8X4D9/jskpV6PPYU7Rpf5vXRHBJseIsXLiQ0aNHM378eKZMmUJSUhLly5enVq1avPnmm9x8s1OHPmTIEOLi4hg4cCAdO3ZMs53hw4ezaNEi3n77bVq3bk2Nxq2yjDV/gQK8N/4rPhkziplTv2DCR2OIzJePK+pczdMvDKfD7d38d2ACoH3nrpw+dYo1K5azce1qTiUlcWmFirTr1IV/DX2Ga665BnAaWF988UUWLFjA/PnzOXDgAKVLl6Z27dq8+uqrdO/ePSDxNW7anA8mz2DUa//ih++dq4K6Da/l/c+n8c1XTuNk8eLZt8EUL16cWbNmMXfuXBYvXszUqVPZv38/hQoVolq1agwaNIjHH388wx2+V1x1DZPm/MAnY0ax6Ls5fD1pPBIRQblLy3PlNfXo/+SQNGM3lSpdhnFfzWbkiGEs+m42G9euJqpGTZ4d/m9aNKrjNRHUqVOH7777jn/+859Mnz6dfPny0aJFC5YuXcqXX36ZIREAtGnThrVr1zIk9l8sXfg9K5cvJX/+ApQrX4GY61twU4eMdfU5FR0dzerVq3n11VeZNWsWP/74I8WKFaNmzZq89NJLacrWq1ePNWvW8MYbbzBjxgw++ugjIiIiqFixYobjdHV9p/pxxU9LWLJwHkePHKZU6bLUqVufnr37XXBb14WQlHpAf1izZs2O+vXrH/DbBr0IZSKoV6mkX7cHgUlWFqd/5fY4H7ijHetWreDIkSM+d/n1VSCOZ+LW1bRq1YoXXnjhgm8mTC8v/N0DFeOmTZu46qqrsi0rIitUNdrbOmsjMCYP+PPPkxw9krEN4+tJ41kdv5ymN7TyexIw4cPaCIzJA/7YncDd7W+kSYuWVI6qzrlzZ9m8fi2r4pZRrEQJnnz+5VCHaPIwSwTG5AFlyl7KLXfcyYplS4hbupjTp09RttyldL7rHvo89qTXu8+N8ZUlAmPygOIlSxL72shQh+EXLVu2xJ9tk+avszYCY4wJc5YIjDEmzPk9EdglnzHGBIe/Pm/9mghE5NDp06fz+3ObxhhjvDtz5sxfHlsL/JwIkpOTZx0+fNi/99cbY4zx6ujRo34Z0sSvieDcuXOj9+7de3jv3r2lT506ld+qiYwxxr9UFT13lrMnj3Lo0CFKly79l7fp1yEmAFasWBEVGRnZNyIiooOqlvLrxoGDBw9W9XXC7YRDf/p135VKFfbr9sD/MYLF6W8Wp3+Fa5z+jPHEmWTid//J811jKOjOE52drIaY8HsiCLTo6Gj1Nja5N1FDvvHrvne8eqtftwf+jxEsTn+zOP0rXOMMdYwhG2tIRNqLyBYR2SoiQ7ysryIi80VklYisFZHspz8yxhjjVwFLBCISCYwCOgB1gB4ikn7Y0OeASaraEOgO/C9Q8RhjjPEukFcEMcBWVd2mqqeBiUDndGUUSBlEvQSwJ4DxGGOM8SKQieByYJfH8wR3madY4F4RSQBmAo9525CI9BWReBGJ3++nibqNMcY4Qj3ERA9grKpWAm4BPhGRDDGp6mhVjVbV6HLlygU9SGOMuZgFMhHsBip7PK/kLvP0EDAJQFWXAoWAsgGMyRhjTDqBTARxQC0RqSYiBXAag9NPVPob0AZARK7CSQRW92OMMUEUsESgqmeBAcAcYBNO76ANIvKSiKTMKP0k0EdE1gATgF6a125sMMaYPC6gE9Oo6kycRmDPZUM9Hm8Erg9kDMYYY7IW6sZiY4wxIWaJwBhjwtwFJQIRudLfgRhjjAmNC70i+NavURhjjAmZTBuLRWRkZquAkgGJxhhjTNBl1WvoQZzunae8rOsRmHCMMcYEW1aJIA5Yr6o/pl8hIrEBi8gYY0xQZZUIugFJ3laoarXAhGOMMSbYMk0EqpoYzECMMcaEht1HYIwxYc4SgTHGhDlLBMYYE+ayTAQiUkpEXk63rIeINAtsWMYYY4Ily0SgqoeAm0WkpsfiocDPAY3KGGNM0PhSNfQB0BtARFoCG1X1QABjMsYYE0S+JIIJQFcREaAXMCagERljjAmqbBOBqh4DfgTuBq7DmXHMGGPMRcLXGcreB6YDb9tUksYYc3Hxqfuoqi4BPsFJCMYYYy4iPs9ZrKqDAhmIMcaY0LAbyowxJsxZIjDGmDBnicAYY8KcT20E7pASUZ7lVfXjAMVkjDEmiLJNBCLyCVADWA2ccxcrYInAGGMuAr5cEUQDdez+AWOMuTj50kawHqgQ6ECMMcaEhi9XBGWBjSKyHDiVslBVOwUsKmOMMUHjSyKIDXQQxhhjQifbRKCqC0WkPNDYXbRcVfcFNixjjDHBkm0bgYjcBSwH7gTuAn4SkW6BDswYY0xw+FI19CzQOOUqQETKAd8BXwQyMGOMMcHhS6+hiHRVQQd9fJ0xxpg8wJcP9NkiMkdEeolIL+AbYKYvGxeR9iKyRUS2isiQTMrcJSIbRWSDiIz3PXRjjDH+4Etj8WAR6Qpc7y4arapfZfc6EYkERgE3AwlAnIhMU9WNHmVqAc8A16vqIRG59ELehDHGmAvn01hDqjoFmJLDbccAW1V1G4CITAQ6Axs9yvQBRqnqIXc/1hvJGGOCLNOqIRFZ7P4+JiJHPX6OichRH7Z9ObDL43mCu8zTFcAVIrJERJaJSPtMYukrIvEiEr9//34fdm2MMcZXmV4RqGpz93exAO+/FtASqAQsEpG6qno4XSyjgdEA0dHRNuaRMcb4kS/3EXziyzIvdgOVPZ5Xcpd5SgCmqeoZVd0O/IyTGIwxxgSJL72GrvZ8IiL5gGt9eF0cUEtEqolIAaA7MC1dmak4VwOISFmcqqJtPmzbGGOMn2TVRvCMiBwD6nm2DwB7ga+z27CqngUGAHOATcAkVd0gIi+JSMqAdXOAgyKyEZgPDFbVg3/xPRljjMmBrNoIXgFeEZFXVPWZC9m4qs4k3T0HqjrU47ECf3d/jDHGhIAv9xE8IyKlcOruC3ksXxTIwIwxxgSHL1NVPgwMwmnsXQ00AZYCrQMamTHGmKDwpbF4EM4Q1DtVtRXQEDgcyKCMMcYEjy+JIElVkwBEpKCqbgZqBzYsY4wxweLLEBMJIlISp6vnXBE5BOwMZFDGGGOCx5fG4jvch7EiMh8oAcwKaFTGGGOCJkfzCqjqQiAJH4ehNsYYk/tldUNZaxH5WUSOi8inIlJXROKBV4B3gheiMcaYQMrqiuDfQF+gDM60lEuBsap6rap+GYzgjDHGBF5WbQSqqgvcx1NFZLeqvh2EmIwxxgRRVomgpIh08Szr+dyuCowx5uKQVSJYCHT0eL7I47kClgiMMeYikNWgcw8GMxBjjDGhkaPuo8YYYy4+lgiMMSbMWSIwxpgw58ucxUVE5HkRGeM+ryUitwU+NGOMMcHgyxXBR8ApoKn7fDfwcsAiMsYYE1S+JIIaqvp/wBkAVT0JSECjMsYYEzS+JILTIlIY594BRKQGzhWCMcaYi4Av8xG8AMwGKovIZ8D1QK9ABmWMMSZ4fJmPYK6IrMSZq1iAQap6IOCRGWOMCQpfeg3dAZxV1W9UdQZwVkRuD3hkxhhjgsKXNoIXVPVIyhNVPYxTXWSMMeYi4Esi8FbGl7YFY4wxeYAviSBeRN4QkRruzxvAikAHZowxJjh8SQSPAaeBz92fU8DfAhmUMcaY4PGl19AJYEgQYjHGGBMC2SYCEbkCeAqI8iyvqq0DF5Yxxphg8aXRdzLwLvA+cC6w4RhjjAk2XxLBWVV9J+CRGGOMCQlfGouni8ijIlJRREqn/AQ8MmOMMUHhyxXBA+7vwR7LFKju/3CMMcYEW7ZXBKpazcuPT0lARNqLyBYR2SoimfY8EpGuIqIiEp2T4I0xxvx1vs5Q9pyIjHaf+zRDmYhEAqOADkAdoIeI1PFSrhgwCPgpp8EbY4z563ydoew00Mx97usMZTHAVlXdpqqngYlAZy/lhgEjgCQftmmMMcbPAjlD2eXALo/nCe6yVCLSCKisqt9ktSER6Ssi8SISv3//fh92bYwxxlchm6FMRCKAN4AnsyurqqNVNVpVo8uVK/dXd22MMcZDIGco2w1U9nheyV2WohhwDbBARAAqANNEpJOqxvuwfWOMMX4QyBnK4oBaIlINJwF0B3p6bPcIUDbluYgsAJ6yJGCMMcHly1hDN7gPj7m/64gIqrooq9ep6lkRGQDMASKBD1V1g4i8BMSr6rS/Ergxxhj/8KVqyPNGskI4vYFWANkOOqeqM4GZ6ZYNzaRsSx9iMcYY42e+VA119HwuIpWBtwIVkDHGmODypddQegnAVf4OxBhjTGj40kbwX9yuoziJowGwMoAxGWOMCSJf2gg8e/GcBSao6pIAxWOMMSbIfGkjGBeMQIwxxoSGL1VD6zhfNZRmFaCqWs/vURljjAkaX6qGZrm/P3F/3+P+tlnLjDHmIuBLIrhZVRt6PB8iIitVNdP5BYwxxuQdvnQfFRG53uNJMx9fZ4wxJg/w5YrgIeBDESnhPj8M9A5YRMYYY4LKl15DK4D6KYnAHSzOGGPMRcKXqSrLi8gHwERVPSIidUTkoSDEZowxJgh8qesfizOC6GXu85+BxwMUjzHGmCDzJRGUVdVJQDI4w0sD5wIalTHGmKDxJRGcEJEynJ+qsglg7QTGGHOR8KXX0N+BaUANEVkClAO6BTQqY4wxQeNLr6GVInIjUBtnWIktqnom4JEZY4wJikyrhkSksYhUgNR2gWuBfwH/FpHSQYrPGGNMgGXVRvAecBpS5y1+FfgYp31gdOBDM8YYEwxZVQ1Fqmqi+/huYLSqTgGmiMjqgEdmjDEmKLK6IogUkZRE0Qb43mOdL43Mxhhj8oCsPtAnAAtF5ADwJ/ADgIjUxLqPGmPMRSPTRKCq/xKReUBF4FtV9Zy3+LFgBGeMMSbwsqziUdVlXpb9HLhwjDHGBJvNK2CMMWHOEoExxoQ5SwTGGBPmLBEYY0yYs0RgjDFhzhKBMcaEOUsExhgT5iwRGGNMmLNEYIwxYS6giUBE2ovIFhHZKiJDvKz/u4hsFJG1IjJPRKoGMh5jjDEZBSwRiEgkMAroANQBeohInXTFVgHRqloP+AL4v0DFY4wxxrtAXhHEAFtVdZuqngYmAp09C6jqfFU96T5dBlQKYDzGGGO8CGQiuBzY5fE8wV2WmYeAWd5WiEhfEYkXkfj9+/f7MURjjDG5orFYRO4FooHXvK1X1dGqGq2q0eXKlQtucMYYc5EL5Exju4HKHs8rucvSEJGbgGeBG1X1VADjMcYY40UgrwjigFoiUk1ECgDdgWmeBUSkIfAe0ElV9wUwFmOMMZkIWCJQ1bPAAGAOsAmYpKobROQlEenkFnsNuASYLCKrRWRaJpszxhgTIAGdhF5VZwIz0y0b6vH4pkDu3xhjTPZyRWOxMcaY0LFEYIwxYc4SgTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwTGGBPmLBEYY0yYs0RgjDFhzhKBMcaEOUsExhgT5iwRGGNMmLNEYIwxYc4SgTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwTGGBPmLBEYY0yYs0RgjDFhzhKBMcaEOUsExhgT5iwRGGNMmLNEYIwxYc4SgTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwTGGBPmLBEYY0yYC2giEJH2IrJFRLaKyBAv6wuKyOfu+p9EJCqQ8RhjjMkoYIlARCKBUUAHoA7QQ0TqpCv2EHBIVWsCbwIjAhWPMcYY7wJ5RRADbFXVbap6GpgIdE5XpjMwzn38BdBGRCSAMRljjElHVDUwGxbpBrRX1Yfd5/cB16nqAI8y690yCe7zX90yB9Jtqy/Q131aG9ji53DLAgeyLRV6Fqd/5YU480KMYHH6WyDirKqq5bytyOfnHQWEqo4GRgdq+yISr6rRgdq+v1ic/pUX4swLMYLF6W/BjjOQVUO7gcoezyu5y7yWEZF8QAngYABjMsYYk04gE0EcUEtEqolIAaA7MC1dmWnAA+7jbsD3Gqi6KmOMMV4FrGpIVc+KyABgDhAJfKiqG0TkJSBeVacBHwCfiMhWIBEnWYRCwKqd/Mzi9K+8EGdeiBEsTn8LapwBayw2xhiTN9idxcYYE+YsERhjTJizRGCMMWHOEoExxoQ5SwS5nIiUE5GGIlJPRC4JdTy+EpErQx1DVkTk0VDHkB0RqSkiXb2M0ZUr5NVzEyCvxBus/yPrNeRBRK5U1c2hjgPA/ecfCUQBVYBVwKXAQmCQqh4JXXTZE5HfVLVKqOMAEJG/p18EPAMMB1DVN4IelBciMh+4U1UPuEOyPA8sAq4DRqvqf0MaoCuvn5uQu87PrAQrzjwxxEQQfYtzYucGHwIPqOoWEYkB/qaq14lIH5z7L7qFNjwQkZGZrQJKBjGU7LwIzAQ24MQGzr0txUIWkXflPMbZGgg0VdWDIlIEWAbkikRAHjg3wesXgNRVQK65IsgN/0dhd0WQzUF/QFWLBzOezIjIGlWt7/F8pao2ch9vUtWrQhddakzHgCeBU15W/1tVywY5JK9EpArwb2Ab8KKqnhSRbapaPcShpSEiq4DbVHW3e3XQQVWT3CHd16rq1SEOEcgb5yaAiCQBrwFnvax+QlVLBjci73LD/1E4XhE8SOYHvUeQY8nKryLyPPA90AVYDSAi+ck9bTtxwHpV/TH9ChGJDX443qnqb8CdItIZmCsib4Y6pkw8AXwrIlNwrl6+F5E5QHPgo5BGllZeODcBVgJTVXVF+hUi8nAI4slMyP+PwvGK4HvguUwO+nZVrRaCsDIQkZLAP3Em9VkDvKqqx0SkBHCVqi4LZXwAIlIaSFLVk6GOxVciUhSIxRnu/IYQh5OB+/ftCVyB80UtAfg6t7RdQd44NwFEpDaQqKr7vawrr6p7QxBWBrnh/ygcE0HID/qFEpFLVXVfqOMwxlxcctNlXFCoamJeSAIiUjrdTxlguYiUcpNZyIlIe4/HJUTkAxFZKyLjRaR8KGPzlC7OkiLyfi6NM1JE+onIMBFplm7dc6GKKz0RiRaR+SLyqYhUFpG5InJEROJEpGGo40vhnpOvishmEUkUkYMissldVjLU8flCRGYFYz9hlwhEpLiIvCIin4hIz3Tr/hequLw4AKzw+IkHLsep94wPYVyehns8/jfwO9ARp87zvZBE5J1nnK8Df5A743wPuBFnTo7/iohnt9YuoQnJq/8B/wd8A/wIvKeqJYAh7rrcYhJwCGipqqVVtQzQyl02KaSReRCRRpn8XAs0CEoMYVg1NAX4Bac7Xm/gDNBTVU959n4INRF5ErgZGKyq69xluaYNAzL0Flmtqg081qV5Hkp5KM61qlrPfZwP50O1LE4nhmWqmiu+bYvIqpRY0vdz91wXaiKyRVVr53RdsInIOZx7MLzN195EVQsHOoZw7DVUQ1W7uo+nisizOL0zOoUyqPRU9d8i8jnwpojsAl4AclvWvtTtqy1AcRERj4mFctPVZl6Js0DKA1U9C/QVkRdweufkmn7vQJKItMWZUVBF5HZVnSoiNwLnQhybp50i8jQwLqVh2K0K7AXsCmVg6WwC+qnqL+lXuP/7AZeb/gmCpaCIpL5vVf0XMAbnDs4yIYvKC1VNUNU7gQXAXKBIaCPKYAzOTVmXAONwvr0iIhVwuxTmEnklznjP9gwAVX0Rp+toVEgi8q4/Thfs3kA7oJWIHMK5ghkUysDSuRvnf3qhiBwSkUSc/6XSwF2hDCydWDL/LH4sGAGEY9XQ/wHfqup36Za3B/6rqrVCE1lG4owzcjnwE843rRqqul5E2qvq7NBG5/CMUVWPeyzPNTFC3okzPRH5WFXvD3UcWRGRFkAMsE5Vvw11PClE5Dpgs6oeEefu7CFAI5x7NIbnlqEw3Dg3qepRESmMM/xJQ2AjQYoz7BJBVkTkQVXNFTfuiMhA4G84l40NcMZw+dpdlyvaMkTkMWAAuThGyFNxpp/TW3AaN78HUNVcUX0pIstVNcZ93Ad4FJgKtAWmq+qrIQwvlYhsAOqrM23uaOAEMAVo4y7PFQ3wXuI8CXxBEOMMxzaCrKRchucGfYBrVfW4iEQBX4hIlKr+B++NSqHQl9wfI+SdOCvjfFt9H6c9SIBonB5ZuUl+j8d9gbaqul9EXsfphJErEgEQ4ba1AER7JPzFIrI6RDF5E/I4wy4RiMjazFYBuaZPOc7JcRxAVXeISEucD7Cq5J4Pr7wQI+SdOK/FqWN/Fqe32GoR+VNVF4Y4rvQiRKQUTr22pNy5q6onRMTbuD6hst7jKn+NiESraryIXIHTWzC3CHmcYZcIcD7s2+H0JfYkOH2ic4u9ItJAVVcDuN9mb8MZ+bFuSCM7Ly/ECHkkTlVNxuklNtn9vZfc+T9aAufeFsHpNVRRVX8XZ4z/3JRYHwb+I87NeAeApW4vnF3uutwi5HGGXRuBiHwAfKSqi72sG6+qPb28LOhEpBJwVlX/8LLuelVdEoKw0seR62OEvBNneiJyK3C9qv4z1LH4wm2QLa+q20MdiycRKQ5Uwx27KbeMMZReKOMMu0RgjDEmrXC8j8AYY4wHSwTGGBPmLBHkkIgcz75UmvItRWSGn/adX0RWelneW0TWiTOi5npxJmAx6fjzb+Hj/mJF5Klg7c/kjIjMlByOQioij4hIrr7B70Lkxh4JJnPNgTQNm25D6LNAI/cOykuAcqEI7mInIpGqGrSxdIK9v9xORPJ59Lf/y1T1lgt4zbv+2n9uYlcEF8j9drlARL4QZ7zzz0RE3HXt3WUr8Rg+WESKisiHIrJcRFalfHMXkf+IyFD3cTsRWSQe4yF5aA+kH5/8UuAYkNJP/nhKrw0RqSEis0VkhYj84A6zgIhUE5Gl7lXEyylXOem/MYvI2yLSy318rYgsdLc1R0QqussXiMgI9z39LM5wAylj67/uXqGsFefu3ky3k+7YjhWRkSLyo4hsE5FuPsS3Q5zhxVeLSLw4w/jOEZFfReQRj80XF5FvRGSLiLybcpxFpK17TFaKyGQ3oaZsd4T7t7zTY9+RIrJdHCVF5JyI3OCuWyQiKUOV1HGP0TZx7hZPef297jFbLSLviTMvMSJyXET+LSJrgKaZlUt3vG5xz7cV7nGb4S7P7HzrJSJfuufGL+IMu5KyrR7uebFeREZ4LD8uIq+JyAYR+U5EYjzeVyePY/KaOPMSrBWRfu7yiu4xWe1ut4WX9zDUfd16ERktkvq/tEBE3hKReGBQDs6fd0RkmRtfS/c4bBKRsR7ldohIWfc4fSMia9z93+2uf1VENrrv5XV3WepVnmR+7hcRkUnua78SkZ9EJDp9nLmKqtpPDn6A4+7vlsARoBJOQl2K8429EE7/31o4faonATPc1wwH7nUflwR+BoriDCa3AWc4gS04Ywp52/dyoEi6ZZHAHOA3nLuiO3qsmwfUch9fB3zvPp4G3O8+/lu69zTD4/Vv44zUmB/nHoty7vK7gQ/dxwtwJtgGuAX4zn3cH+c2+Xzu89JZbSfdexoLTHaPax1ga1bxuY93AP3dx28Ca3EGmisH7PV4fRJQ3T1uc4FuOIPQLQKKuuX+AQz12O7Tmfw9ZgNXA7fhzG3wLFAQ2O6uj3Xfb0F3HwfdY3AVMB3I75b7n8ffQ4G73MeZlvOIIeV8q+Y+n0D251svYBvO/QCFgJ04dzVfhnMelcOpLfgeuN0jrg7u46+Ab933Uh9Y7S7vizMNLO57jsfpDvkk8KzH+VrMy7Es7fH4E9zzGOf8+p/7OCfnz0Sc/7/OwFGc+0UicO5/aODxty0LdAXGeLy+BM5gdVs437OypMff9Klszv2ncOZoALgGOItzx3DIP78y+7Gqob9muaomgDOuPc4IkcdxPgh+cZd/ivMPAs5YLJ3kfL1xIaCKqm4SZ8yWRcATqvpr+h2JyOU486+mmV1NVc+JM2BeY5yxSd4UZ0KL14FmwGT3yxU4/5wA1+Oc/OD8040ga7VxTui57rYicSahSfGl+3sF50fJvAl4V91LeVVNFJFrstmOp6nq3GC1UXyfRSxlrJ51wCWqegw4JiKn5Hxd8HJV3QYgIhNwkncSTsJZ4sZVACexp/g8k/39ANyA82H3Cs6wIAtxkkKKb1T1FHBKRPbh3NDYBucu4jh3f4WBlClIz+GMh0M25VJcCWzT8333J5DN+eY+nqfuYGYishGoivPht0DdO4VF5DP3/U0FTuMkPnCO7ylVPSMi6zj/N28L1BP3Cg7nA7WWezw+FGdy+6nq3tiXTitxhowugvOlYQNOEoTzxz+789DTdFVVN769en5Ojw1uvJ4xrAP+7V4BzVDVH8SZDyIJ+MC9wsqsbcnbud8c+A+AOoNEZjaaQa5hieCvOeXx+BzZH08BuqrqFi/r6uJ8Y7wsk9e2x/nmn4E6Xz2W40xlORfnyuAN4LBmPumKtxtIzpK2urCQR9wbVLVpJttKOQ7ZHYPstuNtmymvyyq+9K9JTvf6ZI+40r/vlDF95qpqj0xiOZHJ8kU4Vz6XAUOBwThXHT94iQnOHx/BGSP/GS/bTNLz7QJZlfOF1/NNnNEuc3runnHPM/A4vqqa7H5opuzvMVXNcJ6KU212KzBWRN5Q1Y891hXCudqJVtVdIhJL2r9tyvG/kPMnq3MB9z38LCKNcL7Vvywi81T1JRGJwUnG3XAGLWydxX58OYa5lrUR+N9mIEpEarjPPT9c5gCPedR/pszyVBXn8rkh0MH9R03PW/sAInKZexKnaADsVNWjwHYRudMtJyJS3y2zBOjuPr7H47U7ceq0C7rfoNu4y7cA5USkqbut/CJyddaHgblAv5QPCXHmWb6Q7XjKLL6ciBGnjSQCp2phMc5AadeLSE03rqLijPOSneU4V13JqpqE8y2zH06CyMo8oJuIXOrur7R7DlxIuS1AdXEG08N9Tym8nm/ZvJ8b3XrzSJxzNyfjHM0B+rvf/BGRK9xjWRXnW/kYnAH10o/2mvKhf0CctpluePdXzx+vROQy4KSqfgq8BjRy4yihqjOBJ3CqwHy1BHe+AxGpQy4axiQzeTaD5VaqmiQifYFvROQkzrfDYu7qYcBbwFr3g2i7iHQEPsCpd9wjIg/hfGtq7H644P5T1lTVzV52mR943T2Zk4D9QErj6D3AO+KMYZIfp950Dc7AZuNF5B/A1x6x7xKRScB6YDuwyl1+2r3cHykiJXDOm7dwLt8z8z5whftez+DUwb59AdtJlVl8ORSH07ZQE5gPfOV+q+0FTBCRlOqz53Dq1LOK55Q4Y8Iscxf9gPPhuS6b1210/ybfuufBGZy2mp05Laeqf4rIo8BsETlB2mqpDOcbTntGZnH9LiJDcI6L4FRrfZ1ZeS/ex6keWekmn/3A7ThXSYPd8+A4kKb7paoeFpExOH/XP9K9B89yF3Ie+qIu8JqIJOMc4/44/7Nfu1crAvw9B9v7HzDOrXLb7MaXK+Y+yIwNMZEHiEhznEa/R7ItfGHbP66quWkqRJMDInKJOgPpCTAK+EVV3wx1XOHK/eKW3/1SWAP4DqitqqdDHFqm7IogD1BngLwMg+QZ4+ojIg/gNHKvAt4LcTzhrggw360iE+DR3JwEwK4IjDEm7FljsTHGhDlLBMYYE+YsERhjTJizRGCMMWHOEoExxoS5/wfqfBFAp44dGQAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"df_sequence_missings['sequence ratios'].plot(kind='bar')\n",
"\n",
"plt.ylabel(\"Sequence Ration < 1\")\n",
"plt.xlabel(\"Index / Sequence number where genomes are missing\")\n",
"\n",
"#add custom legend to bar chart\n",
"plt.legend(['Index of Missing Sequences'], prop={'size': 20})"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d12ca912",
"metadata": {},
"outputs": [],
"source": [
"sequence1_All=''\n",
"sequence2_All=''"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e5789852",
"metadata": {},
"outputs": [],
"source": [
"with open(input_file1) as file1:\n",
" for lineno in file1:\n",
" lineno=lineno.strip('\\n')\n",
" sequence1_All+=lineno"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b6064bd9",
"metadata": {},
"outputs": [],
"source": [
"with open(input_file2) as file2:\n",
" for lineno in file2:\n",
" lineno=lineno.strip('\\n')\n",
" sequence2_All+=lineno"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f8b08d7a",
"metadata": {},
"outputs": [],
"source": [
"def basic_Analysis(DNAseq):\n",
" total_base = len(DNAseq)\n",
" num_Adenine = DNAseq.count('A')\n",
" num_Guanine = DNAseq.count('G')\n",
" num_Thymine = DNAseq.count('T')\n",
" num_Cytosine = DNAseq.count('C')\n",
" \n",
" if total_base != num_Adenine + num_Guanine + num_Thymine + num_Cytosine:\n",
" print('Something is not right')\n",
" else : pass\n",
" \n",
" A_percent = num_Adenine / total_base\n",
" G_percent = num_Guanine / total_base\n",
" T_percent = num_Thymine / total_base\n",
" C_percent = num_Cytosine / total_base\n",
" \n",
" #visualization\n",
" x = np.arange(4)\n",
" bases = ['Adenine', 'Guanine', 'Thymine' ,'Cytosine']\n",
" values = [num_Adenine, num_Guanine, num_Thymine, num_Cytosine]\n",
" plt.bar(x,values)\n",
" plt.xticks(x, bases)\n",
" plt.show()\n",
" table = [['total base',total_base,'Percentage',str('100%')],\n",
" ['Adenine:',num_Adenine, 'Percentage:',str(round(A_percent*100,2))+'%'],\n",
" ['Guanine:',num_Guanine, 'Percentage:',str(round(G_percent*100,2))+'%'],\n",
" ['Thynime:',num_Thymine, 'Percentage:',str(round(T_percent*100,2))+'%'],\n",
" ['Cytosine:',num_Cytosine, 'Percentage:',str(round(C_percent*100,2))+'%']]\n",
" print(tabulate(table))\n",
" print('GC content:', round((((num_Guanine + num_Cytosine) / total_base)*100),2),'%')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "61655d2a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVUUlEQVR4nO3df5TddX3n8eerZFERISBTikmOyZGs3UAphVnIrv1hxRMCeky6pQrrlkCz5myLu3Wtq2B3mxbLEXd7ykqrdFNJCR7Kj0UsOYLGHMDVtYBMAIEQKSM/JDkgIwnolhUMvveP+5nlMsyQmbmTmYQ8H+fcc7/f9/fz+X4/98vNvO73x72kqpAk7dt+ZqYHIEmaeYaBJMkwkCQZBpIkDANJEjBrpgcwWYcddljNnz9/pochSXuVTZs2/aCq+kbW99owmD9/PgMDAzM9DEnaqyR5dLS6p4kkSbsOgyRrkzyZ5L5Rlv1BkkpyWJtPkouTDCa5J8lxXW1XJHmwPVZ01Y9Pcm/rc3GSTNWLkySNz3iODC4Dlo4sJpkHLAG+11U+BVjYHquAS1rbQ4HVwInACcDqJIe0PpcAH+jq97JtSZJ2r12GQVV9Hdg+yqKLgI8C3b9nsQy4vDpuA2YnOQI4GdhYVduragewEVjalh1UVbdV53cxLgeW9/SKJEkTNqlrBkmWAduq6tsjFs0BHuua39pqr1TfOkp9rO2uSjKQZGBoaGgyQ5ckjWLCYZDkAODjwB9N/XBeWVWtqar+qurv63vZnVGSpEmazJHBW4AFwLeTPALMBe5M8nPANmBeV9u5rfZK9bmj1CVJ02jCYVBV91bVz1bV/KqaT+fUznFV9QSwHjiz3VW0GHimqh4HNgBLkhzSLhwvATa0ZT9MsrjdRXQmcP0UvTZJ0jiN59bSK4Fbgbcm2Zpk5Ss0vxF4CBgE/hr4PYCq2g58ArijPc5vNVqbz7U+3wW+PLmXIkmarOyt/3Ob/v7+8hvI2hvNP/eGmR7CjHrkwnfN9BD2aUk2VVX/yLrfQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJKAWTM9gJngTwj7E8KSXsojA0mSYSBJMgwkSRgGkiQMA0kShoEkiXGEQZK1SZ5Mcl9X7b8l+U6Se5J8McnsrmXnJRlM8kCSk7vqS1ttMMm5XfUFSW5v9auT7D+Fr0+SNA7jOTK4DFg6orYROLqqjgH+ATgPIMki4HTgqNbns0n2S7If8BngFGARcEZrC/Ap4KKqOhLYAazs6RVJkiZsl2FQVV8Hto+ofbWqdrbZ24C5bXoZcFVVPVdVDwODwAntMVhVD1XV88BVwLIkAd4BXNv6rwOW9/aSJEkTNRXXDH4H+HKbngM81rVsa6uNVX8j8HRXsAzXJUnTqKcwSPKHwE7giqkZzi63tyrJQJKBoaGh6dikJO0TJh0GSc4C3g28v6qqlbcB87qazW21sepPAbOTzBpRH1VVramq/qrq7+vrm+zQJUkjTCoMkiwFPgq8p6qe7Vq0Hjg9yWuSLAAWAt8C7gAWtjuH9qdzkXl9C5FbgNNa/xXA9ZN7KZKkyRrPraVXArcCb02yNclK4C+BNwAbk9yd5K8AqmozcA1wP/AV4JyqeqFdE/ggsAHYAlzT2gJ8DPhwkkE61xAundJXKEnapV3+hHVVnTFKecw/2FV1AXDBKPUbgRtHqT9E524jSdIM8RvIkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRLjCIMka5M8meS+rtqhSTYmebA9H9LqSXJxksEk9yQ5rqvPitb+wSQruurHJ7m39bk4Sab6RUqSXtl4jgwuA5aOqJ0L3FRVC4Gb2jzAKcDC9lgFXAKd8ABWAycCJwCrhwOktflAV7+R25Ik7Wa7DIOq+jqwfUR5GbCuTa8DlnfVL6+O24DZSY4ATgY2VtX2qtoBbASWtmUHVdVtVVXA5V3rkiRNk8leMzi8qh5v008Ah7fpOcBjXe22ttor1beOUh9VklVJBpIMDA0NTXLokqSRer6A3D7R1xSMZTzbWlNV/VXV39fXNx2blKR9wmTD4PvtFA/t+clW3wbM62o3t9VeqT53lLokaRpNNgzWA8N3BK0Aru+qn9nuKloMPNNOJ20AliQ5pF04XgJsaMt+mGRxu4vozK51SZKmyaxdNUhyJfB24LAkW+ncFXQhcE2SlcCjwHtb8xuBU4FB4FngbICq2p7kE8Adrd35VTV8Ufr36Nyx9Drgy+0hSZpGuwyDqjpjjEUnjdK2gHPGWM9aYO0o9QHg6F2NQ5K0+/gNZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIYx//2Uhpp/rk3zPQQZtQjF75rpocgTTmPDCRJhoEkqccwSPIfk2xOcl+SK5O8NsmCJLcnGUxydZL9W9vXtPnBtnx+13rOa/UHkpzc42uSJE3QpMMgyRzgPwD9VXU0sB9wOvAp4KKqOhLYAaxsXVYCO1r9otaOJItav6OApcBnk+w32XFJkiau19NEs4DXJZkFHAA8DrwDuLYtXwcsb9PL2jxt+UlJ0upXVdVzVfUwMAic0OO4JEkTMOkwqKptwJ8B36MTAs8Am4Cnq2pna7YVmNOm5wCPtb47W/s3dtdH6fMSSVYlGUgyMDQ0NNmhS5JG6OU00SF0PtUvAN4EvJ7OaZ7dpqrWVFV/VfX39fXtzk1J0j6ll9NE7wQerqqhqvoJcB3wNmB2O20EMBfY1qa3AfMA2vKDgae666P0kSRNg17C4HvA4iQHtHP/JwH3A7cAp7U2K4Dr2/T6Nk9bfnNVVauf3u42WgAsBL7Vw7gkSRM06W8gV9XtSa4F7gR2AncBa4AbgKuS/GmrXdq6XAp8PskgsJ3OHURU1eYk19AJkp3AOVX1wmTHJUmauJ5+jqKqVgOrR5QfYpS7garqx8BvjbGeC4ALehmLpH2DP4eye34OxW8gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkugxDJLMTnJtku8k2ZLkXyQ5NMnGJA+250Na2yS5OMlgknuSHNe1nhWt/YNJVvT6oiRJE9PrkcGnga9U1c8DvwhsAc4FbqqqhcBNbR7gFGBhe6wCLgFIciiwGjgROAFYPRwgkqTpMekwSHIw8KvApQBV9XxVPQ0sA9a1ZuuA5W16GXB5ddwGzE5yBHAysLGqtlfVDmAjsHSy45IkTVwvRwYLgCHgb5LcleRzSV4PHF5Vj7c2TwCHt+k5wGNd/be22lj1l0myKslAkoGhoaEehi5J6tZLGMwCjgMuqapfAv6RF08JAVBVBVQP23iJqlpTVf1V1d/X1zdVq5WkfV4vYbAV2FpVt7f5a+mEw/fb6R/a85Nt+TZgXlf/ua02Vl2SNE0mHQZV9QTwWJK3ttJJwP3AemD4jqAVwPVtej1wZruraDHwTDudtAFYkuSQduF4SatJkqbJrB77/3vgiiT7Aw8BZ9MJmGuSrAQeBd7b2t4InAoMAs+2tlTV9iSfAO5o7c6vqu09jkuSNAE9hUFV3Q30j7LopFHaFnDOGOtZC6ztZSySpMnzG8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkpiAMkuyX5K4kX2rzC5LcnmQwydVJ9m/117T5wbZ8ftc6zmv1B5Kc3OuYJEkTMxVHBr8PbOma/xRwUVUdCewAVrb6SmBHq1/U2pFkEXA6cBSwFPhskv2mYFySpHHqKQySzAXeBXyuzQd4B3Bta7IOWN6ml7V52vKTWvtlwFVV9VxVPQwMAif0Mi5J0sT0emTw34GPAj9t828Enq6qnW1+KzCnTc8BHgNoy59p7f9/fZQ+L5FkVZKBJANDQ0M9Dl2SNGzSYZDk3cCTVbVpCsfziqpqTVX1V1V/X1/fdG1Wkl71ZvXQ923Ae5KcCrwWOAj4NDA7yaz26X8usK213wbMA7YmmQUcDDzVVR/W3UeSNA0mfWRQVedV1dyqmk/nAvDNVfV+4BbgtNZsBXB9m17f5mnLb66qavXT291GC4CFwLcmOy5J0sT1cmQwlo8BVyX5U+Au4NJWvxT4fJJBYDudAKGqNie5Brgf2AmcU1Uv7IZxSZLGMCVhUFVfA77Wph9ilLuBqurHwG+N0f8C4IKpGIskaeL8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJHsIgybwktyS5P8nmJL/f6ocm2ZjkwfZ8SKsnycVJBpPck+S4rnWtaO0fTLKi95clSZqIXo4MdgJ/UFWLgMXAOUkWAecCN1XVQuCmNg9wCrCwPVYBl0AnPIDVwInACcDq4QCRJE2PSYdBVT1eVXe26R8BW4A5wDJgXWu2DljeppcBl1fHbcDsJEcAJwMbq2p7Ve0ANgJLJzsuSdLETck1gyTzgV8CbgcOr6rH26IngMPb9Bzgsa5uW1ttrLokaZr0HAZJDgS+AHyoqn7YvayqCqhet9G1rVVJBpIMDA0NTdVqJWmf11MYJPkndILgiqq6rpW/307/0J6fbPVtwLyu7nNbbaz6y1TVmqrqr6r+vr6+XoYuSerSy91EAS4FtlTVn3ctWg8M3xG0Ari+q35mu6toMfBMO520AViS5JB24XhJq0mSpsmsHvq+Dfht4N4kd7fax4ELgWuSrAQeBd7blt0InAoMAs8CZwNU1fYknwDuaO3Or6rtPYxLkjRBkw6DqvrfQMZYfNIo7Qs4Z4x1rQXWTnYskqTe+A1kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIk9KAySLE3yQJLBJOfO9HgkaV+yR4RBkv2AzwCnAIuAM5IsmtlRSdK+Y48IA+AEYLCqHqqq54GrgGUzPCZJ2mekqmZ6DCQ5DVhaVf+2zf82cGJVfXBEu1XAqjb7VuCBaR3o1DkM+MFMD2Iv5v7rjfuvN3v7/ntzVfWNLM6aiZFMVlWtAdbM9Dh6lWSgqvpnehx7K/dfb9x/vXm17r895TTRNmBe1/zcVpMkTYM9JQzuABYmWZBkf+B0YP0Mj0mS9hl7xGmiqtqZ5IPABmA/YG1VbZ7hYe1Oe/2prhnm/uuN+683r8r9t0dcQJYkzaw95TSRJGkGGQaSJMNgIpIsT1JJfn6M5V9LMqlbzpK859X8MxxJDk/yt0keSrIpya1JfmOKt/Gq2odJ3pjk7vZ4Ism2Nv10kvt3w/belOTaqV7vTEvyc0muSvLd9t67Mck/HaPtsUlOneR29ur95zWDCUhyNfAm4OaqWj3K8q8BH6mqgeke254sSYC/B9ZV1V+12puB91TVX8zo4PYSSf4Y+D9V9WdJ5gNfqqqjZ3ZUe74x3nu/CBxUVd8Ypf1ZQP/IL7zuCzwyGKckBwK/DKykc+srSV7XPnFsSfJF4HVd7Ze0T793JvmfrT9JHknyJ61+7/BRRpKzkvxlm74sycVJ/r59kj6ta73/KckdSe5J8ifTtwd68g7g+eF/jABV9WhV/UX36wZI8qUkb2/TlyQZSLK5+7Xuo/twpP2S/HXbN19t78W3JLlzuEGShcPzbZ99sh1ZDCQ5LsmG9mn537U285Pc16bPSnJdkq8keTDJf+1a76jv7T3UrwM/GfHe+zbwgSTLh2tJrkiyDDgfeF/bT+9LcmiSv2vvlduSHNPa/1rXUdtdSd6wt+8/w2D8lgFfqap/AJ5Kcjzwu8CzVfXPgNXA8QBJDgP+M/DOqjoOGAA+3LWuH7T6JcBHxtjeEXTC593AhW29S4CFdH7L6Vjg+CS/OpUvcjc5Crhzl61e7g/bNz2PAX5t+B9is6/tw5EWAp+pqqOAp4HfrKrvAs8kOba1ORv4m64+36uqY4FvAJcBpwGLgbEC8VjgfcAv0PkDOW8c7+09zdHAplHqlwJnASQ5GPiXwA3AHwFXV9WxVXU1nX1zV1UdA3wcuLz1/whwTtufvwL831G2cSx70f7bI75nsJc4A/h0m76qzR8JXAxQVfckuactX0zn11e/2TlKZX/g1q51XdeeNwH/aozt/V1V/RS4P8nhrbakPe5q8wfS+aPw9cm/rOmX5DN0/kg/T+fXasfy3nR+j2oWnT/si4DhfbxP70Pg4aq6u01vAua36c8BZyf5MJ0/RCd09Rn+Iue9wIFV9SPgR0meSzJ7lG3cVFXPAKRzjeLNwGxe+b29V6iq/5Xks0n6gN8EvtC+7zSy6S+35VTVzelcxzkI+Cbw50muAK6rqq2j9N2r9p9hMA5JDqVzquMXkhSdL8YVL/5BeVkXYGNVnTHG8ufa8wuM/d/gua7pdD1/sqr+x3jHvofYTPsHBVBV57RPSAPATl56hPpagCQL6Hz6+udVtSPJZcPLmn1tH47U/dpe4MVTlF+gc5R6M7Cpqp4apc9PR/T/KaPvw5HbmMWu39t7ms10joBGcznwb+ic9j17IiutqguT3ACcSucP+8nAj0c026v2n6eJxuc04PNV9eaqml9V84CH6Xwi+9cASY6mczoD4DbgbUmObMtenzHuXpigDcDv5MXrD3OS/OwUrHd3uxl4bZLf7aod0J4fAY5N8jNJ5vHiJ9mDgH+kc9rjcDr/r4upsLfuw3Gpqh/TeY2X8NJTRFNld723d5ebgde0I0wAkhyT5FfonCr7EEBVDd+d9SPgDV39vwG8v/V7O53Tkz9M8paqureqPkXn53RGvcNwFHvs/jMMxucM4Isjal8AFgAHJtlC58LTJoCqGqJzPvLKduroVsb/ZhlTVX0V+Fvg1iT3Atfy0jfuHqk6t6wtp3Pe/+Ek3wLWAR+jc7j9MHA/nVNud7Y+36Zz5PUdOq/5m1M0lr1yH07QFXQ+7X91qle8u97bu0t77/0G8M52sXwz8Engiar6PrCFl4bmLcCi4QvIwB/Tua50D53rTitauw8lua/VfwJ8eZzj2WP3n7eWSq8yST4CHFxV/2Wmx7InS3IAnesnxw2f29+Xec1AehVJ5xbnt9C5xqUxJHknnTuKLjIIOjwykCR5zUCSZBhIkjAMJEkYBpIkDANJEvD/AG3vovyAnWp7AAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"---------- ----- ----------- ------\n",
"total base 42483 Percentage 100%\n",
"Adenine: 12688 Percentage: 29.87%\n",
"Guanine: 8393 Percentage: 19.76%\n",
"Thynime: 13709 Percentage: 32.27%\n",
"Cytosine: 7693 Percentage: 18.11%\n",
"---------- ----- ----------- ------\n",
"GC content: 37.86 %\n"
]
}
],
"source": [
"basic_Analysis(sequence1_All)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "99fef932",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVUUlEQVR4nO3df5TddX3n8eerZFERISBTikmOyZGs3UAphVnIrv1hxRMCeky6pQrrlkCz5myLu3Wtq2B3mxbLEXd7ykqrdFNJCR7Kj0UsOYLGHMDVtYBMAIEQKSM/JDkgIwnolhUMvveP+5nlMsyQmbmTmYQ8H+fcc7/f9/fz+X4/98vNvO73x72kqpAk7dt+ZqYHIEmaeYaBJMkwkCQZBpIkDANJEjBrpgcwWYcddljNnz9/pochSXuVTZs2/aCq+kbW99owmD9/PgMDAzM9DEnaqyR5dLS6p4kkSbsOgyRrkzyZ5L5Rlv1BkkpyWJtPkouTDCa5J8lxXW1XJHmwPVZ01Y9Pcm/rc3GSTNWLkySNz3iODC4Dlo4sJpkHLAG+11U+BVjYHquAS1rbQ4HVwInACcDqJIe0PpcAH+jq97JtSZJ2r12GQVV9Hdg+yqKLgI8C3b9nsQy4vDpuA2YnOQI4GdhYVduragewEVjalh1UVbdV53cxLgeW9/SKJEkTNqlrBkmWAduq6tsjFs0BHuua39pqr1TfOkp9rO2uSjKQZGBoaGgyQ5ckjWLCYZDkAODjwB9N/XBeWVWtqar+qurv63vZnVGSpEmazJHBW4AFwLeTPALMBe5M8nPANmBeV9u5rfZK9bmj1CVJ02jCYVBV91bVz1bV/KqaT+fUznFV9QSwHjiz3VW0GHimqh4HNgBLkhzSLhwvATa0ZT9MsrjdRXQmcP0UvTZJ0jiN59bSK4Fbgbcm2Zpk5Ss0vxF4CBgE/hr4PYCq2g58ArijPc5vNVqbz7U+3wW+PLmXIkmarOyt/3Ob/v7+8hvI2hvNP/eGmR7CjHrkwnfN9BD2aUk2VVX/yLrfQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJKAWTM9gJngTwj7E8KSXsojA0mSYSBJMgwkSRgGkiQMA0kShoEkiXGEQZK1SZ5Mcl9X7b8l+U6Se5J8McnsrmXnJRlM8kCSk7vqS1ttMMm5XfUFSW5v9auT7D+Fr0+SNA7jOTK4DFg6orYROLqqjgH+ATgPIMki4HTgqNbns0n2S7If8BngFGARcEZrC/Ap4KKqOhLYAazs6RVJkiZsl2FQVV8Hto+ofbWqdrbZ24C5bXoZcFVVPVdVDwODwAntMVhVD1XV88BVwLIkAd4BXNv6rwOW9/aSJEkTNRXXDH4H+HKbngM81rVsa6uNVX8j8HRXsAzXJUnTqKcwSPKHwE7giqkZzi63tyrJQJKBoaGh6dikJO0TJh0GSc4C3g28v6qqlbcB87qazW21sepPAbOTzBpRH1VVramq/qrq7+vrm+zQJUkjTCoMkiwFPgq8p6qe7Vq0Hjg9yWuSLAAWAt8C7gAWtjuH9qdzkXl9C5FbgNNa/xXA9ZN7KZKkyRrPraVXArcCb02yNclK4C+BNwAbk9yd5K8AqmozcA1wP/AV4JyqeqFdE/ggsAHYAlzT2gJ8DPhwkkE61xAundJXKEnapV3+hHVVnTFKecw/2FV1AXDBKPUbgRtHqT9E524jSdIM8RvIkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRLjCIMka5M8meS+rtqhSTYmebA9H9LqSXJxksEk9yQ5rqvPitb+wSQruurHJ7m39bk4Sab6RUqSXtl4jgwuA5aOqJ0L3FRVC4Gb2jzAKcDC9lgFXAKd8ABWAycCJwCrhwOktflAV7+R25Ik7Wa7DIOq+jqwfUR5GbCuTa8DlnfVL6+O24DZSY4ATgY2VtX2qtoBbASWtmUHVdVtVVXA5V3rkiRNk8leMzi8qh5v008Ah7fpOcBjXe22ttor1beOUh9VklVJBpIMDA0NTXLokqSRer6A3D7R1xSMZTzbWlNV/VXV39fXNx2blKR9wmTD4PvtFA/t+clW3wbM62o3t9VeqT53lLokaRpNNgzWA8N3BK0Aru+qn9nuKloMPNNOJ20AliQ5pF04XgJsaMt+mGRxu4vozK51SZKmyaxdNUhyJfB24LAkW+ncFXQhcE2SlcCjwHtb8xuBU4FB4FngbICq2p7kE8Adrd35VTV8Ufr36Nyx9Drgy+0hSZpGuwyDqjpjjEUnjdK2gHPGWM9aYO0o9QHg6F2NQ5K0+/gNZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIYx//2Uhpp/rk3zPQQZtQjF75rpocgTTmPDCRJhoEkqccwSPIfk2xOcl+SK5O8NsmCJLcnGUxydZL9W9vXtPnBtnx+13rOa/UHkpzc42uSJE3QpMMgyRzgPwD9VXU0sB9wOvAp4KKqOhLYAaxsXVYCO1r9otaOJItav6OApcBnk+w32XFJkiau19NEs4DXJZkFHAA8DrwDuLYtXwcsb9PL2jxt+UlJ0upXVdVzVfUwMAic0OO4JEkTMOkwqKptwJ8B36MTAs8Am4Cnq2pna7YVmNOm5wCPtb47W/s3dtdH6fMSSVYlGUgyMDQ0NNmhS5JG6OU00SF0PtUvAN4EvJ7OaZ7dpqrWVFV/VfX39fXtzk1J0j6ll9NE7wQerqqhqvoJcB3wNmB2O20EMBfY1qa3AfMA2vKDgae666P0kSRNg17C4HvA4iQHtHP/JwH3A7cAp7U2K4Dr2/T6Nk9bfnNVVauf3u42WgAsBL7Vw7gkSRM06W8gV9XtSa4F7gR2AncBa4AbgKuS/GmrXdq6XAp8PskgsJ3OHURU1eYk19AJkp3AOVX1wmTHJUmauJ5+jqKqVgOrR5QfYpS7garqx8BvjbGeC4ALehmLpH2DP4eye34OxW8gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkugxDJLMTnJtku8k2ZLkXyQ5NMnGJA+250Na2yS5OMlgknuSHNe1nhWt/YNJVvT6oiRJE9PrkcGnga9U1c8DvwhsAc4FbqqqhcBNbR7gFGBhe6wCLgFIciiwGjgROAFYPRwgkqTpMekwSHIw8KvApQBV9XxVPQ0sA9a1ZuuA5W16GXB5ddwGzE5yBHAysLGqtlfVDmAjsHSy45IkTVwvRwYLgCHgb5LcleRzSV4PHF5Vj7c2TwCHt+k5wGNd/be22lj1l0myKslAkoGhoaEehi5J6tZLGMwCjgMuqapfAv6RF08JAVBVBVQP23iJqlpTVf1V1d/X1zdVq5WkfV4vYbAV2FpVt7f5a+mEw/fb6R/a85Nt+TZgXlf/ua02Vl2SNE0mHQZV9QTwWJK3ttJJwP3AemD4jqAVwPVtej1wZruraDHwTDudtAFYkuSQduF4SatJkqbJrB77/3vgiiT7Aw8BZ9MJmGuSrAQeBd7b2t4InAoMAs+2tlTV9iSfAO5o7c6vqu09jkuSNAE9hUFV3Q30j7LopFHaFnDOGOtZC6ztZSySpMnzG8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkpiAMkuyX5K4kX2rzC5LcnmQwydVJ9m/117T5wbZ8ftc6zmv1B5Kc3OuYJEkTMxVHBr8PbOma/xRwUVUdCewAVrb6SmBHq1/U2pFkEXA6cBSwFPhskv2mYFySpHHqKQySzAXeBXyuzQd4B3Bta7IOWN6ml7V52vKTWvtlwFVV9VxVPQwMAif0Mi5J0sT0emTw34GPAj9t828Enq6qnW1+KzCnTc8BHgNoy59p7f9/fZQ+L5FkVZKBJANDQ0M9Dl2SNGzSYZDk3cCTVbVpCsfziqpqTVX1V1V/X1/fdG1Wkl71ZvXQ923Ae5KcCrwWOAj4NDA7yaz26X8usK213wbMA7YmmQUcDDzVVR/W3UeSNA0mfWRQVedV1dyqmk/nAvDNVfV+4BbgtNZsBXB9m17f5mnLb66qavXT291GC4CFwLcmOy5J0sT1cmQwlo8BVyX5U+Au4NJWvxT4fJJBYDudAKGqNie5Brgf2AmcU1Uv7IZxSZLGMCVhUFVfA77Wph9ilLuBqurHwG+N0f8C4IKpGIskaeL8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJHsIgybwktyS5P8nmJL/f6ocm2ZjkwfZ8SKsnycVJBpPck+S4rnWtaO0fTLKi95clSZqIXo4MdgJ/UFWLgMXAOUkWAecCN1XVQuCmNg9wCrCwPVYBl0AnPIDVwInACcDq4QCRJE2PSYdBVT1eVXe26R8BW4A5wDJgXWu2DljeppcBl1fHbcDsJEcAJwMbq2p7Ve0ANgJLJzsuSdLETck1gyTzgV8CbgcOr6rH26IngMPb9Bzgsa5uW1ttrLokaZr0HAZJDgS+AHyoqn7YvayqCqhet9G1rVVJBpIMDA0NTdVqJWmf11MYJPkndILgiqq6rpW/307/0J6fbPVtwLyu7nNbbaz6y1TVmqrqr6r+vr6+XoYuSerSy91EAS4FtlTVn3ctWg8M3xG0Ari+q35mu6toMfBMO520AViS5JB24XhJq0mSpsmsHvq+Dfht4N4kd7fax4ELgWuSrAQeBd7blt0InAoMAs8CZwNU1fYknwDuaO3Or6rtPYxLkjRBkw6DqvrfQMZYfNIo7Qs4Z4x1rQXWTnYskqTe+A1kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIk9KAySLE3yQJLBJOfO9HgkaV+yR4RBkv2AzwCnAIuAM5IsmtlRSdK+Y48IA+AEYLCqHqqq54GrgGUzPCZJ2mekqmZ6DCQ5DVhaVf+2zf82cGJVfXBEu1XAqjb7VuCBaR3o1DkM+MFMD2Iv5v7rjfuvN3v7/ntzVfWNLM6aiZFMVlWtAdbM9Dh6lWSgqvpnehx7K/dfb9x/vXm17r895TTRNmBe1/zcVpMkTYM9JQzuABYmWZBkf+B0YP0Mj0mS9hl7xGmiqtqZ5IPABmA/YG1VbZ7hYe1Oe/2prhnm/uuN+683r8r9t0dcQJYkzaw95TSRJGkGGQaSJMNgIpIsT1JJfn6M5V9LMqlbzpK859X8MxxJDk/yt0keSrIpya1JfmOKt/Gq2odJ3pjk7vZ4Ism2Nv10kvt3w/belOTaqV7vTEvyc0muSvLd9t67Mck/HaPtsUlOneR29ur95zWDCUhyNfAm4OaqWj3K8q8BH6mqgeke254sSYC/B9ZV1V+12puB91TVX8zo4PYSSf4Y+D9V9WdJ5gNfqqqjZ3ZUe74x3nu/CBxUVd8Ypf1ZQP/IL7zuCzwyGKckBwK/DKykc+srSV7XPnFsSfJF4HVd7Ze0T793JvmfrT9JHknyJ61+7/BRRpKzkvxlm74sycVJ/r59kj6ta73/KckdSe5J8ifTtwd68g7g+eF/jABV9WhV/UX36wZI8qUkb2/TlyQZSLK5+7Xuo/twpP2S/HXbN19t78W3JLlzuEGShcPzbZ99sh1ZDCQ5LsmG9mn537U285Pc16bPSnJdkq8keTDJf+1a76jv7T3UrwM/GfHe+zbwgSTLh2tJrkiyDDgfeF/bT+9LcmiSv2vvlduSHNPa/1rXUdtdSd6wt+8/w2D8lgFfqap/AJ5Kcjzwu8CzVfXPgNXA8QBJDgP+M/DOqjoOGAA+3LWuH7T6JcBHxtjeEXTC593AhW29S4CFdH7L6Vjg+CS/OpUvcjc5Crhzl61e7g/bNz2PAX5t+B9is6/tw5EWAp+pqqOAp4HfrKrvAs8kOba1ORv4m64+36uqY4FvAJcBpwGLgbEC8VjgfcAv0PkDOW8c7+09zdHAplHqlwJnASQ5GPiXwA3AHwFXV9WxVXU1nX1zV1UdA3wcuLz1/whwTtufvwL831G2cSx70f7bI75nsJc4A/h0m76qzR8JXAxQVfckuactX0zn11e/2TlKZX/g1q51XdeeNwH/aozt/V1V/RS4P8nhrbakPe5q8wfS+aPw9cm/rOmX5DN0/kg/T+fXasfy3nR+j2oWnT/si4DhfbxP70Pg4aq6u01vAua36c8BZyf5MJ0/RCd09Rn+Iue9wIFV9SPgR0meSzJ7lG3cVFXPAKRzjeLNwGxe+b29V6iq/5Xks0n6gN8EvtC+7zSy6S+35VTVzelcxzkI+Cbw50muAK6rqq2j9N2r9p9hMA5JDqVzquMXkhSdL8YVL/5BeVkXYGNVnTHG8ufa8wuM/d/gua7pdD1/sqr+x3jHvofYTPsHBVBV57RPSAPATl56hPpagCQL6Hz6+udVtSPJZcPLmn1tH47U/dpe4MVTlF+gc5R6M7Cpqp4apc9PR/T/KaPvw5HbmMWu39t7ms10joBGcznwb+ic9j17IiutqguT3ACcSucP+8nAj0c026v2n6eJxuc04PNV9eaqml9V84CH6Xwi+9cASY6mczoD4DbgbUmObMtenzHuXpigDcDv5MXrD3OS/OwUrHd3uxl4bZLf7aod0J4fAY5N8jNJ5vHiJ9mDgH+kc9rjcDr/r4upsLfuw3Gpqh/TeY2X8NJTRFNld723d5ebgde0I0wAkhyT5FfonCr7EEBVDd+d9SPgDV39vwG8v/V7O53Tkz9M8paqureqPkXn53RGvcNwFHvs/jMMxucM4Isjal8AFgAHJtlC58LTJoCqGqJzPvLKduroVsb/ZhlTVX0V+Fvg1iT3Atfy0jfuHqk6t6wtp3Pe/+Ek3wLWAR+jc7j9MHA/nVNud7Y+36Zz5PUdOq/5m1M0lr1yH07QFXQ+7X91qle8u97bu0t77/0G8M52sXwz8Engiar6PrCFl4bmLcCi4QvIwB/Tua50D53rTitauw8lua/VfwJ8eZzj2WP3n7eWSq8yST4CHFxV/2Wmx7InS3IAnesnxw2f29+Xec1AehVJ5xbnt9C5xqUxJHknnTuKLjIIOjwykCR5zUCSZBhIkjAMJEkYBpIkDANJEvD/AG3vovyAnWp7AAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"---------- ----- ----------- ------\n",
"total base 42483 Percentage 100%\n",
"Adenine: 12686 Percentage: 29.86%\n",
"Guanine: 8394 Percentage: 19.76%\n",
"Thynime: 13713 Percentage: 32.28%\n",
"Cytosine: 7690 Percentage: 18.1%\n",
"---------- ----- ----------- ------\n",
"GC content: 37.86 %\n"
]
}
],
"source": [
"basic_Analysis(sequence2_All)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "2b164b49",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" line no | \n",
" sequence ratios | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 607 | \n",
" 608 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 608 | \n",
" 609 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 609 | \n",
" 610 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 610 | \n",
" 611 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 611 | \n",
" 612 | \n",
" 0.989474 | \n",
"
\n",
" \n",
"
\n",
"
612 rows × 2 columns
\n",
"
"
],
"text/plain": [
" line no sequence ratios\n",
"0 1 1.000000\n",
"1 2 1.000000\n",
"2 3 1.000000\n",
"3 4 1.000000\n",
"4 5 1.000000\n",
".. ... ...\n",
"607 608 1.000000\n",
"608 609 1.000000\n",
"609 610 1.000000\n",
"610 611 1.000000\n",
"611 612 0.989474\n",
"\n",
"[612 rows x 2 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sequence"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "5e3758a9",
"metadata": {},
"outputs": [],
"source": [
"def Fapriori(itemSetList, minSup, minConf):\n",
" C1ItemSet = getItemSetFromList(itemSetList)\n",
" # Final result global frequent itemset\n",
" globalFreqItemSet = dict()\n",
" # Storing global itemset with support count\n",
" globalItemSetWithSup = defaultdict(int)\n",
"\n",
" L1ItemSet = getAboveMinSup(\n",
" C1ItemSet, itemSetList, minSup, globalItemSetWithSup)\n",
" currentLSet = L1ItemSet\n",
" k = 2\n",
"\n",
" # Calculating frequent item set\n",
" while(currentLSet):\n",
" # Storing frequent itemset\n",
" globalFreqItemSet[k-1] = currentLSet\n",
" # Self-joining Lk\n",
" candidateSet = getUnion(currentLSet, k)\n",
" # Perform subset testing and remove pruned supersets\n",
" candidateSet = pruning(candidateSet, currentLSet, k-1)\n",
" # Scanning itemSet for counting support\n",
" currentLSet = getAboveMinSup(\n",
" candidateSet, itemSetList, minSup, globalItemSetWithSup)\n",
" k += 1\n",
"\n",
" rules = FassociationRule(globalFreqItemSet, globalItemSetWithSup, minConf)\n",
" rules.sort(key=lambda x: x[2])\n",
" return globalFreqItemSet, rules"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "88d8387a",
"metadata": {},
"outputs": [],
"source": [
"def FaprioriFromFile(fname, minSup, minConf):\n",
" C1ItemSet, itemSetList = getFromFile(fname)\n",
"\n",
" # Final result global frequent itemset\n",
" globalFreqItemSet = dict()\n",
" # Storing global itemset with support count\n",
" globalItemSetWithSup = defaultdict(int)\n",
"\n",
" L1ItemSet = getAboveMinSup(\n",
" C1ItemSet, itemSetList, minSup, globalItemSetWithSup)\n",
" currentLSet = L1ItemSet\n",
" k = 2\n",
"\n",
" # Calculating frequent item set\n",
" while(currentLSet):\n",
" # Storing frequent itemset\n",
" globalFreqItemSet[k-1] = currentLSet\n",
" # Self-joining Lk\n",
" candidateSet = getUnion(currentLSet, k)\n",
" # Perform subset testing and remove pruned supersets\n",
" candidateSet = pruning(candidateSet, currentLSet, k-1)\n",
" # Scanning itemSet for counting support\n",
" currentLSet = getAboveMinSup(\n",
" candidateSet, itemSetList, minSup, globalItemSetWithSup)\n",
" k += 1\n",
"\n",
" rules = associationRule(globalFreqItemSet, globalItemSetWithSup, minConf)\n",
" rules.sort(key=lambda x: x[2])\n",
"\n",
" return globalFreqItemSet, rules"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c8bd8453",
"metadata": {},
"outputs": [],
"source": [
"def powerset(s):\n",
" return chain.from_iterable(combinations(s, r) for r in range(1, len(s)))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "04a9b357",
"metadata": {},
"outputs": [],
"source": [
"def getFromFile(fname):\n",
" itemSets = []\n",
" itemSet = set()\n",
"\n",
" with open(fname, 'r') as file:\n",
" csv_reader = reader(file)\n",
" for line in csv_reader:\n",
" line = list(filter(None, line))\n",
" record = set(line)\n",
" for item in record:\n",
" itemSet.add(frozenset([item]))\n",
" itemSets.append(record)\n",
" return itemSet, itemSets "
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "e61fb8d0",
"metadata": {},
"outputs": [],
"source": [
"def getAboveMinSup(itemSet, itemSetList, minSup, globalItemSetWithSup):\n",
" freqItemSet = set()\n",
" localItemSetWithSup = defaultdict(int)\n",
"\n",
" for item in itemSet:\n",
" for itemSet in itemSetList:\n",
" if item.issubset(itemSet):\n",
" globalItemSetWithSup[item] += 1\n",
" localItemSetWithSup[item] += 1\n",
"\n",
" for item, supCount in localItemSetWithSup.items():\n",
" support = float(supCount / len(itemSetList))\n",
" if(support >= minSup):\n",
" freqItemSet.add(item)\n",
"\n",
" return freqItemSet"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "4d46b1b7",
"metadata": {},
"outputs": [],
"source": [
"def getUnion(itemSet, length):\n",
" return set([i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "c4288f6d",
"metadata": {},
"outputs": [],
"source": [
"def pruning(candidateSet, prevFreqSet, length):\n",
" tempCandidateSet = candidateSet.copy()\n",
" for item in candidateSet:\n",
" subsets = combinations(item, length)\n",
" for subset in subsets:\n",
" # if the subset is not in previous K-frequent get, then remove the set\n",
" if(frozenset(subset) not in prevFreqSet):\n",
" tempCandidateSet.remove(item)\n",
" break\n",
" return tempCandidateSet"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "4152e510",
"metadata": {},
"outputs": [],
"source": [
"def FassociationRule(freqItemSet, itemSetWithSup, minConf):\n",
" rules = []\n",
" for k, itemSet in freqItemSet.items():\n",
" for item in itemSet:\n",
" subsets = powerset(item)\n",
" for s in subsets:\n",
" confidence = float(\n",
" itemSetWithSup[item] / itemSetWithSup[frozenset(s)])\n",
" if(confidence > minConf):\n",
" rules.append([set(s), set(item.difference(s)), confidence])\n",
" return rules"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "1b2c0b08",
"metadata": {},
"outputs": [],
"source": [
"def getItemSetFromList(itemSetList):\n",
" tempItemSet = set()\n",
"\n",
" for itemSet in itemSetList:\n",
" for item in itemSet:\n",
" tempItemSet.add(frozenset([item]))\n",
"\n",
" return tempItemSet"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "e3e50e57",
"metadata": {},
"outputs": [],
"source": [
"g_sequence1=list()\n",
"g_sequence2=list()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "c3b0cf11",
"metadata": {},
"outputs": [],
"source": [
"count=1"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "fe191c97",
"metadata": {},
"outputs": [],
"source": [
"with open(input_file1) as file1:\n",
" for lineno in file1:\n",
" if count==3 or count==70 or count ==94 or count==115 or count==130 or count==139 or count==328 or count==415:\n",
" lineno = list(lineno)\n",
" lineno.remove(\"\\n\")\n",
" g_sequence1.append(lineno)\n",
" count+=1\n",
" else:\n",
" count+=1"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "375b500c",
"metadata": {},
"outputs": [],
"source": [
"count1=1"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "d3275fdb",
"metadata": {},
"outputs": [],
"source": [
"with open(input_file2) as file2:\n",
" for lineno in file2:\n",
" if count1==3 or count1==70 or count1 ==94 or count1==115 or count1==130 or count1==139 or count1==328 or count1==415:\n",
" lineno = list(lineno)\n",
" lineno.remove(\"\\n\")\n",
" g_sequence2.append(lineno)\n",
" count1+=1\n",
" else:\n",
" count1+=1"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "880356a8",
"metadata": {},
"outputs": [],
"source": [
"freqItemSet, rules = Fapriori(g_sequence1, minSup=.5, minConf=.5)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "4bfa662f",
"metadata": {},
"outputs": [],
"source": [
"freqItemSet1, rules1 = Fapriori(g_sequence2, minSup=.5, minConf=.5)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "beecdaae",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{1: {frozenset({'T'}), frozenset({'C'}), frozenset({'G'}), frozenset({'A'})}, 2: {frozenset({'G', 'C'}), frozenset({'G', 'A'}), frozenset({'C', 'A'}), frozenset({'T', 'C'}), frozenset({'G', 'T'}), frozenset({'T', 'A'})}, 3: {frozenset({'T', 'C', 'A'}), frozenset({'G', 'C', 'T'}), frozenset({'G', 'C', 'A'}), frozenset({'G', 'T', 'A'})}, 4: {frozenset({'G', 'C', 'A', 'T'})}} [[{'G'}, {'C'}, 1.0], [{'C'}, {'G'}, 1.0], [{'G'}, {'A'}, 1.0], [{'A'}, {'G'}, 1.0], [{'C'}, {'A'}, 1.0], [{'A'}, {'C'}, 1.0], [{'T'}, {'C'}, 1.0], [{'C'}, {'T'}, 1.0], [{'G'}, {'T'}, 1.0], [{'T'}, {'G'}, 1.0], [{'T'}, {'A'}, 1.0], [{'A'}, {'T'}, 1.0], [{'T'}, {'C', 'A'}, 1.0], [{'C'}, {'T', 'A'}, 1.0], [{'A'}, {'T', 'C'}, 1.0], [{'T', 'C'}, {'A'}, 1.0], [{'T', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'T'}, 1.0], [{'G'}, {'T', 'C'}, 1.0], [{'C'}, {'G', 'T'}, 1.0], [{'T'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'T'}, 1.0], [{'G', 'T'}, {'C'}, 1.0], [{'T', 'C'}, {'G'}, 1.0], [{'G'}, {'C', 'A'}, 1.0], [{'C'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'A'}, 1.0], [{'G', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'A'}, 1.0], [{'T'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'T'}, 1.0], [{'G', 'T'}, {'A'}, 1.0], [{'G', 'A'}, {'T'}, 1.0], [{'T', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'C', 'A'}, 1.0], [{'C'}, {'G', 'T', 'A'}, 1.0], [{'A'}, {'G', 'C', 'T'}, 1.0], [{'T'}, {'G', 'C', 'A'}, 1.0], [{'G', 'C'}, {'T', 'A'}, 1.0], [{'G', 'A'}, {'T', 'C'}, 1.0], [{'G', 'T'}, {'C', 'A'}, 1.0], [{'C', 'A'}, {'G', 'T'}, 1.0], [{'T', 'C'}, {'G', 'A'}, 1.0], [{'T', 'A'}, {'G', 'C'}, 1.0], [{'G', 'C', 'A'}, {'T'}, 1.0], [{'G', 'C', 'T'}, {'A'}, 1.0], [{'G', 'T', 'A'}, {'C'}, 1.0], [{'T', 'C', 'A'}, {'G'}, 1.0]]\n"
]
}
],
"source": [
"print(freqItemSet,rules)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "bd4004c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{1: {frozenset({'T'}), frozenset({'C'}), frozenset({'G'}), frozenset({'A'})}, 2: {frozenset({'G', 'C'}), frozenset({'G', 'A'}), frozenset({'C', 'A'}), frozenset({'T', 'C'}), frozenset({'G', 'T'}), frozenset({'T', 'A'})}, 3: {frozenset({'T', 'C', 'A'}), frozenset({'G', 'C', 'T'}), frozenset({'G', 'C', 'A'}), frozenset({'G', 'T', 'A'})}, 4: {frozenset({'G', 'C', 'A', 'T'})}} [[{'G'}, {'C'}, 1.0], [{'C'}, {'G'}, 1.0], [{'G'}, {'A'}, 1.0], [{'A'}, {'G'}, 1.0], [{'C'}, {'A'}, 1.0], [{'A'}, {'C'}, 1.0], [{'T'}, {'C'}, 1.0], [{'C'}, {'T'}, 1.0], [{'G'}, {'T'}, 1.0], [{'T'}, {'G'}, 1.0], [{'T'}, {'A'}, 1.0], [{'A'}, {'T'}, 1.0], [{'T'}, {'C', 'A'}, 1.0], [{'C'}, {'T', 'A'}, 1.0], [{'A'}, {'T', 'C'}, 1.0], [{'T', 'C'}, {'A'}, 1.0], [{'T', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'T'}, 1.0], [{'G'}, {'T', 'C'}, 1.0], [{'C'}, {'G', 'T'}, 1.0], [{'T'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'T'}, 1.0], [{'G', 'T'}, {'C'}, 1.0], [{'T', 'C'}, {'G'}, 1.0], [{'G'}, {'C', 'A'}, 1.0], [{'C'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'C'}, 1.0], [{'G', 'C'}, {'A'}, 1.0], [{'G', 'A'}, {'C'}, 1.0], [{'C', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'A'}, 1.0], [{'T'}, {'G', 'A'}, 1.0], [{'A'}, {'G', 'T'}, 1.0], [{'G', 'T'}, {'A'}, 1.0], [{'G', 'A'}, {'T'}, 1.0], [{'T', 'A'}, {'G'}, 1.0], [{'G'}, {'T', 'C', 'A'}, 1.0], [{'C'}, {'G', 'T', 'A'}, 1.0], [{'A'}, {'G', 'C', 'T'}, 1.0], [{'T'}, {'G', 'C', 'A'}, 1.0], [{'G', 'C'}, {'T', 'A'}, 1.0], [{'G', 'A'}, {'T', 'C'}, 1.0], [{'G', 'T'}, {'C', 'A'}, 1.0], [{'C', 'A'}, {'G', 'T'}, 1.0], [{'T', 'C'}, {'G', 'A'}, 1.0], [{'T', 'A'}, {'G', 'C'}, 1.0], [{'G', 'C', 'A'}, {'T'}, 1.0], [{'G', 'C', 'T'}, {'A'}, 1.0], [{'G', 'T', 'A'}, {'C'}, 1.0], [{'T', 'C', 'A'}, {'G'}, 1.0]]\n"
]
}
],
"source": [
"print(freqItemSet1,rules1)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "5d6d2717",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(freqItemSet1)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "016bf481",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"IOPub data rate exceeded.\n",
"The notebook server will temporarily stop sending output\n",
"to the client in order to avoid crashing it.\n",
"To change this limit, set the config variable\n",
"`--NotebookApp.iopub_data_rate_limit`.\n",
"\n",
"Current values:\n",
"NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
"NotebookApp.rate_limit_window=3.0 (secs)\n",
"\n"
]
}
],
"source": [
"freqItemSet, rules = fpgrowth(g_sequence1,minSupRatio=0.5,minConf=0.5)\n",
"print(rules) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "482b6678",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}