{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "TL5y5fY9Jy_x"
},
"source": [
"# Code to generate TensorFlow models for predicting mean MFE and stdev based on nucleotide content.\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bMr7MPVmoiHf"
},
"source": [
"## Use the right version of TensorFlow\n",
"\n",
"The following hidden code cell ensures that the Colab will run on TensorFlow 2.X, which is the most recent version of TensorFlow:"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"id": "Z1pOWL7eevO8",
"outputId": "3021d2be-8362-4579-9360-e2f92a57545d",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.\n"
]
}
],
"source": [
"#@title Run this Colab on TensorFlow 2.x\n",
"%tensorflow_version 2.x\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xchnxAsaKKqO"
},
"source": [
"## Import relevant modules\n",
"\n",
"The following cell imports the packages that the program requires:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "9n9_cTveKmse"
},
"outputs": [],
"source": [
"import io\n",
"import sys\n",
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import tensorflow_probability as tfp\n",
"from tensorflow.python.keras.metrics import Metric\n",
"from matplotlib import pyplot as plt\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"from tensorflow.keras.layers.experimental import preprocessing\n",
"\n",
"%matplotlib inline\n",
"pd.options.display.float_format = \"{:.1f}\".format\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ViHIlMwLMypg",
"outputId": "23409807-e93a-4707-a559-a779201a5e97"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2.8.2\n"
]
}
],
"source": [
"print(tf.__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SIpsyJITPcbG"
},
"source": [
"## Define functions that build and train a model\n",
"\n",
"The following code defines two functions:\n",
"\n",
"  * `build_model(my_learning_rate, units, shape)`, which builds and compiles an untrained model.\n",
"  * `train_model(model, feature, label, epochs, batch_size)`, which trains the model from the examples (feature and label) you pass. \n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "5S5unXRJ1rMb"
},
"outputs": [],
"source": [
"def normalizer(shape):\n",
"  \"\"\"Create a Normalization preprocessing layer for the given input shape.\n",
"\n",
"  Args:\n",
"    shape: Value forwarded to the layer as `input_shape`.\n",
"\n",
"  Returns:\n",
"    The newly constructed `preprocessing.Normalization` layer. No `adapt()`\n",
"    call is made here.\n",
"  \"\"\"\n",
"  # Return the layer; without this the function always evaluated to None.\n",
"  return preprocessing.Normalization(input_shape=shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"id": "xvO_beKVP1Ke"
},
"outputs": [],
"source": [
"#@title Define custom functions\n",
"def build_model(my_learning_rate, units, shape):\n",
"  \"\"\"Assemble and compile a small feed-forward regression network.\n",
"\n",
"  Layers, in order: a Normalization layer, a 64-unit ReLU Dense layer, a\n",
"  64-unit sigmoid Dense layer, and a Dense output layer with `units` nodes\n",
"  and no activation. No `adapt()` call is made on the Normalization layer\n",
"  here.\n",
"\n",
"  Args:\n",
"    my_learning_rate: Learning rate handed to the RMSprop optimizer.\n",
"    units: Number of nodes in the output layer.\n",
"    shape: Value forwarded as `input_shape` to every layer.\n",
"\n",
"  Returns:\n",
"    The compiled Sequential model (loss: mean squared error, metric: root\n",
"    mean squared error).\n",
"  \"\"\"\n",
"  dense = tf.keras.layers.Dense\n",
"  network = tf.keras.models.Sequential()\n",
"\n",
"  # Each layer is created and attached before the next one is constructed:\n",
"  # normalization first, then two hidden non-linear layers, then the output.\n",
"  network.add(preprocessing.Normalization(input_shape=shape))\n",
"  network.add(dense(units=64, input_shape=shape, activation=\"relu\"))\n",
"  network.add(dense(units=64, input_shape=shape, activation=\"sigmoid\"))\n",
"  network.add(dense(units=units, input_shape=shape))\n",
"\n",
"  # Configure training to minimize mean squared error while reporting RMSE.\n",
"  rmsprop = tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate)\n",
"  rmse_metric = tf.keras.metrics.RootMeanSquaredError()\n",
"  network.compile(optimizer=rmsprop,\n",
"                  loss=\"mean_squared_error\",\n",
"                  metrics=[rmse_metric])\n",
"\n",
"  return network\n",
"\n",
"\n",
"def train_model(model, feature, label, epochs, batch_size):\n",
"  \"\"\"Fit `model` on the supplied examples and report its per-epoch RMSE.\n",
"\n",
"  Args:\n",
"    model: Object exposing `fit(x, y, batch_size, epochs)` and `get_weights()`.\n",
"    feature: Feature values passed to `fit` as `x`.\n",
"    label: Label values passed to `fit` as `y`.\n",
"    epochs: Number of epochs passed to `fit`.\n",
"    batch_size: Batch size passed to `fit`.\n",
"\n",
"  Returns:\n",
"    A 4-tuple `(trained_weight, trained_bias, epochs, rmse)`: the first two\n",
"    arrays from `model.get_weights()`, the epoch list from the fit history,\n",
"    and the `root_mean_squared_error` history column.\n",
"  \"\"\"\n",
"  fit_result = model.fit(x=feature,\n",
"                         y=label,\n",
"                         batch_size=batch_size,\n",
"                         epochs=epochs)\n",
"\n",
"  # NOTE(review): these are simply entries 0 and 1 of get_weights(); for a\n",
"  # multi-layer model they may not be one layer's kernel and bias -- confirm\n",
"  # what callers expect.\n",
"  all_weights = model.get_weights()\n",
"  trained_weight, trained_bias = all_weights[0], all_weights[1]\n",
"\n",
"  # The epoch list lives on the history object, apart from the metrics.\n",
"  epoch_list = fit_result.epoch\n",
"\n",
"  # One row per epoch; keep only the root mean squared error column.\n",
"  rmse = pd.DataFrame(fit_result.history)[\"root_mean_squared_error\"]\n",
"\n",
"  return trained_weight, trained_bias, epoch_list, rmse\n",
"\n",
"\n",
"\n",
"def get_gc_content(frag):\n",
"  \"\"\"Return the GC fraction of a nucleotide sequence.\n",
"\n",
"  Counting is case-insensitive and T and U are counted together. Characters\n",
"  other than A, C, G, T and U are ignored.\n",
"\n",
"  Args:\n",
"    frag: The sequence; it is converted with `str()` first.\n",
"\n",
"  Returns:\n",
"    (G + C) / (A + C + G + T/U) rounded to 5 decimals, or 0 when the sequence\n",
"    contains none of those bases.\n",
"  \"\"\"\n",
"  seq = str(frag).upper()\n",
"  gc_count = seq.count(\"G\") + seq.count(\"C\")\n",
"  at_count = seq.count(\"A\") + seq.count(\"T\") + seq.count(\"U\")\n",
"  total = gc_count + at_count\n",
"\n",
"  # The old guard `'C' and 'G' in frag` only tested for an uppercase G, so\n",
"  # C-only or lowercase sequences wrongly returned 0. Guard on the counts.\n",
"  if total == 0:\n",
"    return 0\n",
"\n",
"  return round(gc_count / total, 5)\n",
"\n",
"def get_cg_ratio(frag):\n",
"  \"\"\"Return C / (C + G) for a nucleotide sequence.\n",
"\n",
"  Counting is case-insensitive.\n",
"\n",
"  Args:\n",
"    frag: The sequence; it is converted with `str()` first.\n",
"\n",
"  Returns:\n",
"    The fraction of C among all C and G bases, or 0 when the sequence\n",
"    contains neither.\n",
"  \"\"\"\n",
"  seq = str(frag).upper()\n",
"  c_count = seq.count(\"C\")\n",
"  g_count = seq.count(\"G\")\n",
"\n",
"  # The old guard `'C' and 'G' in frag` only tested for an uppercase G, so\n",
"  # C-only or lowercase sequences wrongly returned 0. Guard on the counts.\n",
"  if c_count + g_count == 0:\n",
"    return 0\n",
"\n",
"  return c_count / (c_count + g_count)\n",
"\n",
"def get_au_ratio(frag):\n",
"  \"\"\"Return A / (A + T/U) for a nucleotide sequence.\n",
"\n",
"  Counting is case-insensitive and T and U are counted together.\n",
"\n",
"  Args:\n",
"    frag: The sequence; it is converted with `str()` first.\n",
"\n",
"  Returns:\n",
"    The fraction of A among all A, T and U bases, or 0 when the sequence\n",
"    contains none of them.\n",
"  \"\"\"\n",
"  seq = str(frag).upper()\n",
"  a_count = seq.count(\"A\")\n",
"  tu_count = seq.count(\"T\") + seq.count(\"U\")\n",
"\n",
"  # The old guard `'A' and 'U' in frag` only tested for an uppercase U, so\n",
"  # sequences with A but no U, T-only, or lowercase input wrongly returned 0.\n",
"  if a_count + tu_count == 0:\n",
"    return 0\n",
"\n",
"  return a_count / (a_count + tu_count)\n",
"\n",
"\n",
"def get_di_freqs(frag):\n",
"  \"\"\"Return the frequency of each of the 16 dinucleotides in a sequence.\n",
"\n",
"  Overlapping two-base windows are counted at every position. The sequence\n",
"  is upper-cased and T is read as U before counting, in line with the other\n",
"  composition helpers in this cell.\n",
"\n",
"  Args:\n",
"    frag: The sequence; it is converted with `str()` first.\n",
"\n",
"  Returns:\n",
"    A list of 16 floats ordered AA, AU, AG, AC, UA, UU, UG, UC, GA, GU, GG,\n",
"    GC, CA, CU, CG, CC. Each is the window count divided by `len(frag) - 1`.\n",
"    All zeros when the sequence is shorter than two characters.\n",
"  \"\"\"\n",
"  ### code adapted from https://pythonforbiologists.com/dictionaries\n",
"  seq = str(frag).upper().replace(\"T\", \"U\")\n",
"  dinucleotides = ['AA','AU','AG','AC',\n",
"                   'UA','UU','UG','UC',\n",
"                   'GA','GU','GG','GC',\n",
"                   'CA','CU','CG','CC']\n",
"\n",
"  # A sequence of length n has n - 1 windows; with none there is nothing to\n",
"  # divide by (this used to raise ZeroDivisionError for a single base).\n",
"  window_total = len(seq) - 1\n",
"  if window_total < 1:\n",
"    return [0.0] * len(dinucleotides)\n",
"\n",
"  windows = [seq[i:i+2] for i in range(window_total)]\n",
"  return [windows.count(pair) / window_total for pair in dinucleotides]\n",
"\n",
"def get_dinucleotide_counts(frag):\n",
"  \"\"\"Return the raw count of each of the 16 dinucleotides in a sequence.\n",
"\n",
"  Overlapping two-base windows are counted at every position. The sequence\n",
"  is upper-cased and T is read as U before counting, in line with the other\n",
"  composition helpers in this cell.\n",
"\n",
"  Args:\n",
"    frag: The sequence; it is converted with `str()` first.\n",
"\n",
"  Returns:\n",
"    A list of 16 ints ordered AA, AU, AG, AC, UA, UU, UG, UC, GA, GU, GG,\n",
"    GC, CA, CU, CG, CC.\n",
"  \"\"\"\n",
"  ### code adapted from https://pythonforbiologists.com/dictionaries\n",
"  seq = str(frag).upper().replace(\"T\", \"U\")\n",
"  dinucleotides = ['AA','AU','AG','AC',\n",
"                   'UA','UU','UG','UC',\n",
"                   'GA','GU','GG','GC',\n",
"                   'CA','CU','CG','CC']\n",
"\n",
"  # The debugging print of every window was dropped: it wrote one line per\n",
"  # call and flooded the notebook output.\n",
"  windows = [seq[i:i+2] for i in range(len(seq) - 1)]\n",
"  return [windows.count(pair) for pair in dinucleotides]\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ak_TMAzGOIFq"
},
"source": [
"## Define plotting functions\n",
"\n",
"We're using a popular Python library called [Matplotlib](https://developers.google.com/machine-learning/glossary/#matplotlib) to create the following two plots:\n",
"\n",
"* a plot of the feature values vs. the label values, and a line showing the output of the trained model.\n",
"* a [loss curve](https://developers.google.com/machine-learning/glossary/#loss_curve)."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QF0BFRXTOeR3",
"outputId": "25f6c25b-dc46-4bb7-91e8-63884734a213"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Defined the plot_the_model and plot_the_loss_curve functions.\n"
]
}
],
"source": [
"#@title Define the plotting functions\n",
"def plot_the_model(trained_weight, trained_bias, feature, label):\n",
"  \"\"\"Plot the trained model against the training feature and label.\n",
"\n",
"  Draws a scatter plot of `feature` vs. `label` and a red line from\n",
"  (0, trained_bias) to (x1, trained_bias + trained_weight * x1), where x1 is\n",
"  the last element of `feature`.\n",
"\n",
"  Args:\n",
"    trained_weight: Slope used for the red line.\n",
"    trained_bias: Intercept used for the red line.\n",
"    feature: Feature values for the x axis.\n",
"    label: Label values for the y axis.\n",
"  \"\"\"\n",
"\n",
"  # Label the axes.\n",
"  plt.xlabel(\"feature\")\n",
"  plt.ylabel(\"label\")\n",
"\n",
"  # Plot the feature values vs. label values.\n",
"  plt.scatter(feature, label)\n",
"\n",
"  # Create a red line representing the model. The red line starts\n",
"  # at coordinates (x0, y0) and ends at coordinates (x1, y1).\n",
"  x0 = 0\n",
"  y0 = trained_bias\n",
"  # Use the `feature` argument rather than the notebook-level `my_feature`,\n",
"  # so the line matches the data actually passed in.\n",
"  # NOTE(review): assumes `feature` supports positional indexing with -1\n",
"  # (e.g. a list or NumPy array) -- confirm for pandas inputs.\n",
"  x1 = feature[-1]\n",
"  y1 = trained_bias + (trained_weight * x1)\n",
"  plt.plot([x0, x1], [y0, y1], c='r')\n",
"\n",
"  # Render the scatter plot and the red line.\n",
"  plt.show()\n",
"\n",
"def plot_the_loss_curve(epochs, rmse):\n",
"  \"\"\"Draw root mean squared error against epoch on a new figure.\n",
"\n",
"  Args:\n",
"    epochs: Epoch values for the x axis.\n",
"    rmse: Per-epoch RMSE values; must provide `.min()` and `.max()`.\n",
"  \"\"\"\n",
"  plt.figure()\n",
"\n",
"  # Axis titles.\n",
"  plt.xlabel(\"Epoch\")\n",
"  plt.ylabel(\"Root Mean Squared Error\")\n",
"\n",
"  # The curve itself, with a legend entry.\n",
"  plt.plot(epochs, rmse, label=\"Loss\")\n",
"  plt.legend()\n",
"\n",
"  # Leave 3% headroom below the smallest value; cap at the largest.\n",
"  y_limits = [rmse.min()*0.97, rmse.max()]\n",
"  plt.ylim(y_limits)\n",
"  plt.show()\n",
"\n",
"print(\"Defined the plot_the_model and plot_the_loss_curve functions.\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "LVSDPusELEZ5"
},
"source": [
"## Load the dataset\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2qrBFb50bVTW"
},
"source": [
"Import the full training data set (as a pandas dataframe). Direct links to the datasets used are provided, but your own data can be uploaded via Google Drive, GitHub, or the local filesystem. "
]
},
{
"cell_type": "code",
"source": [
"# If needed, this cell allows upload of local files for training/testing\n",
"# Uncomment (remove the hashtags) from the following two lines then run cell\n",
"# from google.colab import files\n",
"# uploaded = files.upload()"
],
"metadata": {
"id": "95fhk6O9tVZm"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"id": "ruBbYkc7a6rM"
},
"outputs": [],
"source": [
"# Load training set from RNAStructuromeDB\n",
"training_df = pd.read_csv('https://structurome.bb.iastate.edu/files/download/trainingset.txt.gz')\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"id": "xJB4L0bCI819"
},
"outputs": [],
"source": [
"# Load test set from RNAStructuromeDB\n",
"test_df = pd.read_csv('https://structurome.bb.iastate.edu/files/download/testset.rnacentral.20-200.all_.svm_data.csv__0.gz')\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "klSyq_vhoO5J"
},
"source": [
"Check the data structure using \"dataframe.describe\""
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 330
},
"id": "HEA1x_iX3yHS",
"outputId": "937802e2-3b57-4c01-b7d2-d237934a2bbc"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" length GC% CGratio AUratio MFE AA AU \\\n",
"count 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 \n",
"mean 85.1 0.5 0.5 0.5 -26.7 0.1 0.1 \n",
"std 40.6 0.1 0.1 0.1 17.7 0.0 0.0 \n",
"min 20.0 0.0 0.0 0.0 -221.1 0.0 0.0 \n",
"25% 66.0 0.4 0.4 0.4 -35.4 0.0 0.0 \n",
"50% 74.0 0.5 0.5 0.5 -25.7 0.1 0.1 \n",
"75% 107.0 0.6 0.5 0.5 -14.1 0.1 0.1 \n",
"max 200.0 1.0 1.0 1.0 0.0 1.0 0.5 \n",
"\n",
" AG AC UA ... GG GC CA \\\n",
"count 5729593.0 5729592.0 5729592.0 ... 5729592.0 5729592.0 5729592.0 \n",
"mean 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"std 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"min 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"25% 0.1 0.0 0.0 ... 0.0 0.0 0.0 \n",
"50% 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"75% 0.1 0.1 0.0 ... 0.1 0.1 0.1 \n",
"max 0.5 0.5 0.0 ... 1.0 0.4 0.5 \n",
"\n",
" CU CG CC MonoMFE mono_z DiMFE di_z \n",
"count 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 \n",
"mean 0.1 0.0 0.1 -20.8 -0.0 -21.5 -0.0 \n",
"std 0.0 0.0 0.0 13.3 1.0 13.6 1.0 \n",
"min 0.0 0.0 0.0 -118.5 -10.0 -140.0 -10.0 \n",
"25% 0.0 0.0 0.0 -27.1 -0.6 -27.9 -0.6 \n",
"50% 0.1 0.0 0.1 -20.0 0.1 -21.0 0.1 \n",
"75% 0.1 0.1 0.1 -10.9 0.7 -11.5 0.7 \n",
"max 0.5 0.3 1.0 0.0 4.4 0.0 4.0 \n",
"\n",
"[8 rows x 25 columns]"
],
"text/html": [
"\n",
"
\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" length | \n",
" GC% | \n",
" CGratio | \n",
" AUratio | \n",
" MFE | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" ... | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
" MonoMFE | \n",
" mono_z | \n",
" DiMFE | \n",
" di_z | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" ... | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
"
\n",
" \n",
" mean | \n",
" 85.1 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -26.7 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -20.8 | \n",
" -0.0 | \n",
" -21.5 | \n",
" -0.0 | \n",
"
\n",
" \n",
" std | \n",
" 40.6 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 17.7 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 13.3 | \n",
" 1.0 | \n",
" 13.6 | \n",
" 1.0 | \n",
"
\n",
" \n",
" min | \n",
" 20.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -221.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -118.5 | \n",
" -10.0 | \n",
" -140.0 | \n",
" -10.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 66.0 | \n",
" 0.4 | \n",
" 0.4 | \n",
" 0.4 | \n",
" -35.4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -27.1 | \n",
" -0.6 | \n",
" -27.9 | \n",
" -0.6 | \n",
"
\n",
" \n",
" 50% | \n",
" 74.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -25.7 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -20.0 | \n",
" 0.1 | \n",
" -21.0 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 75% | \n",
" 107.0 | \n",
" 0.6 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -14.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -10.9 | \n",
" 0.7 | \n",
" -11.5 | \n",
" 0.7 | \n",
"
\n",
" \n",
" max | \n",
" 200.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.0 | \n",
" ... | \n",
" 1.0 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.3 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 4.4 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
"
\n",
"
8 rows × 25 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 23
}
],
"source": [
"test_df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rQYn0ny3oT1I"
},
"source": [
"Rename columns (to ensure compatibility with later functions)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"id": "0Lo7hJ3D36MR"
},
"outputs": [],
"source": [
"test_df.columns =[\"Length\", \"GCpercent\",\"CGratio\", \"AUratio\", \"MFE\", \"AA\",\"AU\",\"AG\",\"AC\",\"UA\",\"UU\",\"UG\",\"UC\",\"GA\",\"GU\",\"GG\",\"GC\",\"CA\",\"CU\",\"CG\", \"CC\", \"MonoMFE\", \"mono_z\", \"DiMFE\", \"di_z\"]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NwyfPkkbobdr"
},
"source": [
"Ensure values are numeric"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "Uan8bWjMt5Ss",
"outputId": "280bf5df-181b-45eb-8167-2d14d4ef15d3"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio MFE AA AU AG AC UA ... \\\n",
"0 200 0.6 0.4 0.6 -59.2 0.1 0.0 0.1 0.1 0.0 ... \n",
"1 72 0.2 0.4 0.5 -11.9 0.1 0.2 0.0 0.0 0.0 ... \n",
"2 104 0.5 0.4 0.5 -31.3 0.0 0.0 0.1 0.1 0.0 ... \n",
"3 24 0.4 0.7 0.5 -0.2 0.1 0.1 0.0 0.1 0.0 ... \n",
"4 28 0.5 0.7 0.5 -1.1 0.0 0.0 0.1 0.0 0.0 ... \n",
"... ... ... ... ... ... .. .. .. .. .. ... \n",
"5729588 73 0.6 0.4 0.4 -25.8 0.0 0.0 0.1 0.0 0.0 ... \n",
"5729589 117 0.6 0.4 0.5 -38.6 0.0 0.1 0.1 0.0 0.0 ... \n",
"5729590 85 0.6 0.4 0.5 -37.3 0.0 0.0 0.1 0.0 0.0 ... \n",
"5729591 113 0.4 0.5 0.5 -22.2 0.1 0.1 0.1 0.1 0.0 ... \n",
"5729592 84 0.6 0.5 0.5 -34.4 0.1 0.0 0.0 0.0 0.0 ... \n",
"\n",
" GG GC CA CU CG CC MonoMFE mono_z DiMFE di_z \n",
"0 0.1 0.1 0.0 0.1 0.1 0.0 -64.8 0.4 -64.9 -2.0 \n",
"1 0.1 0.0 0.0 0.0 0.0 0.0 -7.4 0.3 -10.3 0.6 \n",
"2 0.1 0.0 0.1 0.0 0.1 0.1 -30.0 0.1 -29.6 -0.6 \n",
"3 0.0 0.0 0.1 0.1 0.0 0.1 -0.7 0.1 -0.2 0.3 \n",
"4 0.0 0.1 0.1 0.0 0.1 0.1 -1.6 -1.0 -1.0 -0.1 \n",
"... .. .. .. .. .. .. ... ... ... ... \n",
"5729588 0.1 0.1 0.0 0.1 0.1 0.1 -23.4 -0.2 -20.9 1.2 \n",
"5729589 0.1 0.1 0.1 0.0 0.1 0.1 -35.9 -2.2 -35.0 1.0 \n",
"5729590 0.1 0.1 0.1 0.1 0.1 0.1 -29.2 -0.6 -30.1 1.9 \n",
"5729591 0.0 0.0 0.1 0.0 0.0 0.0 -21.4 0.1 -22.9 1.4 \n",
"5729592 0.1 0.1 0.0 0.0 0.1 0.1 -28.0 0.3 -29.8 -0.9 \n",
"\n",
"[5729593 rows x 25 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" MFE | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" ... | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
" MonoMFE | \n",
" mono_z | \n",
" DiMFE | \n",
" di_z | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 200 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.6 | \n",
" -59.2 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" -64.8 | \n",
" 0.4 | \n",
" -64.9 | \n",
" -2.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 72 | \n",
" 0.2 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -11.9 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -7.4 | \n",
" 0.3 | \n",
" -10.3 | \n",
" 0.6 | \n",
"
\n",
" \n",
" 2 | \n",
" 104 | \n",
" 0.5 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -31.3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -30.0 | \n",
" 0.1 | \n",
" -29.6 | \n",
" -0.6 | \n",
"
\n",
" \n",
" 3 | \n",
" 24 | \n",
" 0.4 | \n",
" 0.7 | \n",
" 0.5 | \n",
" -0.2 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -0.7 | \n",
" 0.1 | \n",
" -0.2 | \n",
" 0.3 | \n",
"
\n",
" \n",
" 4 | \n",
" 28 | \n",
" 0.5 | \n",
" 0.7 | \n",
" 0.5 | \n",
" -1.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -1.6 | \n",
" -1.0 | \n",
" -1.0 | \n",
" -0.1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 5729588 | \n",
" 73 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.4 | \n",
" -25.8 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -23.4 | \n",
" -0.2 | \n",
" -20.9 | \n",
" 1.2 | \n",
"
\n",
" \n",
" 5729589 | \n",
" 117 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -38.6 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -35.9 | \n",
" -2.2 | \n",
" -35.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 5729590 | \n",
" 85 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -37.3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -29.2 | \n",
" -0.6 | \n",
" -30.1 | \n",
" 1.9 | \n",
"
\n",
" \n",
" 5729591 | \n",
" 113 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -22.2 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -21.4 | \n",
" 0.1 | \n",
" -22.9 | \n",
" 1.4 | \n",
"
\n",
" \n",
" 5729592 | \n",
" 84 | \n",
" 0.6 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -34.4 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -28.0 | \n",
" 0.3 | \n",
" -29.8 | \n",
" -0.9 | \n",
"
\n",
" \n",
"
\n",
"
5729593 rows × 25 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 25
}
],
"source": [
"# DataFrame.apply returns a new frame and leaves test_df untouched, so the\n",
"# converted result has to be assigned back for the conversion to take effect.\n",
"test_df = test_df.apply(pd.to_numeric)\n",
"# Display the converted frame, as this cell did before.\n",
"test_df"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BIjDxRR-oe0c"
},
"source": [
"Check structure again. Ensure all counts are the same!"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 330
},
"id": "VxB8bEG3uSLU",
"outputId": "c8cdd837-8106-4444-95f2-23d1290b41e8"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio MFE AA AU \\\n",
"count 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 \n",
"mean 85.1 0.5 0.5 0.5 -26.7 0.1 0.1 \n",
"std 40.6 0.1 0.1 0.1 17.7 0.0 0.0 \n",
"min 20.0 0.0 0.0 0.0 -221.1 0.0 0.0 \n",
"25% 66.0 0.4 0.4 0.4 -35.4 0.0 0.0 \n",
"50% 74.0 0.5 0.5 0.5 -25.7 0.1 0.1 \n",
"75% 107.0 0.6 0.5 0.5 -14.1 0.1 0.1 \n",
"max 200.0 1.0 1.0 1.0 0.0 1.0 0.5 \n",
"\n",
" AG AC UA ... GG GC CA \\\n",
"count 5729593.0 5729592.0 5729592.0 ... 5729592.0 5729592.0 5729592.0 \n",
"mean 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"std 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"min 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"25% 0.1 0.0 0.0 ... 0.0 0.0 0.0 \n",
"50% 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"75% 0.1 0.1 0.0 ... 0.1 0.1 0.1 \n",
"max 0.5 0.5 0.0 ... 1.0 0.4 0.5 \n",
"\n",
" CU CG CC MonoMFE mono_z DiMFE di_z \n",
"count 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 \n",
"mean 0.1 0.0 0.1 -20.8 -0.0 -21.5 -0.0 \n",
"std 0.0 0.0 0.0 13.3 1.0 13.6 1.0 \n",
"min 0.0 0.0 0.0 -118.5 -10.0 -140.0 -10.0 \n",
"25% 0.0 0.0 0.0 -27.1 -0.6 -27.9 -0.6 \n",
"50% 0.1 0.0 0.1 -20.0 0.1 -21.0 0.1 \n",
"75% 0.1 0.1 0.1 -10.9 0.7 -11.5 0.7 \n",
"max 0.5 0.3 1.0 0.0 4.4 0.0 4.0 \n",
"\n",
"[8 rows x 25 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" MFE | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" ... | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
" MonoMFE | \n",
" mono_z | \n",
" DiMFE | \n",
" di_z | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" ... | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
"
\n",
" \n",
" mean | \n",
" 85.1 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -26.7 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -20.8 | \n",
" -0.0 | \n",
" -21.5 | \n",
" -0.0 | \n",
"
\n",
" \n",
" std | \n",
" 40.6 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 17.7 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 13.3 | \n",
" 1.0 | \n",
" 13.6 | \n",
" 1.0 | \n",
"
\n",
" \n",
" min | \n",
" 20.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -221.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -118.5 | \n",
" -10.0 | \n",
" -140.0 | \n",
" -10.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 66.0 | \n",
" 0.4 | \n",
" 0.4 | \n",
" 0.4 | \n",
" -35.4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -27.1 | \n",
" -0.6 | \n",
" -27.9 | \n",
" -0.6 | \n",
"
\n",
" \n",
" 50% | \n",
" 74.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -25.7 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -20.0 | \n",
" 0.1 | \n",
" -21.0 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 75% | \n",
" 107.0 | \n",
" 0.6 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -14.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -10.9 | \n",
" 0.7 | \n",
" -11.5 | \n",
" 0.7 | \n",
"
\n",
" \n",
" max | \n",
" 200.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.0 | \n",
" ... | \n",
" 1.0 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.3 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 4.4 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
"
\n",
"
8 rows × 25 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 26
}
],
"source": [
"test_df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2L0crcGabr8B"
},
"source": [
"### Training data\n",
"Set the column names for training data (if not already set)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"id": "AFqCzv6HbSt5"
},
"outputs": [],
"source": [
"training_df.columns = [\"Length\", \"GCpercent\",\"CGratio\", \"AUratio\", \"MFE\", \"MonoMFE\", \"MonoStd\", \"DiMFE\", \"DiStd\", \"AA\",\"AU\",\"AG\",\"AC\",\"UA\",\"UU\",\"UG\",\"UC\",\"GA\",\"GU\",\"GG\",\"GC\",\"CA\",\"CU\",\"CG\", \"CC\"]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"id": "LXZL5h3s5KGH",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 330
},
"outputId": "bf109352-b8b3-416f-e613-12348f83c6f4"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio MFE MonoMFE MonoStd \\\n",
"count 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 \n",
"mean 152.4 0.5 0.5 0.5 -34.4 -34.5 3.7 \n",
"std 37.1 0.1 0.2 0.1 18.9 18.4 0.6 \n",
"min 60.0 0.1 0.1 0.1 -140.7 -125.9 0.4 \n",
"25% 120.0 0.5 0.4 0.4 -45.4 -45.0 3.3 \n",
"50% 160.0 0.5 0.5 0.5 -32.0 -31.9 3.8 \n",
"75% 180.0 0.6 0.6 0.6 -20.6 -21.0 4.2 \n",
"max 200.0 0.8 0.9 0.9 0.0 -0.1 6.4 \n",
"\n",
" DiMFE DiStd AA ... UG UC GA GU \\\n",
"count 836376.0 836376.0 836376.0 ... 836376.0 836376.0 836376.0 836376.0 \n",
"mean -34.4 3.4 0.1 ... 0.1 0.1 0.1 0.1 \n",
"std 18.6 0.6 0.1 ... 0.0 0.0 0.0 0.0 \n",
"min -128.7 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n",
"25% -45.1 3.0 0.0 ... 0.0 0.0 0.0 0.0 \n",
"50% -31.9 3.4 0.1 ... 0.1 0.1 0.1 0.1 \n",
"75% -20.8 3.9 0.1 ... 0.1 0.1 0.1 0.1 \n",
"max -0.0 6.0 0.5 ... 0.2 0.2 0.2 0.2 \n",
"\n",
" GG GC CA CU CG CC \n",
"count 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 \n",
"mean 0.1 0.1 0.1 0.1 0.1 0.1 \n",
"std 0.1 0.0 0.0 0.0 0.0 0.1 \n",
"min 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"25% 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"50% 0.1 0.1 0.1 0.1 0.1 0.1 \n",
"75% 0.1 0.1 0.1 0.1 0.1 0.1 \n",
"max 0.4 0.2 0.2 0.2 0.2 0.4 \n",
"\n",
"[8 rows x 25 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" MFE | \n",
" MonoMFE | \n",
" MonoStd | \n",
" DiMFE | \n",
" DiStd | \n",
" AA | \n",
" ... | \n",
" UG | \n",
" UC | \n",
" GA | \n",
" GU | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" ... | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
"
\n",
" \n",
" mean | \n",
" 152.4 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -34.4 | \n",
" -34.5 | \n",
" 3.7 | \n",
" -34.4 | \n",
" 3.4 | \n",
" 0.1 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" std | \n",
" 37.1 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.1 | \n",
" 18.9 | \n",
" 18.4 | \n",
" 0.6 | \n",
" 18.6 | \n",
" 0.6 | \n",
" 0.1 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
"
\n",
" \n",
" min | \n",
" 60.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -140.7 | \n",
" -125.9 | \n",
" 0.4 | \n",
" -128.7 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 120.0 | \n",
" 0.5 | \n",
" 0.4 | \n",
" 0.4 | \n",
" -45.4 | \n",
" -45.0 | \n",
" 3.3 | \n",
" -45.1 | \n",
" 3.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 50% | \n",
" 160.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -32.0 | \n",
" -31.9 | \n",
" 3.8 | \n",
" -31.9 | \n",
" 3.4 | \n",
" 0.1 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 75% | \n",
" 180.0 | \n",
" 0.6 | \n",
" 0.6 | \n",
" 0.6 | \n",
" -20.6 | \n",
" -21.0 | \n",
" 4.2 | \n",
" -20.8 | \n",
" 3.9 | \n",
" 0.1 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" max | \n",
" 200.0 | \n",
" 0.8 | \n",
" 0.9 | \n",
" 0.9 | \n",
" 0.0 | \n",
" -0.1 | \n",
" 6.4 | \n",
" -0.0 | \n",
" 6.0 | \n",
" 0.5 | \n",
" ... | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.4 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.4 | \n",
"
\n",
" \n",
"
\n",
"
8 rows × 25 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 31
}
],
"source": [
"training_df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xK6aXDgybz2h"
},
"source": [
"Round values if needed (to save on memory)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"id": "2uFReSLjs-Qh"
},
"outputs": [],
"source": [
"# Keep two decimal places in every numeric column.\n",
"# NOTE(review): round() returns values in the same float dtype, so rounding by\n",
"# itself does not shrink the DataFrame in memory -- confirm the intent of the\n",
"# \"save on memory\" remark above.\n",
"training_df = training_df.round(decimals=2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "pzC07m0Io3OQ"
},
"source": [
"Now we can remove the MFE column while simultaneously setting it as our label (using the \".pop\" function). Our LABEL is what we are trying to predict."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"id": "sM0jCxzUVDpO"
},
"outputs": [],
"source": [
"# Train for Mononucleotide shuffling:\n",
"my_label = training_df.pop('MonoMFE')\n",
"\n",
"# OR #\n",
"\n",
"# Train for Dinucleotide shuffling: \n",
"#my_label = training_df.pop('DiStd')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JLSY5on4pEGC"
},
"source": [
"Our feature is what we use to PREDICT our label. Here we drop the values we don't want to use. "
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"id": "mjBM_1HhcDBM"
},
"outputs": [],
"source": [
"# Build the feature matrix by removing every MFE-derived target column from\n",
"# training_df, so that only the remaining columns are used as model inputs.\n",
"#\n",
"# The label has already been popped out of training_df, so whichever of these\n",
"# columns is the current label is simply absent. errors=\"ignore\" lets this one\n",
"# statement serve all four label choices (MonoMFE, MonoStd, DiMFE, DiStd)\n",
"# instead of hand-toggling a matching drop() line for each of them.\n",
"target_columns = [\"MFE\", \"MonoMFE\", \"MonoStd\", \"DiMFE\", \"DiStd\"]\n",
"my_feature = training_df.drop(columns=target_columns, errors=\"ignore\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9P41e9jWpXhE"
},
"source": [
"Using the describe function, we can now see how many features will be used to predict our label."
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"id": "S8wnsylucIEh",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"outputId": "8f2ea1af-6a5c-4cd1-aa6b-210966648dc1"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio AA AU AG \\\n",
"count 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 \n",
"mean 152.4 0.5 0.5 0.5 0.1 0.1 0.1 \n",
"std 37.1 0.1 0.2 0.1 0.1 0.0 0.0 \n",
"min 60.0 0.1 0.1 0.1 0.0 0.0 0.0 \n",
"25% 120.0 0.5 0.4 0.4 0.0 0.0 0.0 \n",
"50% 160.0 0.5 0.5 0.5 0.1 0.1 0.1 \n",
"75% 180.0 0.6 0.6 0.6 0.1 0.1 0.1 \n",
"max 200.0 0.8 0.9 0.9 0.5 0.2 0.2 \n",
"\n",
" AC UA UU UG UC GA GU GG \\\n",
"count 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 836376.0 \n",
"mean 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 \n",
"std 0.0 0.0 0.1 0.0 0.0 0.0 0.0 0.1 \n",
"min 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"25% 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"50% 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 \n",
"75% 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 \n",
"max 0.2 0.2 0.4 0.2 0.2 0.2 0.2 0.4 \n",
"\n",
" GC CA CU CG CC \n",
"count 836376.0 836376.0 836376.0 836376.0 836376.0 \n",
"mean 0.1 0.1 0.1 0.1 0.1 \n",
"std 0.0 0.0 0.0 0.0 0.1 \n",
"min 0.0 0.0 0.0 0.0 0.0 \n",
"25% 0.0 0.0 0.0 0.0 0.0 \n",
"50% 0.1 0.1 0.1 0.1 0.1 \n",
"75% 0.1 0.1 0.1 0.1 0.1 \n",
"max 0.2 0.2 0.2 0.2 0.4 "
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" UU | \n",
" UG | \n",
" UC | \n",
" GA | \n",
" GU | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
" 836376.0 | \n",
"
\n",
" \n",
" mean | \n",
" 152.4 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" std | \n",
" 37.1 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
"
\n",
" \n",
" min | \n",
" 60.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 120.0 | \n",
" 0.5 | \n",
" 0.4 | \n",
" 0.4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 50% | \n",
" 160.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 75% | \n",
" 180.0 | \n",
" 0.6 | \n",
" 0.6 | \n",
" 0.6 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" max | \n",
" 200.0 | \n",
" 0.8 | \n",
" 0.9 | \n",
" 0.9 | \n",
" 0.5 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.4 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.4 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.4 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 35
}
],
"source": [
"my_feature.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "K00WeBUZpfan"
},
"source": [
"Just another describe to ensure our label is the one we want before model building. Check that counts are the same between label and features. Ensure values look reasonable."
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"id": "rFlDsxaOcK3F",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e2cf34f1-d720-47e3-b0ce-844f4a14d2a1"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"count 836376.0\n",
"mean -34.5\n",
"std 18.4\n",
"min -125.9\n",
"25% -45.0\n",
"50% -31.9\n",
"75% -21.0\n",
"max -0.1\n",
"Name: MonoMFE, dtype: float64"
]
},
"metadata": {},
"execution_count": 36
}
],
"source": [
"# Enforce the row-count check in code rather than by eye: every feature row\n",
"# must have exactly one label value.\n",
"assert len(my_label) == len(my_feature), \"label/feature row counts differ\"\n",
"# Summary statistics of the label, to confirm the values look reasonable.\n",
"my_label.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8RbxM0tNo0wV"
},
"source": [
"Make sure all fields are set to be numeric"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"id": "PZKzpaI-oXOU",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"outputId": "c9a9dc97-0235-4da7-97c7-b8a9d7e9e647"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio AA AU AG AC UA UU UG UC \\\n",
"0 100 0.8 0.7 0.2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 \n",
"1 100 0.8 0.7 0.2 0.0 0.0 0.0 0.0 0.0 0.1 0.1 0.1 \n",
"2 100 0.8 0.7 0.2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 \n",
"3 100 0.8 0.7 0.2 0.0 0.0 0.0 0.0 0.0 0.1 0.0 0.1 \n",
"4 100 0.8 0.7 0.2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 \n",
"... ... ... ... ... .. .. .. .. .. .. .. .. \n",
"836371 80 0.8 0.4 0.5 0.0 0.0 0.1 0.0 0.0 0.0 0.1 0.0 \n",
"836372 80 0.8 0.4 0.5 0.0 0.0 0.1 0.0 0.0 0.0 0.1 0.0 \n",
"836373 80 0.8 0.4 0.5 0.0 0.0 0.1 0.0 0.0 0.0 0.1 0.0 \n",
"836374 80 0.8 0.4 0.5 0.0 0.0 0.1 0.0 0.0 0.0 0.1 0.0 \n",
"836375 80 0.8 0.4 0.5 0.0 0.0 0.1 0.0 0.0 0.0 0.1 0.0 \n",
"\n",
" GA GU GG GC CA CU CG CC \n",
"0 0.0 0.0 0.0 0.1 0.0 0.1 0.1 0.2 \n",
"1 0.0 0.1 0.0 0.1 0.0 0.1 0.1 0.3 \n",
"2 0.0 0.0 0.0 0.1 0.0 0.1 0.1 0.2 \n",
"3 0.0 0.1 0.0 0.1 0.0 0.1 0.1 0.3 \n",
"4 0.0 0.0 0.0 0.1 0.0 0.1 0.1 0.3 \n",
"... .. .. .. .. .. .. .. .. \n",
"836371 0.0 0.1 0.2 0.2 0.0 0.0 0.2 0.1 \n",
"836372 0.1 0.1 0.2 0.1 0.0 0.0 0.1 0.1 \n",
"836373 0.1 0.0 0.2 0.2 0.0 0.0 0.1 0.1 \n",
"836374 0.1 0.1 0.2 0.2 0.0 0.0 0.2 0.1 \n",
"836375 0.1 0.1 0.2 0.1 0.0 0.0 0.1 0.1 \n",
"\n",
"[836376 rows x 20 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" UU | \n",
" UG | \n",
" UC | \n",
" GA | \n",
" GU | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 100 | \n",
" 0.8 | \n",
" 0.7 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.2 | \n",
"
\n",
" \n",
" 1 | \n",
" 100 | \n",
" 0.8 | \n",
" 0.7 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.3 | \n",
"
\n",
" \n",
" 2 | \n",
" 100 | \n",
" 0.8 | \n",
" 0.7 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.2 | \n",
"
\n",
" \n",
" 3 | \n",
" 100 | \n",
" 0.8 | \n",
" 0.7 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.3 | \n",
"
\n",
" \n",
" 4 | \n",
" 100 | \n",
" 0.8 | \n",
" 0.7 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.3 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 836371 | \n",
" 80 | \n",
" 0.8 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.2 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 836372 | \n",
" 80 | \n",
" 0.8 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 836373 | \n",
" 80 | \n",
" 0.8 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 836374 | \n",
" 80 | \n",
" 0.8 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.2 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 836375 | \n",
" 80 | \n",
" 0.8 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
"
\n",
"
836376 rows × 20 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 37
}
],
"source": [
"# DataFrame.apply() returns a new DataFrame; assign it back, otherwise the\n",
"# conversion is discarded and my_feature keeps its original dtypes.\n",
"my_feature = my_feature.apply(pd.to_numeric)\n",
"# Show the resulting dtypes to confirm that every column is numeric.\n",
"my_feature.dtypes\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"id": "xcyw0QljtPSO",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5cbfac4c-855f-4039-e258-72f46343b809"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 -19.5\n",
"1 -19.3\n",
"2 -19.8\n",
"3 -18.9\n",
"4 -19.0\n",
" ... \n",
"836371 -40.2\n",
"836372 -40.3\n",
"836373 -40.0\n",
"836374 -40.6\n",
"836375 -40.2\n",
"Name: MonoMFE, Length: 836376, dtype: float64"
]
},
"metadata": {},
"execution_count": 38
}
],
"source": [
"# Assign the converted values back so the numeric conversion actually takes\n",
"# effect. pd.to_numeric accepts a whole Series, so the conversion is done in\n",
"# one vectorised call rather than element by element through apply().\n",
"my_label = pd.to_numeric(my_label)\n",
"# Show the resulting dtype to confirm that the label is numeric.\n",
"my_label.dtype"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "K24afla-4s2x"
},
"source": [
"## Training the model\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NRd8ncTmq435"
},
"source": [
"Here you can modify the parameters (learning rate, epochs, and batch size). You must also ensure that your \"shape\" is correct. The format for defining the shape is (X,), where X is the number of columns in the feature dataframe, followed by just a comma."
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"id": "Ye730h13CQ97",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "9a90d314-7bd0-49aa-c2cf-970b25c35825"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch 1/150\n",
"697/697 [==============================] - 4s 5ms/step - loss: 639.3092 - root_mean_squared_error: 25.2846\n",
"Epoch 2/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 196.3606 - root_mean_squared_error: 14.0129\n",
"Epoch 3/150\n",
"697/697 [==============================] - 5s 7ms/step - loss: 119.9975 - root_mean_squared_error: 10.9543\n",
"Epoch 4/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 100.1245 - root_mean_squared_error: 10.0062\n",
"Epoch 5/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 87.9115 - root_mean_squared_error: 9.3761\n",
"Epoch 6/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 78.1567 - root_mean_squared_error: 8.8406\n",
"Epoch 7/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 69.5773 - root_mean_squared_error: 8.3413\n",
"Epoch 8/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 62.4277 - root_mean_squared_error: 7.9011\n",
"Epoch 9/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 55.9522 - root_mean_squared_error: 7.4801\n",
"Epoch 10/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 50.1049 - root_mean_squared_error: 7.0785\n",
"Epoch 11/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 44.6236 - root_mean_squared_error: 6.6801\n",
"Epoch 12/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 39.0956 - root_mean_squared_error: 6.2526\n",
"Epoch 13/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 33.9663 - root_mean_squared_error: 5.8281\n",
"Epoch 14/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 29.6879 - root_mean_squared_error: 5.4487\n",
"Epoch 15/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 26.2835 - root_mean_squared_error: 5.1267\n",
"Epoch 16/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 23.4873 - root_mean_squared_error: 4.8464\n",
"Epoch 17/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 21.0786 - root_mean_squared_error: 4.5911\n",
"Epoch 18/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 18.9600 - root_mean_squared_error: 4.3543\n",
"Epoch 19/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 17.3737 - root_mean_squared_error: 4.1682\n",
"Epoch 20/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 16.0618 - root_mean_squared_error: 4.0077\n",
"Epoch 21/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 14.9524 - root_mean_squared_error: 3.8668\n",
"Epoch 22/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 14.0327 - root_mean_squared_error: 3.7460\n",
"Epoch 23/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 13.2590 - root_mean_squared_error: 3.6413\n",
"Epoch 24/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 12.4937 - root_mean_squared_error: 3.5346\n",
"Epoch 25/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 11.8049 - root_mean_squared_error: 3.4358\n",
"Epoch 26/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 11.1505 - root_mean_squared_error: 3.3392\n",
"Epoch 27/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 10.5037 - root_mean_squared_error: 3.2409\n",
"Epoch 28/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 9.9116 - root_mean_squared_error: 3.1483\n",
"Epoch 29/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 9.3931 - root_mean_squared_error: 3.0648\n",
"Epoch 30/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 8.8643 - root_mean_squared_error: 2.9773\n",
"Epoch 31/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 8.4094 - root_mean_squared_error: 2.8999\n",
"Epoch 32/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 7.9888 - root_mean_squared_error: 2.8264\n",
"Epoch 33/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 7.6322 - root_mean_squared_error: 2.7626\n",
"Epoch 34/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 7.2829 - root_mean_squared_error: 2.6987\n",
"Epoch 35/150\n",
"697/697 [==============================] - 4s 5ms/step - loss: 6.9896 - root_mean_squared_error: 2.6438\n",
"Epoch 36/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 6.7021 - root_mean_squared_error: 2.5888\n",
"Epoch 37/150\n",
"697/697 [==============================] - 4s 5ms/step - loss: 6.4707 - root_mean_squared_error: 2.5438\n",
"Epoch 38/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 6.2299 - root_mean_squared_error: 2.4960\n",
"Epoch 39/150\n",
"697/697 [==============================] - 5s 7ms/step - loss: 6.0281 - root_mean_squared_error: 2.4552\n",
"Epoch 40/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 5.8690 - root_mean_squared_error: 2.4226\n",
"Epoch 41/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 5.7094 - root_mean_squared_error: 2.3894\n",
"Epoch 42/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 5.5660 - root_mean_squared_error: 2.3592\n",
"Epoch 43/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 5.4271 - root_mean_squared_error: 2.3296\n",
"Epoch 44/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 5.2882 - root_mean_squared_error: 2.2996\n",
"Epoch 45/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 5.1793 - root_mean_squared_error: 2.2758\n",
"Epoch 46/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 5.0446 - root_mean_squared_error: 2.2460\n",
"Epoch 47/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 4.9433 - root_mean_squared_error: 2.2234\n",
"Epoch 48/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 4.8393 - root_mean_squared_error: 2.1998\n",
"Epoch 49/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 4.7356 - root_mean_squared_error: 2.1761\n",
"Epoch 50/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 4.6524 - root_mean_squared_error: 2.1569\n",
"Epoch 51/150\n",
"697/697 [==============================] - 4s 5ms/step - loss: 4.5616 - root_mean_squared_error: 2.1358\n",
"Epoch 52/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 4.4696 - root_mean_squared_error: 2.1142\n",
"Epoch 53/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 4.3922 - root_mean_squared_error: 2.0958\n",
"Epoch 54/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 4.2855 - root_mean_squared_error: 2.0702\n",
"Epoch 55/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 4.1919 - root_mean_squared_error: 2.0474\n",
"Epoch 56/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 4.1037 - root_mean_squared_error: 2.0258\n",
"Epoch 57/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 4.0537 - root_mean_squared_error: 2.0134\n",
"Epoch 58/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.9709 - root_mean_squared_error: 1.9927\n",
"Epoch 59/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.9214 - root_mean_squared_error: 1.9803\n",
"Epoch 60/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.8481 - root_mean_squared_error: 1.9616\n",
"Epoch 61/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.7683 - root_mean_squared_error: 1.9412\n",
"Epoch 62/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.7508 - root_mean_squared_error: 1.9367\n",
"Epoch 63/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.6959 - root_mean_squared_error: 1.9225\n",
"Epoch 64/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.6279 - root_mean_squared_error: 1.9047\n",
"Epoch 65/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.6046 - root_mean_squared_error: 1.8986\n",
"Epoch 66/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.5579 - root_mean_squared_error: 1.8862\n",
"Epoch 67/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.5166 - root_mean_squared_error: 1.8753\n",
"Epoch 68/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.4637 - root_mean_squared_error: 1.8611\n",
"Epoch 69/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.4250 - root_mean_squared_error: 1.8507\n",
"Epoch 70/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.3870 - root_mean_squared_error: 1.8404\n",
"Epoch 71/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.3457 - root_mean_squared_error: 1.8291\n",
"Epoch 72/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.3152 - root_mean_squared_error: 1.8208\n",
"Epoch 73/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.2870 - root_mean_squared_error: 1.8130\n",
"Epoch 74/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.2640 - root_mean_squared_error: 1.8067\n",
"Epoch 75/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.2135 - root_mean_squared_error: 1.7926\n",
"Epoch 76/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.1894 - root_mean_squared_error: 1.7859\n",
"Epoch 77/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.1377 - root_mean_squared_error: 1.7714\n",
"Epoch 78/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 3.1348 - root_mean_squared_error: 1.7706\n",
"Epoch 79/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.0821 - root_mean_squared_error: 1.7556\n",
"Epoch 80/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 3.0684 - root_mean_squared_error: 1.7517\n",
"Epoch 81/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.0142 - root_mean_squared_error: 1.7361\n",
"Epoch 82/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 3.0253 - root_mean_squared_error: 1.7393\n",
"Epoch 83/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.9675 - root_mean_squared_error: 1.7226\n",
"Epoch 84/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.9496 - root_mean_squared_error: 1.7174\n",
"Epoch 85/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.9379 - root_mean_squared_error: 1.7140\n",
"Epoch 86/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.9133 - root_mean_squared_error: 1.7068\n",
"Epoch 87/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.8791 - root_mean_squared_error: 1.6968\n",
"Epoch 88/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.8440 - root_mean_squared_error: 1.6864\n",
"Epoch 89/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.8425 - root_mean_squared_error: 1.6860\n",
"Epoch 90/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.8090 - root_mean_squared_error: 1.6760\n",
"Epoch 91/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.7931 - root_mean_squared_error: 1.6712\n",
"Epoch 92/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.7692 - root_mean_squared_error: 1.6641\n",
"Epoch 93/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.7449 - root_mean_squared_error: 1.6568\n",
"Epoch 94/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.7310 - root_mean_squared_error: 1.6526\n",
"Epoch 95/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.7023 - root_mean_squared_error: 1.6439\n",
"Epoch 96/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.6988 - root_mean_squared_error: 1.6428\n",
"Epoch 97/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.6747 - root_mean_squared_error: 1.6354\n",
"Epoch 98/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.6442 - root_mean_squared_error: 1.6261\n",
"Epoch 99/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.6269 - root_mean_squared_error: 1.6208\n",
"Epoch 100/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.6086 - root_mean_squared_error: 1.6151\n",
"Epoch 101/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 2.6048 - root_mean_squared_error: 1.6139\n",
"Epoch 102/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.5726 - root_mean_squared_error: 1.6039\n",
"Epoch 103/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.5583 - root_mean_squared_error: 1.5995\n",
"Epoch 104/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.5532 - root_mean_squared_error: 1.5979\n",
"Epoch 105/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.5289 - root_mean_squared_error: 1.5902\n",
"Epoch 106/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.5091 - root_mean_squared_error: 1.5840\n",
"Epoch 107/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.4945 - root_mean_squared_error: 1.5794\n",
"Epoch 108/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.4690 - root_mean_squared_error: 1.5713\n",
"Epoch 109/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.4674 - root_mean_squared_error: 1.5708\n",
"Epoch 110/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.4404 - root_mean_squared_error: 1.5622\n",
"Epoch 111/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.4199 - root_mean_squared_error: 1.5556\n",
"Epoch 112/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.4150 - root_mean_squared_error: 1.5540\n",
"Epoch 113/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.4044 - root_mean_squared_error: 1.5506\n",
"Epoch 114/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.3866 - root_mean_squared_error: 1.5449\n",
"Epoch 115/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.3740 - root_mean_squared_error: 1.5408\n",
"Epoch 116/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.3702 - root_mean_squared_error: 1.5396\n",
"Epoch 117/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.3487 - root_mean_squared_error: 1.5325\n",
"Epoch 118/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.3343 - root_mean_squared_error: 1.5278\n",
"Epoch 119/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.3184 - root_mean_squared_error: 1.5226\n",
"Epoch 120/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.3074 - root_mean_squared_error: 1.5190\n",
"Epoch 121/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.3060 - root_mean_squared_error: 1.5186\n",
"Epoch 122/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.2809 - root_mean_squared_error: 1.5103\n",
"Epoch 123/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.2650 - root_mean_squared_error: 1.5050\n",
"Epoch 124/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.2582 - root_mean_squared_error: 1.5027\n",
"Epoch 125/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.2474 - root_mean_squared_error: 1.4991\n",
"Epoch 126/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.2309 - root_mean_squared_error: 1.4936\n",
"Epoch 127/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.2103 - root_mean_squared_error: 1.4867\n",
"Epoch 128/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.2059 - root_mean_squared_error: 1.4852\n",
"Epoch 129/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.1836 - root_mean_squared_error: 1.4777\n",
"Epoch 130/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.1748 - root_mean_squared_error: 1.4747\n",
"Epoch 131/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.1834 - root_mean_squared_error: 1.4776\n",
"Epoch 132/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.1560 - root_mean_squared_error: 1.4683\n",
"Epoch 133/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.1513 - root_mean_squared_error: 1.4667\n",
"Epoch 134/150\n",
"697/697 [==============================] - 4s 6ms/step - loss: 2.1361 - root_mean_squared_error: 1.4615\n",
"Epoch 135/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.1392 - root_mean_squared_error: 1.4626\n",
"Epoch 136/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.1258 - root_mean_squared_error: 1.4580\n",
"Epoch 137/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.1026 - root_mean_squared_error: 1.4501\n",
"Epoch 138/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.0969 - root_mean_squared_error: 1.4481\n",
"Epoch 139/150\n",
"697/697 [==============================] - 6s 9ms/step - loss: 2.0778 - root_mean_squared_error: 1.4415\n",
"Epoch 140/150\n",
"697/697 [==============================] - 5s 7ms/step - loss: 2.0872 - root_mean_squared_error: 1.4447\n",
"Epoch 141/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.0562 - root_mean_squared_error: 1.4340\n",
"Epoch 142/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.0496 - root_mean_squared_error: 1.4316\n",
"Epoch 143/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.0537 - root_mean_squared_error: 1.4331\n",
"Epoch 144/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.0342 - root_mean_squared_error: 1.4263\n",
"Epoch 145/150\n",
"697/697 [==============================] - 3s 4ms/step - loss: 2.0359 - root_mean_squared_error: 1.4269\n",
"Epoch 146/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.0162 - root_mean_squared_error: 1.4199\n",
"Epoch 147/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 2.0137 - root_mean_squared_error: 1.4191\n",
"Epoch 148/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 1.9911 - root_mean_squared_error: 1.4111\n",
"Epoch 149/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 1.9885 - root_mean_squared_error: 1.4102\n",
"Epoch 150/150\n",
"697/697 [==============================] - 3s 5ms/step - loss: 1.9712 - root_mean_squared_error: 1.4040\n"
]
}
],
"source": [
"# Training hyperparameters.\n",
"learning_rate = 0.001\n",
"epochs = 150\n",
"my_batch_size = 1200\n",
"units = 1\n",
"shape = (20,)  # input shape handed to build_model\n",
"\n",
"my_model = build_model(learning_rate, units, shape)\n",
"\n",
"# NOTE: `epochs` is rebound below from the integer epoch count to the value returned\n",
"# by train_model (presumably the list of epoch indices), so it must not be reused\n",
"# as an epoch count in later cells.\n",
"training_outputs = train_model(my_model, my_feature, my_label, epochs, my_batch_size)\n",
"trained_weight, trained_bias, epochs, rmse = training_outputs"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YViVZelvlp1Z"
},
"source": [
"Now we can save the model. Set `folder_name` to the folder where the model should be written before running the cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "I_hFLYbDlpZx",
"outputId": "470a1218-6b73-4dab-e1c6-325ea261d728",
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: /content/drive/My Drive/DiStd/assets\n"
]
}
],
"source": [
"# Destination folder for the saved model; replace the placeholder before running.\n",
"folder_name = \"set/path/of/model/folder\"\n",
"my_model.save(folder_name)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_JAG5JYytnTr"
},
"source": [
"## Troubleshooting: use the following cells to step through the model building manually, but only IF the training function is failing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "HH4eRnesmGg4"
},
"outputs": [],
"source": [
"my_model.add(tf.keras.layers.Dense(units=20, input_shape=(1, )))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 324
},
"collapsed": true,
"id": "AWvJ0HaDloDX",
"outputId": "b161c90a-315a-47f7-fda9-53593889f546"
},
"outputs": [
{
"ename": "TypeError",
"evalue": "ignored",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mhistory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmy_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmy_feature\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmy_label\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmy_batch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1146\u001b[0m \u001b[0muse_multiprocessing\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0muse_multiprocessing\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1147\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1148\u001b[0;31m steps_per_execution=self._steps_per_execution)\n\u001b[0m\u001b[1;32m 1149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1150\u001b[0m \u001b[0;31m# Container that configures and calls `tf.keras.Callback`s.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py\u001b[0m in \u001b[0;36mget_data_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1381\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"model\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"_cluster_coordinator\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1382\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_ClusterCoordinatorDataHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1383\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDataHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1384\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1385\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution, distribute)\u001b[0m\n\u001b[1;32m 1141\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1142\u001b[0m \u001b[0msteps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1143\u001b[0;31m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepochs\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0minitial_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1144\u001b[0m \u001b[0msample_weights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1145\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'list' and 'int'"
]
}
],
"source": [
"# `epochs` may already have been rebound to a list of epoch indices by an earlier\n",
"# cell; passing that list to fit() raises\n",
"# \"TypeError: unsupported operand type(s) for -: 'list' and 'int'\".\n",
"# Derive an integer epoch count first.\n",
"num_epochs = len(epochs) if isinstance(epochs, (list, tuple)) else epochs\n",
"history = my_model.fit(x=my_feature, y=my_label,\n",
"                       batch_size=my_batch_size, epochs=num_epochs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YJzOduxVl2GJ"
},
"outputs": [],
"source": [
"# Per-epoch metrics recorded by fit(), one row per epoch.\n",
"hist = pd.DataFrame(history.history)\n",
"rmse = hist[\"root_mean_squared_error\"]\n",
"# From here on `epochs` holds history.epoch rather than an integer count.\n",
"epochs = history.epoch"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "scrzhO48pcRj"
},
"outputs": [],
"source": [
"# Fetch the weight arrays once and keep the first two.\n",
"# NOTE(review): for a model with several layers these are simply the first two\n",
"# arrays returned by get_weights(); confirm they are the intended weight and bias.\n",
"model_weights = my_model.get_weights()\n",
"trained_weight = model_weights[0]\n",
"trained_bias = model_weights[1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Mxwc_RAupwP_"
},
"outputs": [],
"source": [
"print(trained_weight)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vStvuHK_t1hs"
},
"source": [
"## If you can get the weights and bias, you can now plot the results!"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "M694Qnq1rmaG"
},
"source": [
"The loss curve should approach zero (but will likely never reach it). Poor MFE models tend to plateau at values above 5. Try adjusting the model and its parameters until the values get closer to 2. For standard deviation models the loss and RMSE should be much lower (around 0.7)."
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 281
},
"id": "eH1ur8epqzru",
"outputId": "0008ffc3-1f0d-4fb7-d870-01c204aa633e"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"plot_the_loss_curve(epochs, rmse)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jYSpvaglNsDp"
},
"source": [
"[\"Length\", \"GCpercent\",\"CGratio\", \"AUratio\", \"MFE\", \"AA\",\"AU\",\"AG\",\"AC\",\"UA\",\"UU\",\"UG\",\"UC\",\"GA\",\"GU\",\"GG\",\"GC\",\"CA\",\"CU\",\"CG\", \"CC\"]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "okgNzjrgB4-R"
},
"source": [
"Plot Results for Mean MFE prediction. \n",
"#### *These cells will need to be changed depending on the model you are building.*\n",
"Are you building a dinucleotide or mononucleotide model? Change the column names accordingly."
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"id": "pUYcXkVqCOr3"
},
"outputs": [],
"source": [
"# The 20 feature columns fed to the model (its input shape is (20,)).\n",
"feature_columns = [\n",
"    \"Length\", \"GCpercent\", \"CGratio\", \"AUratio\",\n",
"    \"AA\", \"AU\", \"AG\", \"AC\",\n",
"    \"UA\", \"UU\", \"UG\", \"UC\",\n",
"    \"GA\", \"GU\", \"GG\", \"GC\",\n",
"    \"CA\", \"CU\", \"CG\", \"CC\",\n",
"]\n",
"meanMFE_result = my_model.predict(training_df[feature_columns])"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"id": "BMBf-xVmEejC",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"outputId": "4177914c-ec6c-49f0-f439-a7593a88a7a1"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 60
},
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"# Collect the predictions next to the training label and the native MFE.\n",
"# For a dinucleotide model, rename the \"Mono\" columns (e.g. to \"DiMFE\") here and in\n",
"# the later scatter-plot cell that reads \"predictedMonoMFEs\".\n",
"predicted_mfes = pd.DataFrame(meanMFE_result, columns=[\"predictedMonoMFEs\"])\n",
"predicted_mfes[\"MonoMFE\"] = my_label\n",
"predicted_mfes[\"MFE\"] = training_df[\"MFE\"].to_list()\n",
"\n",
"# Native MFE on the x axis versus the model prediction on the y axis.\n",
"predicted_mfes.plot.scatter(x=\"MFE\", y=\"predictedMonoMFEs\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fezSTvZMB-PC"
},
"source": [
"-OR- Plot results for standard deviation prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BY8pj_ICEobh"
},
"outputs": [],
"source": [
"# Same 20 feature columns as the model input, listed explicitly.\n",
"std_feature_columns = [\n",
"    \"Length\", \"GCpercent\", \"CGratio\", \"AUratio\",\n",
"    \"AA\", \"AU\", \"AG\", \"AC\",\n",
"    \"UA\", \"UU\", \"UG\", \"UC\",\n",
"    \"GA\", \"GU\", \"GG\", \"GC\",\n",
"    \"CA\", \"CU\", \"CG\", \"CC\",\n",
"]\n",
"DiStd_result = my_model.predict(training_df[std_feature_columns])\n",
"predicted_std = pd.DataFrame(DiStd_result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 364
},
"collapsed": true,
"id": "D5mYNK9p5aRR",
"outputId": "011e56c1-0bc6-4996-e1c5-8dbb1ff58c8a"
},
"outputs": [
{
"ename": "ValueError",
"evalue": "ignored",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredicted_std\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"predictedDiStd\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mpredicted_std\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"DiStd\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloaded_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'DiStd'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# predicted_std[\"RealDiMFE\"] = loaded_df['DiMFE'].to_list()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mpredicted_std\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"DiStd\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"predictedDiStd\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 5498\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5499\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5500\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5501\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5502\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/_libs/properties.pyx\u001b[0m in \u001b[0;36mpandas._libs.properties.AxisProperty.__set__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_set_axis\u001b[0;34m(self, axis, labels)\u001b[0m\n\u001b[1;32m 764\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 765\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 766\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 767\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_clear_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 768\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mset_axis\u001b[0;34m(self, axis, new_labels)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0;31m# Caller is responsible for ensuring we have an Index object.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 216\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_set_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 217\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/internals/base.py\u001b[0m in \u001b[0;36m_validate_set_axis\u001b[0;34m(self, axis, new_labels)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mnew_len\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mold_len\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m raise ValueError(\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0;34mf\"Length mismatch: Expected axis has {old_len} elements, new \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0;34mf\"values have {new_len} elements\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m )\n",
"\u001b[0;31mValueError\u001b[0m: Length mismatch: Expected axis has 2 elements, new values have 1 elements"
]
}
],
"source": [
"# Rebuild the frame from the raw predictions so this cell can be re-run safely.\n",
"# Assigning `.columns` on a frame that already carried the extra \"DiStd\" column\n",
"# raised \"ValueError: Length mismatch\".\n",
"predicted_std = pd.DataFrame(DiStd_result, columns=[\"predictedDiStd\"])\n",
"# NOTE(review): the predictions come from training_df but the labels from loaded_df;\n",
"# confirm both frames hold the same rows in the same order.\n",
"predicted_std[\"DiStd\"] = loaded_df[\"DiStd\"].to_list()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"id": "gJRCMvD83qcv",
"outputId": "13fe4372-2219-446f-c792-652dd6118978"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" predictedDiStd | \n",
" DiStd | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 836376.0 | \n",
" 836376.0 | \n",
"
\n",
" \n",
" mean | \n",
" 3.3 | \n",
" 3.4 | \n",
"
\n",
" \n",
" std | \n",
" 0.6 | \n",
" 0.6 | \n",
"
\n",
" \n",
" min | \n",
" 0.1 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 3.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.4 | \n",
" 3.4 | \n",
"
\n",
" \n",
" 75% | \n",
" 3.8 | \n",
" 3.9 | \n",
"
\n",
" \n",
" max | \n",
" 4.7 | \n",
" 6.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
],
"text/plain": [
" predictedDiStd DiStd\n",
"count 836376.0 836376.0\n",
"mean 3.3 3.4\n",
"std 0.6 0.6\n",
"min 0.1 0.0\n",
"25% 3.0 3.0\n",
"50% 3.4 3.4\n",
"75% 3.8 3.9\n",
"max 4.7 6.0"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predicted_std.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"id": "izi94b-cEtC7",
"outputId": "f8e78bff-9147-46f4-f2a9-cef08043343a"
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"predicted_std.plot.scatter(x=\"DiStd\", y=\"predictedDiStd\")\n"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
},
"collapsed": true,
"id": "oRJGGR4gyzjH",
"outputId": "2abaddaf-3c9b-49ec-c30b-a88a427e127e"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 61
},
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"predicted_mfes.plot.scatter(x=\"predictedMonoMFEs\", y='MFE')"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GhfuCYjfCHGJ",
"outputId": "f6687c38-f784-4dac-c91d-dec38a89cb95"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" normalization (Normalizatio (None, 20) 41 \n",
" n) \n",
" \n",
" dense (Dense) (None, 64) 1344 \n",
" \n",
" dense_1 (Dense) (None, 64) 4160 \n",
" \n",
" dense_2 (Dense) (None, 1) 65 \n",
" \n",
"=================================================================\n",
"Total params: 5,610\n",
"Trainable params: 5,569\n",
"Non-trainable params: 41\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"my_model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NixZHwDBGPid"
},
"source": [
"# Testing model on RNAcentral data"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"id": "4vQCqeRC8Kbc",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 394
},
"outputId": "b64f57f9-7cf7-4a7e-b4e6-17dc88781b93"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio MFE AA AU \\\n",
"count 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 5729593.0 \n",
"mean 85.1 0.5 0.5 0.5 -26.7 0.1 0.1 \n",
"std 40.6 0.1 0.1 0.1 17.7 0.0 0.0 \n",
"min 20.0 0.0 0.0 0.0 -221.1 0.0 0.0 \n",
"25% 66.0 0.4 0.4 0.4 -35.4 0.0 0.0 \n",
"50% 74.0 0.5 0.5 0.5 -25.7 0.1 0.1 \n",
"75% 107.0 0.6 0.5 0.5 -14.1 0.1 0.1 \n",
"max 200.0 1.0 1.0 1.0 0.0 1.0 0.5 \n",
"\n",
" AG AC UA ... GG GC CA \\\n",
"count 5729593.0 5729592.0 5729592.0 ... 5729592.0 5729592.0 5729592.0 \n",
"mean 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"std 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"min 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"25% 0.1 0.0 0.0 ... 0.0 0.0 0.0 \n",
"50% 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"75% 0.1 0.1 0.0 ... 0.1 0.1 0.1 \n",
"max 0.5 0.5 0.0 ... 1.0 0.4 0.5 \n",
"\n",
" CU CG CC MonoMFE mono_z DiMFE di_z \n",
"count 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 5729592.0 \n",
"mean 0.1 0.0 0.1 -20.8 -0.0 -21.5 -0.0 \n",
"std 0.0 0.0 0.0 13.3 1.0 13.6 1.0 \n",
"min 0.0 0.0 0.0 -118.5 -10.0 -140.0 -10.0 \n",
"25% 0.0 0.0 0.0 -27.1 -0.6 -27.9 -0.6 \n",
"50% 0.1 0.0 0.1 -20.0 0.1 -21.0 0.1 \n",
"75% 0.1 0.1 0.1 -10.9 0.7 -11.5 0.7 \n",
"max 0.5 0.3 1.0 0.0 4.4 0.0 4.0 \n",
"\n",
"[8 rows x 25 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" MFE | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" ... | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
" MonoMFE | \n",
" mono_z | \n",
" DiMFE | \n",
" di_z | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729593.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" ... | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
" 5729592.0 | \n",
"
\n",
" \n",
" mean | \n",
" 85.1 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -26.7 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -20.8 | \n",
" -0.0 | \n",
" -21.5 | \n",
" -0.0 | \n",
"
\n",
" \n",
" std | \n",
" 40.6 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 17.7 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 13.3 | \n",
" 1.0 | \n",
" 13.6 | \n",
" 1.0 | \n",
"
\n",
" \n",
" min | \n",
" 20.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -221.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -118.5 | \n",
" -10.0 | \n",
" -140.0 | \n",
" -10.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 66.0 | \n",
" 0.4 | \n",
" 0.4 | \n",
" 0.4 | \n",
" -35.4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -27.1 | \n",
" -0.6 | \n",
" -27.9 | \n",
" -0.6 | \n",
"
\n",
" \n",
" 50% | \n",
" 74.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -25.7 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -20.0 | \n",
" 0.1 | \n",
" -21.0 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 75% | \n",
" 107.0 | \n",
" 0.6 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -14.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -10.9 | \n",
" 0.7 | \n",
" -11.5 | \n",
" 0.7 | \n",
"
\n",
" \n",
" max | \n",
" 200.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.0 | \n",
" ... | \n",
" 1.0 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.3 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 4.4 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
"
\n",
"
8 rows × 25 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 64
}
],
"source": [
"test_df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"id": "ZfjI4WyLKsE4"
},
"outputs": [],
"source": [
"# Keep only the rows whose Length is at least 60.\n",
"min_length = 60\n",
"is_long_enough = test_df[\"Length\"] >= min_length\n",
"test_over_60 = test_df[is_long_enough]"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"id": "pNgw2Whgp7MK",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 488
},
"outputId": "38e10ff8-d8e7-4e8e-ba5b-2f130b64ce21"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio MFE AA AU AG AC UA ... \\\n",
"0 200 0.6 0.4 0.6 -59.2 0.1 0.0 0.1 0.1 0.0 ... \n",
"1 72 0.2 0.4 0.5 -11.9 0.1 0.2 0.0 0.0 0.0 ... \n",
"2 104 0.5 0.4 0.5 -31.3 0.0 0.0 0.1 0.1 0.0 ... \n",
"5 69 0.4 0.3 0.5 -17.9 0.1 0.1 0.1 0.0 0.0 ... \n",
"6 70 0.4 0.6 0.5 -13.9 0.1 0.0 0.1 0.1 0.0 ... \n",
"... ... ... ... ... ... .. .. .. .. .. ... \n",
"5729588 73 0.6 0.4 0.4 -25.8 0.0 0.0 0.1 0.0 0.0 ... \n",
"5729589 117 0.6 0.4 0.5 -38.6 0.0 0.1 0.1 0.0 0.0 ... \n",
"5729590 85 0.6 0.4 0.5 -37.3 0.0 0.0 0.1 0.0 0.0 ... \n",
"5729591 113 0.4 0.5 0.5 -22.2 0.1 0.1 0.1 0.1 0.0 ... \n",
"5729592 84 0.6 0.5 0.5 -34.4 0.1 0.0 0.0 0.0 0.0 ... \n",
"\n",
" GG GC CA CU CG CC MonoMFE mono_z DiMFE di_z \n",
"0 0.1 0.1 0.0 0.1 0.1 0.0 -64.8 0.4 -64.9 -2.0 \n",
"1 0.1 0.0 0.0 0.0 0.0 0.0 -7.4 0.3 -10.3 0.6 \n",
"2 0.1 0.0 0.1 0.0 0.1 0.1 -30.0 0.1 -29.6 -0.6 \n",
"5 0.1 0.1 0.0 0.1 0.0 0.0 -13.0 0.8 -16.1 -0.8 \n",
"6 0.0 0.0 0.1 0.1 0.0 0.1 -7.3 -0.7 -10.8 0.1 \n",
"... .. .. .. .. .. .. ... ... ... ... \n",
"5729588 0.1 0.1 0.0 0.1 0.1 0.1 -23.4 -0.2 -20.9 1.2 \n",
"5729589 0.1 0.1 0.1 0.0 0.1 0.1 -35.9 -2.2 -35.0 1.0 \n",
"5729590 0.1 0.1 0.1 0.1 0.1 0.1 -29.2 -0.6 -30.1 1.9 \n",
"5729591 0.0 0.0 0.1 0.0 0.0 0.0 -21.4 0.1 -22.9 1.4 \n",
"5729592 0.1 0.1 0.0 0.0 0.1 0.1 -28.0 0.3 -29.8 -0.9 \n",
"\n",
"[4591323 rows x 25 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" MFE | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" ... | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
" MonoMFE | \n",
" mono_z | \n",
" DiMFE | \n",
" di_z | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 200 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.6 | \n",
" -59.2 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" -64.8 | \n",
" 0.4 | \n",
" -64.9 | \n",
" -2.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 72 | \n",
" 0.2 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -11.9 | \n",
" 0.1 | \n",
" 0.2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -7.4 | \n",
" 0.3 | \n",
" -10.3 | \n",
" 0.6 | \n",
"
\n",
" \n",
" 2 | \n",
" 104 | \n",
" 0.5 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -31.3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -30.0 | \n",
" 0.1 | \n",
" -29.6 | \n",
" -0.6 | \n",
"
\n",
" \n",
" 5 | \n",
" 69 | \n",
" 0.4 | \n",
" 0.3 | \n",
" 0.5 | \n",
" -17.9 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -13.0 | \n",
" 0.8 | \n",
" -16.1 | \n",
" -0.8 | \n",
"
\n",
" \n",
" 6 | \n",
" 70 | \n",
" 0.4 | \n",
" 0.6 | \n",
" 0.5 | \n",
" -13.9 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -7.3 | \n",
" -0.7 | \n",
" -10.8 | \n",
" 0.1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 5729588 | \n",
" 73 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.4 | \n",
" -25.8 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -23.4 | \n",
" -0.2 | \n",
" -20.9 | \n",
" 1.2 | \n",
"
\n",
" \n",
" 5729589 | \n",
" 117 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -38.6 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -35.9 | \n",
" -2.2 | \n",
" -35.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 5729590 | \n",
" 85 | \n",
" 0.6 | \n",
" 0.4 | \n",
" 0.5 | \n",
" -37.3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -29.2 | \n",
" -0.6 | \n",
" -30.1 | \n",
" 1.9 | \n",
"
\n",
" \n",
" 5729591 | \n",
" 113 | \n",
" 0.4 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -22.2 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -21.4 | \n",
" 0.1 | \n",
" -22.9 | \n",
" 1.4 | \n",
"
\n",
" \n",
" 5729592 | \n",
" 84 | \n",
" 0.6 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -34.4 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -28.0 | \n",
" 0.3 | \n",
" -29.8 | \n",
" -0.9 | \n",
"
\n",
" \n",
"
\n",
"
4591323 rows × 25 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 66
}
],
"source": [
"# Keep the converted frame; without the assignment the numeric conversion is\n",
"# computed, displayed and then thrown away.\n",
"test_over_60 = test_over_60.apply(pd.to_numeric)\n",
"test_over_60.head()"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"id": "mHv2WLDfkuOR",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 394
},
"outputId": "1811ba3c-f18b-4cb7-83dc-60eb65858bdb"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Length GCpercent CGratio AUratio MFE AA AU \\\n",
"count 4591323.0 4591323.0 4591323.0 4591323.0 4591323.0 4591323.0 4591323.0 \n",
"mean 97.7 0.5 0.5 0.5 -31.9 0.1 0.1 \n",
"std 34.9 0.1 0.1 0.1 15.6 0.0 0.0 \n",
"min 60.0 0.0 0.0 0.0 -221.1 0.0 0.0 \n",
"25% 72.0 0.4 0.4 0.4 -38.7 0.0 0.0 \n",
"50% 82.0 0.5 0.5 0.5 -29.0 0.1 0.0 \n",
"75% 117.0 0.6 0.5 0.5 -21.8 0.1 0.1 \n",
"max 200.0 1.0 1.0 1.0 0.0 0.9 0.5 \n",
"\n",
" AG AC UA ... GG GC CA \\\n",
"count 4591323.0 4591322.0 4591322.0 ... 4591322.0 4591322.0 4591322.0 \n",
"mean 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"std 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"min 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"25% 0.1 0.0 0.0 ... 0.1 0.0 0.0 \n",
"50% 0.1 0.0 0.0 ... 0.1 0.1 0.1 \n",
"75% 0.1 0.1 0.0 ... 0.1 0.1 0.1 \n",
"max 0.5 0.5 0.0 ... 0.4 0.3 0.5 \n",
"\n",
" CU CG CC MonoMFE mono_z DiMFE di_z \n",
"count 4591322.0 4591322.0 4591322.0 4591322.0 4591322.0 4591322.0 4591322.0 \n",
"mean 0.1 0.0 0.1 -24.8 -0.0 -25.7 -0.0 \n",
"std 0.0 0.0 0.0 11.7 1.0 11.8 1.0 \n",
"min 0.0 0.0 0.0 -118.5 -10.0 -140.0 -10.0 \n",
"25% 0.0 0.0 0.0 -29.8 -0.6 -30.7 -0.6 \n",
"50% 0.1 0.0 0.1 -22.3 0.1 -23.1 0.1 \n",
"75% 0.1 0.1 0.1 -17.7 0.7 -18.6 0.7 \n",
"max 0.5 0.3 0.4 0.0 4.4 0.0 4.0 \n",
"\n",
"[8 rows x 25 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Length | \n",
" GCpercent | \n",
" CGratio | \n",
" AUratio | \n",
" MFE | \n",
" AA | \n",
" AU | \n",
" AG | \n",
" AC | \n",
" UA | \n",
" ... | \n",
" GG | \n",
" GC | \n",
" CA | \n",
" CU | \n",
" CG | \n",
" CC | \n",
" MonoMFE | \n",
" mono_z | \n",
" DiMFE | \n",
" di_z | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 4591323.0 | \n",
" 4591323.0 | \n",
" 4591323.0 | \n",
" 4591323.0 | \n",
" 4591323.0 | \n",
" 4591323.0 | \n",
" 4591323.0 | \n",
" 4591323.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" ... | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
" 4591322.0 | \n",
"
\n",
" \n",
" mean | \n",
" 97.7 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -31.9 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -24.8 | \n",
" -0.0 | \n",
" -25.7 | \n",
" -0.0 | \n",
"
\n",
" \n",
" std | \n",
" 34.9 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 15.6 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 11.7 | \n",
" 1.0 | \n",
" 11.8 | \n",
" 1.0 | \n",
"
\n",
" \n",
" min | \n",
" 60.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -221.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -118.5 | \n",
" -10.0 | \n",
" -140.0 | \n",
" -10.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 72.0 | \n",
" 0.4 | \n",
" 0.4 | \n",
" 0.4 | \n",
" -38.7 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" -29.8 | \n",
" -0.6 | \n",
" -30.7 | \n",
" -0.6 | \n",
"
\n",
" \n",
" 50% | \n",
" 82.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -29.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" 0.1 | \n",
" -22.3 | \n",
" 0.1 | \n",
" -23.1 | \n",
" 0.1 | \n",
"
\n",
" \n",
" 75% | \n",
" 117.0 | \n",
" 0.6 | \n",
" 0.5 | \n",
" 0.5 | \n",
" -21.8 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.0 | \n",
" ... | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" 0.1 | \n",
" -17.7 | \n",
" 0.7 | \n",
" -18.6 | \n",
" 0.7 | \n",
"
\n",
" \n",
" max | \n",
" 200.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.9 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.0 | \n",
" ... | \n",
" 0.4 | \n",
" 0.3 | \n",
" 0.5 | \n",
" 0.5 | \n",
" 0.3 | \n",
" 0.4 | \n",
" 0.0 | \n",
" 4.4 | \n",
" 0.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
"
\n",
"
8 rows × 25 columns
\n",
"
\n",
"
\n",
" \n",
" \n",
"\n",
" \n",
"
\n",
"
\n",
" "
]
},
"metadata": {},
"execution_count": 67
}
],
"source": [
"test_over_60.describe()"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"id": "udfgrCE1f9To",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "264c7917-662e-48a2-eb3e-3b529d056cd5"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['Length', 'GCpercent', 'CGratio', 'AUratio', 'AA', 'AU', 'AG', 'AC',\n",
" 'UA', 'UU', 'UG', 'UC', 'GA', 'GU', 'GG', 'GC', 'CA', 'CU', 'CG', 'CC'],\n",
" dtype='object')"
]
},
"metadata": {},
"execution_count": 68
}
],
"source": [
"my_feature.columns"
]
},
{
"cell_type": "markdown",
"source": [
"The following cell shows an example of how you can predict MFEs for a large dataset. Here we analyze all sequences of 60 to 200 nt from the RNACentral database."
],
"metadata": {
"id": "lcWNuJXC4ep_"
}
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"id": "iz39Jue9KirH"
},
"outputs": [],
"source": [
"# Run my_model on the 20 feature columns listed below (the same names and order that\n",
"# my_feature.columns reports): Length, three composition ratios, and the 16 two-letter columns.\n",
"feature_columns = [\n",
"    'Length', 'GCpercent', 'CGratio', 'AUratio',\n",
"    'AA', 'AU', 'AG', 'AC',\n",
"    'UA', 'UU', 'UG', 'UC',\n",
"    'GA', 'GU', 'GG', 'GC',\n",
"    'CA', 'CU', 'CG', 'CC',\n",
"]\n",
"rna_central_MonoMFE_result = my_model.predict(test_over_60[feature_columns])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jp0jV5fUijwv"
},
"outputs": [],
"source": [
"# Wrap the raw prediction array in a DataFrame so that further columns can be attached to it.\n",
"# The frame gets a default integer column label for the prediction values.\n",
"#rna_central_predicted_stds = pd.DataFrame(rna_central_MonoStd_result)\n",
"rna_central_predicted_mfes = pd.DataFrame(rna_central_MonoMFE_result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WybQRGJGUNs1"
},
"outputs": [],
"source": [
"#rna_central_predicted_stds = pd.DataFrame(rna_central_MonoStd_result)\n",
"#rna_central_predicted_mfes.columns = [\"predictedMonoStds\"]\n",
"rna_central_predicted_mfes[\"predictedMonoStds\"] = rna_central_MonoStd_result\n",
"rna_central_predicted_mfes[\"predictedMonoMFEs\"] = rna_central_MonoMFE_result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "brYLobmbtqWA"
},
"outputs": [],
"source": [
"len(test_over_60['MFE'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "M0EU8wJ6OXrL"
},
"outputs": [],
"source": [
"# Copy the reference columns out of test_over_60 as plain lists; list assignment is\n",
"# positional, so the row labels of test_over_60 play no part in the copy.\n",
"for column_name in ('MFE', 'Length', 'MonoMFE', 'DiMFE'):\n",
"    rna_central_predicted_mfes[column_name] = test_over_60[column_name].to_list()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WtXiprh1_W-u"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "dJx4eR5c-dUs"
},
"outputs": [],
"source": [
"my_label.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gpks0PDPSeYa"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"predictedMonoMFEs\", y='MonoMFE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ruoUK3wkpVyZ"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"predictedMonoMFEs\", y='MonoMFE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "92q1yXeJpXYJ"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"predictedMonoMFEs\", y='MFE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2P-nvPkTTXdF"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"predictedMonoMFEs\", y='MFE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "b0cSNTOQo8ZG"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"MonoMFE\", y='MFE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tgImj2Ajp4Fu"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"DiMFE\", y='MFE')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KtU4uiEip68c"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"MonoMFE\", y='DiMFE')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yy_R7lIuVf9P"
},
"source": [
"Calculating z-scores"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "aejMMcHoViBv"
},
"outputs": [],
"source": [
"### Create MFE list\n",
"mfe_list = test_over_60['MFE'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "P_7W-UJCU46Z"
},
"outputs": [],
"source": [
"# z-score of every observed MFE: (MFE - predicted mean MFE) / predicted standard deviation,\n",
"# rounded to two decimals. Per-row print calls are deliberately absent: mfe_list has one\n",
"# entry per row of test_over_60, and printing three values per row floods the cell output.\n",
"# NOTE(review): rna_central_meanMFE_result and rna_central_MonoStd_result are not assigned in\n",
"# the neighbouring cells (the prediction cell assigns rna_central_MonoMFE_result) - confirm\n",
"# that both are defined earlier in the notebook.\n",
"zscore_list = []\n",
"for i, mfe in enumerate(mfe_list):\n",
"    # A positive predicted mean is clamped to 0 before it is used.\n",
"    mean_mfe = min(rna_central_meanMFE_result[i][0], 0)\n",
"    std = rna_central_MonoStd_result[i][0]\n",
"    zscore_list.append(round(float((mfe - mean_mfe) / std), 2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TQRIJSetay-m"
},
"outputs": [],
"source": [
"len(zscore_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "EPubCKBsV2LY"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes['predicted_Z-scores'] = zscore_list"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "srYVRcOaWF91"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes['Mono_Z-scores'] = test_over_60['mono_z'].to_list()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "A-qVx0CmXUJb"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes['Di_Z-scores'] = test_over_60['di_z'].to_list()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "VWKQg1FaWD6K"
},
"outputs": [],
"source": [
"#rna_central_predicted_mfes.plot.scatter(x=\"predicted_Z-scores\", y='Mono_Z-scores', xlim=[-10, 10], ylim=[-10,10])\n",
"rna_central_predicted_mfes.plot.scatter(x=\"predicted_Z-scores\", y='Mono_Z-scores')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "U4fXpJUYXXCq"
},
"outputs": [],
"source": [
"rna_central_predicted_mfes.plot.scatter(x=\"predicted_Z-scores\", y='Di_Z-scores', xlim=[-20, 10])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nA7yh96S05Lo"
},
"source": [
"Violin Plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TG6c2FF0049_"
},
"outputs": [],
"source": [
"fig, axes = plt.subplots()\n",
"\n",
"# Hand violinplot the MonoMFE values themselves. Indexing test_df with the value array\n",
"# (test_df[test_df['MonoMFE'].values]) would treat every value as a column label and fail.\n",
"# Missing values are dropped first so the density estimate only sees real numbers.\n",
"axes.violinplot(dataset=test_df['MonoMFE'].dropna().to_numpy())\n",
"\n",
"axes.set_title('Predicted Mono MFEs')\n",
"axes.set_xlabel('Predicted Mono MFE')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"source": [
"### Questions or suggestions?\n",
"E-mail:\n",
"\n",
"Ryan J. Andrews ryan.j.rna@gmail.com\n",
"Walter N. Moss wmoss@iastate.edu\n",
"Warren B. Rouse wbrouse@iastate.edu\n",
"\n"
],
"metadata": {
"id": "_fCqd4qLzMyn"
}
},
{
"cell_type": "markdown",
"metadata": {
"id": "l7OX4RQzd3dR"
},
"source": [
"# Making Predictions with model\n",
"Once you have both a standard deviation model and a mean MFE model built and loaded, you can calculate z-scores: z = (MFE - predicted mean MFE) / predicted standard deviation."
]
},
{
"cell_type": "markdown",
"source": [
"Now we are going to clone the ScanFold 2.0 GitHub repository so we can easily access the models stored there."
],
"metadata": {
"id": "-bxTUcqH86Ql"
}
},
{
"cell_type": "code",
"source": [
"! git clone https://github.com/moss-lab/ScanFold2.0"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "n2-9L12k_53b",
"outputId": "8ea3072a-1651-47c4-b3f5-05e131579fd5"
},
"execution_count": 79,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'ScanFold2.0'...\n",
"remote: Enumerating objects: 192, done.\u001b[K\n",
"remote: Counting objects: 100% (33/33), done.\u001b[K\n",
"remote: Compressing objects: 100% (5/5), done.\u001b[K\n",
"remote: Total 192 (delta 29), reused 28 (delta 28), pack-reused 159\u001b[K\n",
"Receiving objects: 100% (192/192), 319.09 KiB | 3.80 MiB/s, done.\n",
"Resolving deltas: 100% (108/108), done.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Load the four saved Keras models from the cloned ScanFold2.0 checkout under /content.\n",
"load_saved_model = tf.keras.models.load_model\n",
"\n",
"mean_mfe_model = load_saved_model('/content/ScanFold2.0/MeanMFE')\n",
"std_dev_model = load_saved_model('/content/ScanFold2.0/StdDev')\n",
"di_mean_mfe_model = load_saved_model('/content/ScanFold2.0/DiMFE')\n",
"di_std_dev_model = load_saved_model('/content/ScanFold2.0/DiStd')"
],
"metadata": {
"id": "m-nUGrot8Gb_"
},
"execution_count": 100,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "q03hC506k96D"
},
"source": [
"### Installing ViennaRNA\n",
"First we will install RNAfold (from ViennaRNA)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"id": "Y9nOW6sbgMBf",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8974ef95-22a2-4d4d-e5d6-34fb01c04e7a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"PREFIX=/usr/local\n",
"installing: python-3.6.5-hc3d631a_2 ...\n",
"installing: ca-certificates-2018.03.07-0 ...\n",
"installing: conda-env-2.6.0-h36134e3_1 ...\n",
"installing: libgcc-ng-7.2.0-hdf63c60_3 ...\n",
"installing: libstdcxx-ng-7.2.0-hdf63c60_3 ...\n",
"installing: libffi-3.2.1-hd88cf55_4 ...\n",
"installing: ncurses-6.1-hf484d3e_0 ...\n",
"installing: openssl-1.0.2o-h20670df_0 ...\n",
"installing: tk-8.6.7-hc745277_3 ...\n",
"installing: xz-5.2.4-h14c3975_4 ...\n",
"installing: yaml-0.1.7-had09818_2 ...\n",
"installing: zlib-1.2.11-ha838bed_2 ...\n",
"installing: libedit-3.1.20170329-h6b74fdf_2 ...\n",
"installing: readline-7.0-ha6073c6_4 ...\n",
"installing: sqlite-3.23.1-he433501_0 ...\n",
"installing: asn1crypto-0.24.0-py36_0 ...\n",
"installing: certifi-2018.4.16-py36_0 ...\n",
"installing: chardet-3.0.4-py36h0f667ec_1 ...\n",
"installing: idna-2.6-py36h82fb2a8_1 ...\n",
"installing: pycosat-0.6.3-py36h0a5515d_0 ...\n",
"installing: pycparser-2.18-py36hf9f622e_1 ...\n",
"installing: pysocks-1.6.8-py36_0 ...\n",
"installing: ruamel_yaml-0.15.37-py36h14c3975_2 ...\n",
"installing: six-1.11.0-py36h372c433_1 ...\n",
"installing: cffi-1.11.5-py36h9745a5d_0 ...\n",
"installing: setuptools-39.2.0-py36_0 ...\n",
"installing: cryptography-2.2.2-py36h14c3975_0 ...\n",
"installing: wheel-0.31.1-py36_0 ...\n",
"installing: pip-10.0.1-py36_0 ...\n",
"installing: pyopenssl-18.0.0-py36_0 ...\n",
"installing: urllib3-1.22-py36hbe7ace6_0 ...\n",
"installing: requests-2.18.4-py36he2e5f8d_1 ...\n",
"installing: conda-4.5.4-py36_0 ...\n",
"installation finished.\n",
"WARNING:\n",
" You currently have a PYTHONPATH environment variable set. This may cause\n",
" unexpected behavior when running the Python interpreter in Miniconda3.\n",
" For best results, please verify that your PYTHONPATH only points to\n",
" directories of packages that are compatible with the Python interpreter\n",
" in Miniconda3: /usr/local\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"--2022-09-16 19:45:22-- https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh\n",
"Resolving repo.continuum.io (repo.continuum.io)... 104.18.201.79, 104.18.200.79, 2606:4700::6812:c94f, ...\n",
"Connecting to repo.continuum.io (repo.continuum.io)|104.18.201.79|:443... connected.\n",
"HTTP request sent, awaiting response... 301 Moved Permanently\n",
"Location: https://repo.anaconda.com/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh [following]\n",
"--2022-09-16 19:45:22-- https://repo.anaconda.com/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh\n",
"Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.130.3, 104.16.131.3, 2606:4700::6810:8203, ...\n",
"Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.130.3|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 58468498 (56M) [application/x-sh]\n",
"Saving to: ‘Miniconda3-4.5.4-Linux-x86_64.sh’\n",
"\n",
" 0K .......... .......... .......... .......... .......... 0% 45.6M 1s\n",
" 50K .......... .......... .......... .......... .......... 0% 4.23M 7s\n",
" 100K .......... .......... .......... .......... .......... 0% 5.33M 8s\n",
" 150K .......... .......... .......... .......... .......... 0% 27.2M 7s\n",
" 200K .......... .......... .......... .......... .......... 0% 67.4M 6s\n",
" 250K .......... .......... .......... .......... .......... 0% 70.8M 5s\n",
" 300K .......... .......... .......... .......... .......... 0% 6.09M 5s\n",
" 350K .......... .......... .......... .......... .......... 0% 61.5M 5s\n",
" 400K .......... .......... .......... .......... .......... 0% 79.9M 4s\n",
" 450K .......... .......... .......... .......... .......... 0% 79.0M 4s\n",
" 500K .......... .......... .......... .......... .......... 0% 151M 4s\n",
" 550K .......... .......... .......... .......... .......... 1% 92.4M 3s\n",
" 600K .......... .......... .......... .......... .......... 1% 83.5M 3s\n",
" 650K .......... .......... .......... .......... .......... 1% 188M 3s\n",
" 700K .......... .......... .......... .......... .......... 1% 84.9M 3s\n",
" 750K .......... .......... .......... .......... .......... 1% 56.1M 3s\n",
" 800K .......... .......... .......... .......... .......... 1% 68.9M 3s\n",
" 850K .......... .......... .......... .......... .......... 1% 9.15M 3s\n",
" 900K .......... .......... .......... .......... .......... 1% 64.3M 3s\n",
" 950K .......... .......... .......... .......... .......... 1% 57.9M 3s\n",
" 1000K .......... .......... .......... .......... .......... 1% 78.5M 2s\n",
" 1050K .......... .......... .......... .......... .......... 1% 65.5M 2s\n",
" 1100K .......... .......... .......... .......... .......... 2% 166M 2s\n",
" 1150K .......... .......... .......... .......... .......... 2% 67.1M 2s\n",
" 1200K .......... .......... .......... .......... .......... 2% 126M 2s\n",
" 1250K .......... .......... .......... .......... .......... 2% 342M 2s\n",
" 1300K .......... .......... .......... .......... .......... 2% 356M 2s\n",
" 1350K .......... .......... .......... .......... .......... 2% 224M 2s\n",
" 1400K .......... .......... .......... .......... .......... 2% 117M 2s\n",
" 1450K .......... .......... .......... .......... .......... 2% 50.6M 2s\n",
" 1500K .......... .......... .......... .......... .......... 2% 132M 2s\n",
" 1550K .......... .......... .......... .......... .......... 2% 174M 2s\n",
" 1600K .......... .......... .......... .......... .......... 2% 172M 2s\n",
" 1650K .......... .......... .......... .......... .......... 2% 172M 2s\n",
" 1700K .......... .......... .......... .......... .......... 3% 73.6M 2s\n",
" 1750K .......... .......... .......... .......... .......... 3% 95.2M 2s\n",
" 1800K .......... .......... .......... .......... .......... 3% 121M 2s\n",
" 1850K .......... .......... .......... .......... .......... 3% 142M 2s\n",
" 1900K .......... .......... .......... .......... .......... 3% 25.8M 2s\n",
" 1950K .......... .......... .......... .......... .......... 3% 58.3M 2s\n",
" 2000K .......... .......... .......... .......... .......... 3% 191M 2s\n",
" 2050K .......... .......... .......... .......... .......... 3% 306M 1s\n",
" 2100K .......... .......... .......... .......... .......... 3% 277M 1s\n",
" 2150K .......... .......... .......... .......... .......... 3% 217M 1s\n",
" 2200K .......... .......... .......... .......... .......... 3% 341M 1s\n",
" 2250K .......... .......... .......... .......... .......... 4% 79.6M 1s\n",
" 2300K .......... .......... .......... .......... .......... 4% 63.0M 1s\n",
" 2350K .......... .......... .......... .......... .......... 4% 54.3M 1s\n",
" 2400K .......... .......... .......... .......... .......... 4% 58.9M 1s\n",
" 2450K .......... .......... .......... .......... .......... 4% 111M 1s\n",
" 2500K .......... .......... .......... .......... .......... 4% 331M 1s\n",
" 2550K .......... .......... .......... .......... .......... 4% 225M 1s\n",
" 2600K .......... .......... .......... .......... .......... 4% 347M 1s\n",
" 2650K .......... .......... .......... .......... .......... 4% 256M 1s\n",
" 2700K .......... .......... .......... .......... .......... 4% 306M 1s\n",
" 2750K .......... .......... .......... .......... .......... 4% 329M 1s\n",
" 2800K .......... .......... .......... .......... .......... 4% 282M 1s\n",
" 2850K .......... .......... .......... .......... .......... 5% 340M 1s\n",
" 2900K .......... .......... .......... .......... .......... 5% 275M 1s\n",
" 2950K .......... .......... .......... .......... .......... 5% 182M 1s\n",
" 3000K .......... .......... .......... .......... .......... 5% 299M 1s\n",
" 3050K .......... .......... .......... .......... .......... 5% 363M 1s\n",
" 3100K .......... .......... .......... .......... .......... 5% 293M 1s\n",
" 3150K .......... .......... .......... .......... .......... 5% 236M 1s\n",
" 3200K .......... .......... .......... .......... .......... 5% 150M 1s\n",
" 3250K .......... .......... .......... .......... .......... 5% 256M 1s\n",
" 3300K .......... .......... .......... .......... .......... 5% 345M 1s\n",
" 3350K .......... .......... .......... .......... .......... 5% 247M 1s\n",
" 3400K .......... .......... .......... .......... .......... 6% 39.8M 1s\n",
" 3450K .......... .......... .......... .......... .......... 6% 52.7M 1s\n",
" 3500K .......... .......... .......... .......... .......... 6% 61.3M 1s\n",
" 3550K .......... .......... .......... .......... .......... 6% 69.8M 1s\n",
" 3600K .......... .......... .......... .......... .......... 6% 177M 1s\n",
" 3650K .......... .......... .......... .......... .......... 6% 175M 1s\n",
" 3700K .......... .......... .......... .......... .......... 6% 191M 1s\n",
" 3750K .......... .......... .......... .......... .......... 6% 140M 1s\n",
" 3800K .......... .......... .......... .......... .......... 6% 102M 1s\n",
" 3850K .......... .......... .......... .......... .......... 6% 135M 1s\n",
" 3900K .......... .......... .......... .......... .......... 6% 186M 1s\n",
" 3950K .......... .......... .......... .......... .......... 7% 182M 1s\n",
" 4000K .......... .......... .......... .......... .......... 7% 186M 1s\n",
" 4050K .......... .......... .......... .......... .......... 7% 189M 1s\n",
" 4100K .......... .......... .......... .......... .......... 7% 207M 1s\n",
" 4150K .......... .......... .......... .......... .......... 7% 166M 1s\n",
" 4200K .......... .......... .......... .......... .......... 7% 228M 1s\n",
" 4250K .......... .......... .......... .......... .......... 7% 380M 1s\n",
" 4300K .......... .......... .......... .......... .......... 7% 339M 1s\n",
" 4350K .......... .......... .......... .......... .......... 7% 173M 1s\n",
" 4400K .......... .......... .......... .......... .......... 7% 297M 1s\n",
" 4450K .......... .......... .......... .......... .......... 7% 184M 1s\n",
" 4500K .......... .......... .......... .......... .......... 7% 282M 1s\n",
" 4550K .......... .......... .......... .......... .......... 8% 290M 1s\n",
" 4600K .......... .......... .......... .......... .......... 8% 366M 1s\n",
" 4650K .......... .......... .......... .......... .......... 8% 355M 1s\n",
" 4700K .......... .......... .......... .......... .......... 8% 371M 1s\n",
" 4750K .......... .......... .......... .......... .......... 8% 208M 1s\n",
" 4800K .......... .......... .......... .......... .......... 8% 355M 1s\n",
" 4850K .......... .......... .......... .......... .......... 8% 333M 1s\n",
" 4900K .......... .......... .......... .......... .......... 8% 327M 1s\n",
" 4950K .......... .......... .......... .......... .......... 8% 273M 1s\n",
" 5000K .......... .......... .......... .......... .......... 8% 380M 1s\n",
" 5050K .......... .......... .......... .......... .......... 8% 301M 1s\n",
" 5100K .......... .......... .......... .......... .......... 9% 367M 1s\n",
" 5150K .......... .......... .......... .......... .......... 9% 337M 1s\n",
" 5200K .......... .......... .......... .......... .......... 9% 364M 1s\n",
" 5250K .......... .......... .......... .......... .......... 9% 376M 1s\n",
" 5300K .......... .......... .......... .......... .......... 9% 20.0M 1s\n",
" 5350K .......... .......... .......... .......... .......... 9% 141M 1s\n",
" 5400K .......... .......... .......... .......... .......... 9% 284M 1s\n",
" 5450K .......... .......... .......... .......... .......... 9% 357M 1s\n",
" 5500K .......... .......... .......... .......... .......... 9% 362M 1s\n",
" 5550K .......... .......... .......... .......... .......... 9% 215M 1s\n",
" 5600K .......... .......... .......... .......... .......... 9% 238M 1s\n",
" 5650K .......... .......... .......... .......... .......... 9% 225M 1s\n",
" 5700K .......... .......... .......... .......... .......... 10% 278M 1s\n",
" 5750K .......... .......... .......... .......... .......... 10% 229M 1s\n",
" 5800K .......... .......... .......... .......... .......... 10% 164M 1s\n",
" 5850K .......... .......... .......... .......... .......... 10% 266M 1s\n",
" 5900K .......... .......... .......... .......... .......... 10% 290M 1s\n",
" 5950K .......... .......... .......... .......... .......... 10% 338M 1s\n",
" 6000K .......... .......... .......... .......... .......... 10% 370M 1s\n",
" 6050K .......... .......... .......... .......... .......... 10% 373M 1s\n",
" 6100K .......... .......... .......... .......... .......... 10% 224M 1s\n",
" 6150K .......... .......... .......... .......... .......... 10% 292M 1s\n",
" 6200K .......... .......... .......... .......... .......... 10% 380M 1s\n",
" 6250K .......... .......... .......... .......... .......... 11% 369M 1s\n",
" 6300K .......... .......... .......... .......... .......... 11% 372M 1s\n",
" 6350K .......... .......... .......... .......... .......... 11% 101M 1s\n",
" 6400K .......... .......... .......... .......... .......... 11% 55.7M 1s\n",
" 6450K .......... .......... .......... .......... .......... 11% 50.8M 1s\n",
" 6500K .......... .......... .......... .......... .......... 11% 58.5M 1s\n",
" 6550K .......... .......... .......... .......... .......... 11% 69.4M 1s\n",
" 6600K .......... .......... .......... .......... .......... 11% 360M 1s\n",
" 6650K .......... .......... .......... .......... .......... 11% 333M 1s\n",
" 6700K .......... .......... .......... .......... .......... 11% 315M 1s\n",
" 6750K .......... .......... .......... .......... .......... 11% 306M 1s\n",
" 6800K .......... .......... .......... .......... .......... 11% 338M 1s\n",
" 6850K .......... .......... .......... .......... .......... 12% 359M 1s\n",
" 6900K .......... .......... .......... .......... .......... 12% 345M 1s\n",
" 6950K .......... .......... .......... .......... .......... 12% 299M 1s\n",
" 7000K .......... .......... .......... .......... .......... 12% 364M 1s\n",
" 7050K .......... .......... .......... .......... .......... 12% 297M 1s\n",
" 7100K .......... .......... .......... .......... .......... 12% 359M 1s\n",
" 7150K .......... .......... .......... .......... .......... 12% 311M 1s\n",
" 7200K .......... .......... .......... .......... .......... 12% 373M 1s\n",
" 7250K .......... .......... .......... .......... .......... 12% 370M 1s\n",
" 7300K .......... .......... .......... .......... .......... 12% 370M 1s\n",
" 7350K .......... .......... .......... .......... .......... 12% 322M 1s\n",
" 7400K .......... .......... .......... .......... .......... 13% 293M 1s\n",
" 7450K .......... .......... .......... .......... .......... 13% 256M 1s\n",
" 7500K .......... .......... .......... .......... .......... 13% 370M 1s\n",
" 7550K .......... .......... .......... .......... .......... 13% 325M 1s\n",
" 7600K .......... .......... .......... .......... .......... 13% 374M 1s\n",
" 7650K .......... .......... .......... .......... .......... 13% 237M 1s\n",
" 7700K .......... .......... .......... .......... .......... 13% 286M 1s\n",
" 7750K .......... .......... .......... .......... .......... 13% 312M 1s\n",
" 7800K .......... .......... .......... .......... .......... 13% 333M 1s\n",
" 7850K .......... .......... .......... .......... .......... 13% 377M 1s\n",
" 7900K .......... .......... .......... .......... .......... 13% 268M 1s\n",
" 7950K .......... .......... .......... .......... .......... 14% 216M 1s\n",
" 8000K .......... .......... .......... .......... .......... 14% 128M 1s\n",
" 8050K .......... .......... .......... .......... .......... 14% 249M 1s\n",
" 8100K .......... .......... .......... .......... .......... 14% 294M 1s\n",
" 8150K .......... .......... .......... .......... .......... 14% 205M 1s\n",
" 8200K .......... .......... .......... .......... .......... 14% 224M 1s\n",
" 8250K .......... .......... .......... .......... .......... 14% 165M 1s\n",
" 8300K .......... .......... .......... .......... .......... 14% 13.9M 1s\n",
" 8350K .......... .......... .......... .......... .......... 14% 63.6M 1s\n",
" 8400K .......... .......... .......... .......... .......... 14% 85.0M 1s\n",
" 8450K .......... .......... .......... .......... .......... 14% 70.7M 1s\n",
" 8500K .......... .......... .......... .......... .......... 14% 146M 1s\n",
" 8550K .......... .......... .......... .......... .......... 15% 138M 1s\n",
" 8600K .......... .......... .......... .......... .......... 15% 166M 1s\n",
" 8650K .......... .......... .......... .......... .......... 15% 169M 1s\n",
" 8700K .......... .......... .......... .......... .......... 15% 185M 1s\n",
" 8750K .......... .......... .......... .......... .......... 15% 162M 1s\n",
" 8800K .......... .......... .......... .......... .......... 15% 158M 1s\n",
" 8850K .......... .......... .......... .......... .......... 15% 163M 1s\n",
" 8900K .......... .......... .......... .......... .......... 15% 205M 1s\n",
" 8950K .......... .......... .......... .......... .......... 15% 176M 1s\n",
" 9000K .......... .......... .......... .......... .......... 15% 118M 1s\n",
" 9050K .......... .......... .......... .......... .......... 15% 206M 1s\n",
" 9100K .......... .......... .......... .......... .......... 16% 166M 1s\n",
" 9150K .......... .......... .......... .......... .......... 16% 171M 1s\n",
" 9200K .......... .......... .......... .......... .......... 16% 181M 1s\n",
" 9250K .......... .......... .......... .......... .......... 16% 161M 1s\n",
" 9300K .......... .......... .......... .......... .......... 16% 197M 1s\n",
" 9350K .......... .......... .......... .......... .......... 16% 167M 1s\n",
" 9400K .......... .......... .......... .......... .......... 16% 205M 1s\n",
" 9450K .......... .......... .......... .......... .......... 16% 195M 1s\n",
" 9500K .......... .......... .......... .......... .......... 16% 152M 1s\n",
" 9550K .......... .......... .......... .......... .......... 16% 157M 1s\n",
" 9600K .......... .......... .......... .......... .......... 16% 190M 1s\n",
" 9650K .......... .......... .......... .......... .......... 16% 168M 1s\n",
" 9700K .......... .......... .......... .......... .......... 17% 168M 1s\n",
" 9750K .......... .......... .......... .......... .......... 17% 130M 1s\n",
" 9800K .......... .......... .......... .......... .......... 17% 167M 1s\n",
" 9850K .......... .......... .......... .......... .......... 17% 174M 0s\n",
" 9900K .......... .......... .......... .......... .......... 17% 172M 0s\n",
" 9950K .......... .......... .......... .......... .......... 17% 136M 0s\n",
" 10000K .......... .......... .......... .......... .......... 17% 163M 0s\n",
" 10050K .......... .......... .......... .......... .......... 17% 170M 0s\n",
" 10100K .......... .......... .......... .......... .......... 17% 162M 0s\n",
" 10150K .......... .......... .......... .......... .......... 17% 141M 0s\n",
" 10200K .......... .......... .......... .......... .......... 17% 195M 0s\n",
" 10250K .......... .......... .......... .......... .......... 18% 217M 0s\n",
" 10300K .......... .......... .......... .......... .......... 18% 193M 0s\n",
" 10350K .......... .......... .......... .......... .......... 18% 180M 0s\n",
" 10400K .......... .......... .......... .......... .......... 18% 198M 0s\n",
" 10450K .......... .......... .......... .......... .......... 18% 184M 0s\n",
" 10500K .......... .......... .......... .......... .......... 18% 178M 0s\n",
" 10550K .......... .......... .......... .......... .......... 18% 144M 0s\n",
" 10600K .......... .......... .......... .......... .......... 18% 188M 0s\n",
" 10650K .......... .......... .......... .......... .......... 18% 207M 0s\n",
" 10700K .......... .......... .......... .......... .......... 18% 151M 0s\n",
" 10750K .......... .......... .......... .......... .......... 18% 155M 0s\n",
" 10800K .......... .......... .......... .......... .......... 19% 212M 0s\n",
" 10850K .......... .......... .......... .......... .......... 19% 200M 0s\n",
" 10900K .......... .......... .......... .......... .......... 19% 206M 0s\n",
" 10950K .......... .......... .......... .......... .......... 19% 177M 0s\n",
" 11000K .......... .......... .......... .......... .......... 19% 199M 0s\n",
" 11050K .......... .......... .......... .......... .......... 19% 218M 0s\n",
" 11100K .......... .......... .......... .......... .......... 19% 224M 0s\n",
" 11150K .......... .......... .......... .......... .......... 19% 169M 0s\n",
" 11200K .......... .......... .......... .......... .......... 19% 205M 0s\n",
" 11250K .......... .......... .......... .......... .......... 19% 175M 0s\n",
" 11300K .......... .......... .......... .......... .......... 19% 204M 0s\n",
" 11350K .......... .......... .......... .......... .......... 19% 166M 0s\n",
" 11400K .......... .......... .......... .......... .......... 20% 190M 0s\n",
" 11450K .......... .......... .......... .......... .......... 20% 183M 0s\n",
" 11500K .......... .......... .......... .......... .......... 20% 204M 0s\n",
" 11550K .......... .......... .......... .......... .......... 20% 191M 0s\n",
" 11600K .......... .......... .......... .......... .......... 20% 212M 0s\n",
" 11650K .......... .......... .......... .......... .......... 20% 209M 0s\n",
" 11700K .......... .......... .......... .......... .......... 20% 193M 0s\n",
" 11750K .......... .......... .......... .......... .......... 20% 181M 0s\n",
" 11800K .......... .......... .......... .......... .......... 20% 214M 0s\n",
" 11850K .......... .......... .......... .......... .......... 20% 205M 0s\n",
" 11900K .......... .......... .......... .......... .......... 20% 180M 0s\n",
" 11950K .......... .......... .......... .......... .......... 21% 94.3M 0s\n",
" 12000K .......... .......... .......... .......... .......... 21% 145M 0s\n",
" 12050K .......... .......... .......... .......... .......... 21% 163M 0s\n",
" 12100K .......... .......... .......... .......... .......... 21% 163M 0s\n",
" 12150K .......... .......... .......... .......... .......... 21% 130M 0s\n",
" 12200K .......... .......... .......... .......... .......... 21% 146M 0s\n",
" 12250K .......... .......... .......... .......... .......... 21% 198M 0s\n",
" 12300K .......... .......... .......... .......... .......... 21% 205M 0s\n",
" 12350K .......... .......... .......... .......... .......... 21% 187M 0s\n",
" 12400K .......... .......... .......... .......... .......... 21% 168M 0s\n",
" 12450K .......... .......... .......... .......... .......... 21% 205M 0s\n",
" 12500K .......... .......... .......... .......... .......... 21% 186M 0s\n",
" 12550K .......... .......... .......... .......... .......... 22% 163M 0s\n",
" 12600K .......... .......... .......... .......... .......... 22% 213M 0s\n",
" 12650K .......... .......... .......... .......... .......... 22% 226M 0s\n",
" 12700K .......... .......... .......... .......... .......... 22% 168M 0s\n",
" 12750K .......... .......... .......... .......... .......... 22% 167M 0s\n",
" 12800K .......... .......... .......... .......... .......... 22% 209M 0s\n",
" 12850K .......... .......... .......... .......... .......... 22% 217M 0s\n",
" 12900K .......... .......... .......... .......... .......... 22% 192M 0s\n",
" 12950K .......... .......... .......... .......... .......... 22% 170M 0s\n",
" 13000K .......... .......... .......... .......... .......... 22% 206M 0s\n",
" 13050K .......... .......... .......... .......... .......... 22% 211M 0s\n",
" 13100K .......... .......... .......... .......... .......... 23% 202M 0s\n",
" 13150K .......... .......... .......... .......... .......... 23% 184M 0s\n",
" 13200K .......... .......... .......... .......... .......... 23% 200M 0s\n",
" 13250K .......... .......... .......... .......... .......... 23% 193M 0s\n",
" 13300K .......... .......... .......... .......... .......... 23% 172M 0s\n",
" 13350K .......... .......... .......... .......... .......... 23% 171M 0s\n",
" 13400K .......... .......... .......... .......... .......... 23% 188M 0s\n",
" 13450K .......... .......... .......... .......... .......... 23% 163M 0s\n",
" 13500K .......... .......... .......... .......... .......... 23% 172M 0s\n",
" 13550K .......... .......... .......... .......... .......... 23% 180M 0s\n",
" 13600K .......... .......... .......... .......... .......... 23% 215M 0s\n",
" 13650K .......... .......... .......... .......... .......... 23% 214M 0s\n",
" 13700K .......... .......... .......... .......... .......... 24% 206M 0s\n",
" 13750K .......... .......... .......... .......... .......... 24% 169M 0s\n",
" 13800K .......... .......... .......... .......... .......... 24% 187M 0s\n",
" 13850K .......... .......... .......... .......... .......... 24% 159M 0s\n",
" 13900K .......... .......... .......... .......... .......... 24% 163M 0s\n",
" 13950K .......... .......... .......... .......... .......... 24% 78.2M 0s\n",
" 14000K .......... .......... .......... .......... .......... 24% 154M 0s\n",
" 14050K .......... .......... .......... .......... .......... 24% 165M 0s\n",
" 14100K .......... .......... .......... .......... .......... 24% 167M 0s\n",
" 14150K .......... .......... .......... .......... .......... 24% 144M 0s\n",
" 14200K .......... .......... .......... .......... .......... 24% 163M 0s\n",
" 14250K .......... .......... .......... .......... .......... 25% 190M 0s\n",
" 14300K .......... .......... .......... .......... .......... 25% 190M 0s\n",
" 14350K .......... .......... .......... .......... .......... 25% 331M 0s\n",
" 14400K .......... .......... .......... .......... .......... 25% 289M 0s\n",
" 14450K .......... .......... .......... .......... .......... 25% 168M 0s\n",
" 14500K .......... .......... .......... .......... .......... 25% 282M 0s\n",
" 14550K .......... .......... .......... .......... .......... 25% 230M 0s\n",
" 14600K .......... .......... .......... .......... .......... 25% 356M 0s\n",
" 14650K .......... .......... .......... .......... .......... 25% 258M 0s\n",
" 14700K .......... .......... .......... .......... .......... 25% 231M 0s\n",
" 14750K .......... .......... .......... .......... .......... 25% 266M 0s\n",
" 14800K .......... .......... .......... .......... .......... 26% 268M 0s\n",
" 14850K .......... .......... .......... .......... .......... 26% 280M 0s\n",
" 14900K .......... .......... .......... .......... .......... 26% 365M 0s\n",
" 14950K .......... .......... .......... .......... .......... 26% 248M 0s\n",
" 15000K .......... .......... .......... .......... .......... 26% 376M 0s\n",
" 15050K .......... .......... .......... .......... .......... 26% 326M 0s\n",
" 15100K .......... .......... .......... .......... .......... 26% 267M 0s\n",
" 15150K .......... .......... .......... .......... .......... 26% 210M 0s\n",
" 15200K .......... .......... .......... .......... .......... 26% 299M 0s\n",
" 15250K .......... .......... .......... .......... .......... 26% 343M 0s\n",
" 15300K .......... .......... .......... .......... .......... 26% 359M 0s\n",
" 15350K .......... .......... .......... .......... .......... 26% 305M 0s\n",
" 15400K .......... .......... .......... .......... .......... 27% 368M 0s\n",
" 15450K .......... .......... .......... .......... .......... 27% 342M 0s\n",
" 15500K .......... .......... .......... .......... .......... 27% 366M 0s\n",
" 15550K .......... .......... .......... .......... .......... 27% 130M 0s\n",
" 15600K .......... .......... .......... .......... .......... 27% 96.6M 0s\n",
" 15650K .......... .......... .......... .......... .......... 27% 97.1M 0s\n",
" 15700K .......... .......... .......... .......... .......... 27% 98.4M 0s\n",
" 15750K .......... .......... .......... .......... .......... 27% 107M 0s\n",
" 15800K .......... .......... .......... .......... .......... 27% 89.7M 0s\n",
" 15850K .......... .......... .......... .......... .......... 27% 71.4M 0s\n",
" 15900K .......... .......... .......... .......... .......... 27% 74.2M 0s\n",
" 15950K .......... .......... .......... .......... .......... 28% 70.9M 0s\n",
" 16000K .......... .......... .......... .......... .......... 28% 157M 0s\n",
" 16050K .......... .......... .......... .......... .......... 28% 175M 0s\n",
" 16100K .......... .......... .......... .......... .......... 28% 186M 0s\n",
" 16150K .......... .......... .......... .......... .......... 28% 141M 0s\n",
" 16200K .......... .......... .......... .......... .......... 28% 150M 0s\n",
" 16250K .......... .......... .......... .......... .......... 28% 173M 0s\n",
" 16300K .......... .......... .......... .......... .......... 28% 188M 0s\n",
" 16350K .......... .......... .......... .......... .......... 28% 169M 0s\n",
" 16400K .......... .......... .......... .......... .......... 28% 197M 0s\n",
" 16450K .......... .......... .......... .......... .......... 28% 176M 0s\n",
" 16500K .......... .......... .......... .......... .......... 28% 190M 0s\n",
" 16550K .......... .......... .......... .......... .......... 29% 152M 0s\n",
" 16600K .......... .......... .......... .......... .......... 29% 161M 0s\n",
" 16650K .......... .......... .......... .......... .......... 29% 196M 0s\n",
" 16700K .......... .......... .......... .......... .......... 29% 197M 0s\n",
" 16750K .......... .......... .......... .......... .......... 29% 194M 0s\n",
" 16800K .......... .......... .......... .......... .......... 29% 165M 0s\n",
" 16850K .......... .......... .......... .......... .......... 29% 146M 0s\n",
" 16900K .......... .......... .......... .......... .......... 29% 171M 0s\n",
" 16950K .......... .......... .......... .......... .......... 29% 167M 0s\n",
" 17000K .......... .......... .......... .......... .......... 29% 208M 0s\n",
" 17050K .......... .......... .......... .......... .......... 29% 218M 0s\n",
" 17100K .......... .......... .......... .......... .......... 30% 205M 0s\n",
" 17150K .......... .......... .......... .......... .......... 30% 193M 0s\n",
" 17200K .......... .......... .......... .......... .......... 30% 206M 0s\n",
" 17250K .......... .......... .......... .......... .......... 30% 211M 0s\n",
" 17300K .......... .......... .......... .......... .......... 30% 192M 0s\n",
" 17350K .......... .......... .......... .......... .......... 30% 159M 0s\n",
" 17400K .......... .......... .......... .......... .......... 30% 199M 0s\n",
" 17450K .......... .......... .......... .......... .......... 30% 153M 0s\n",
" 17500K .......... .......... .......... .......... .......... 30% 175M 0s\n",
" 17550K .......... .......... .......... .......... .......... 30% 184M 0s\n",
" 17600K .......... .......... .......... .......... .......... 30% 205M 0s\n",
" 17650K .......... .......... .......... .......... .......... 30% 190M 0s\n",
" 17700K .......... .......... .......... .......... .......... 31% 199M 0s\n",
" 17750K .......... .......... .......... .......... .......... 31% 161M 0s\n",
" 17800K .......... .......... .......... .......... .......... 31% 212M 0s\n",
" 17850K .......... .......... .......... .......... .......... 31% 205M 0s\n",
" 17900K .......... .......... .......... .......... .......... 31% 205M 0s\n",
" 17950K .......... .......... .......... .......... .......... 31% 209M 0s\n",
" 18000K .......... .......... .......... .......... .......... 31% 200M 0s\n",
" 18050K .......... .......... .......... .......... .......... 31% 181M 0s\n",
" 18100K .......... .......... .......... .......... .......... 31% 214M 0s\n",
" 18150K .......... .......... .......... .......... .......... 31% 234M 0s\n",
" 18200K .......... .......... .......... .......... .......... 31% 367M 0s\n",
" 18250K .......... .......... .......... .......... .......... 32% 222M 0s\n",
" 18300K .......... .......... .......... .......... .......... 32% 300M 0s\n",
" 18350K .......... .......... .......... .......... .......... 32% 325M 0s\n",
" 18400K .......... .......... .......... .......... .......... 32% 327M 0s\n",
" 18450K .......... .......... .......... .......... .......... 32% 216M 0s\n",
" 18500K .......... .......... .......... .......... .......... 32% 192M 0s\n",
" 18550K .......... .......... .......... .......... .......... 32% 226M 0s\n",
" 18600K .......... .......... .......... .......... .......... 32% 194M 0s\n",
" 18650K .......... .......... .......... .......... .......... 32% 210M 0s\n",
" 18700K .......... .......... .......... .......... .......... 32% 207M 0s\n",
" 18750K .......... .......... .......... .......... .......... 32% 178M 0s\n",
" 18800K .......... .......... .......... .......... .......... 33% 200M 0s\n",
" 18850K .......... .......... .......... .......... .......... 33% 184M 0s\n",
" 18900K .......... .......... .......... .......... .......... 33% 210M 0s\n",
" 18950K .......... .......... .......... .......... .......... 33% 176M 0s\n",
" 19000K .......... .......... .......... .......... .......... 33% 198M 0s\n",
" 19050K .......... .......... .......... .......... .......... 33% 203M 0s\n",
" 19100K .......... .......... .......... .......... .......... 33% 157M 0s\n",
" 19150K .......... .......... .......... .......... .......... 33% 148M 0s\n",
" 19200K .......... .......... .......... .......... .......... 33% 215M 0s\n",
" 19250K .......... .......... .......... .......... .......... 33% 206M 0s\n",
" 19300K .......... .......... .......... .......... .......... 33% 208M 0s\n",
" 19350K .......... .......... .......... .......... .......... 33% 177M 0s\n",
" 19400K .......... .......... .......... .......... .......... 34% 201M 0s\n",
" 19450K .......... .......... .......... .......... .......... 34% 186M 0s\n",
" 19500K .......... .......... .......... .......... .......... 34% 186M 0s\n",
" 19550K .......... .......... .......... .......... .......... 34% 159M 0s\n",
" 19600K .......... .......... .......... .......... .......... 34% 167M 0s\n",
" 19650K .......... .......... .......... .......... .......... 34% 182M 0s\n",
" 19700K .......... .......... .......... .......... .......... 34% 172M 0s\n",
" 19750K .......... .......... .......... .......... .......... 34% 173M 0s\n",
" 19800K .......... .......... .......... .......... .......... 34% 228M 0s\n",
" 19850K .......... .......... .......... .......... .......... 34% 219M 0s\n",
" 19900K .......... .......... .......... .......... .......... 34% 199M 0s\n",
" 19950K .......... .......... .......... .......... .......... 35% 278M 0s\n",
" 20000K .......... .......... .......... .......... .......... 35% 364M 0s\n",
" 20050K .......... .......... .......... .......... .......... 35% 393M 0s\n",
" 20100K .......... .......... .......... .......... .......... 35% 370M 0s\n",
" 20150K .......... .......... .......... .......... .......... 35% 307M 0s\n",
" 20200K .......... .......... .......... .......... .......... 35% 388M 0s\n",
" 20250K .......... .......... .......... .......... .......... 35% 315M 0s\n",
" 20300K .......... .......... .......... .......... .......... 35% 206M 0s\n",
" 20350K .......... .......... .......... .......... .......... 35% 225M 0s\n",
" 20400K .......... .......... .......... .......... .......... 35% 273M 0s\n",
" 20450K .......... .......... .......... .......... .......... 35% 218M 0s\n",
" 20500K .......... .......... .......... .......... .......... 35% 235M 0s\n",
" 20550K .......... .......... .......... .......... .......... 36% 219M 0s\n",
" 20600K .......... .......... .......... .......... .......... 36% 257M 0s\n",
" 20650K .......... .......... .......... .......... .......... 36% 246M 0s\n",
" 20700K .......... .......... .......... .......... .......... 36% 235M 0s\n",
" 20750K .......... .......... .......... .......... .......... 36% 207M 0s\n",
" 20800K .......... .......... .......... .......... .......... 36% 204M 0s\n",
" 20850K .......... .......... .......... .......... .......... 36% 264M 0s\n",
" 20900K .......... .......... .......... .......... .......... 36% 229M 0s\n",
" 20950K .......... .......... .......... .......... .......... 36% 163M 0s\n",
" 21000K .......... .......... .......... .......... .......... 36% 256M 0s\n",
" 21050K .......... .......... .......... .......... .......... 36% 310M 0s\n",
" 21100K .......... .......... .......... .......... .......... 37% 363M 0s\n",
" 21150K .......... .......... .......... .......... .......... 37% 313M 0s\n",
" 21200K .......... .......... .......... .......... .......... 37% 340M 0s\n",
" 21250K .......... .......... .......... .......... .......... 37% 223M 0s\n",
" 21300K .......... .......... .......... .......... .......... 37% 159M 0s\n",
" 21350K .......... .......... .......... .......... .......... 37% 205M 0s\n",
" 21400K .......... .......... .......... .......... .......... 37% 202M 0s\n",
" 21450K .......... .......... .......... .......... .......... 37% 188M 0s\n",
" 21500K .......... .......... .......... .......... .......... 37% 195M 0s\n",
" 21550K .......... .......... .......... .......... .......... 37% 176M 0s\n",
" 21600K .......... .......... .......... .......... .......... 37% 225M 0s\n",
" 21650K .......... .......... .......... .......... .......... 38% 208M 0s\n",
" 21700K .......... .......... .......... .......... .......... 38% 213M 0s\n",
" 21750K .......... .......... .......... .......... .......... 38% 211M 0s\n",
" 21800K .......... .......... .......... .......... .......... 38% 239M 0s\n",
" 21850K .......... .......... .......... .......... .......... 38% 212M 0s\n",
" 21900K .......... .......... .......... .......... .......... 38% 226M 0s\n",
" 21950K .......... .......... .......... .......... .......... 38% 221M 0s\n",
" 22000K .......... .......... .......... .......... .......... 38% 242M 0s\n",
" 22050K .......... .......... .......... .......... .......... 38% 251M 0s\n",
" 22100K .......... .......... .......... .......... .......... 38% 218M 0s\n",
" 22150K .......... .......... .......... .......... .......... 38% 151M 0s\n",
" 22200K .......... .......... .......... .......... .......... 38% 149M 0s\n",
" 22250K .......... .......... .......... .......... .......... 39% 107M 0s\n",
" 22300K .......... .......... .......... .......... .......... 39% 130M 0s\n",
" 22350K .......... .......... .......... .......... .......... 39% 112M 0s\n",
" 22400K .......... .......... .......... .......... .......... 39% 179M 0s\n",
" 22450K .......... .......... .......... .......... .......... 39% 196M 0s\n",
" 22500K .......... .......... .......... .......... .......... 39% 256M 0s\n",
" 22550K .......... .......... .......... .......... .......... 39% 200M 0s\n",
" 22600K .......... .......... .......... .......... .......... 39% 33.6M 0s\n",
" 22650K .......... .......... .......... .......... .......... 39% 46.1M 0s\n",
" 22700K .......... .......... .......... .......... .......... 39% 43.1M 0s\n",
" 22750K .......... .......... .......... .......... .......... 39% 31.4M 0s\n",
" 22800K .......... .......... .......... .......... .......... 40% 30.2M 0s\n",
" 22850K .......... .......... .......... .......... .......... 40% 43.1M 0s\n",
" 22900K .......... .......... .......... .......... .......... 40% 128M 0s\n",
" 22950K .......... .......... .......... .......... .......... 40% 137M 0s\n",
" 23000K .......... .......... .......... .......... .......... 40% 235M 0s\n",
" 23050K .......... .......... .......... .......... .......... 40% 67.5M 0s\n",
" 23100K .......... .......... .......... .......... .......... 40% 41.6M 0s\n",
" 23150K .......... .......... .......... .......... .......... 40% 49.3M 0s\n",
" 23200K .......... .......... .......... .......... .......... 40% 71.5M 0s\n",
" 23250K .......... .......... .......... .......... .......... 40% 226M 0s\n",
" 23300K .......... .......... .......... .......... .......... 40% 341M 0s\n",
" 23350K .......... .......... .......... .......... .......... 40% 307M 0s\n",
" 23400K .......... .......... .......... .......... .......... 41% 245M 0s\n",
" 23450K .......... .......... .......... .......... .......... 41% 333M 0s\n",
" 23500K .......... .......... .......... .......... .......... 41% 382M 0s\n",
" 23550K .......... .......... .......... .......... .......... 41% 320M 0s\n",
" 23600K .......... .......... .......... .......... .......... 41% 258M 0s\n",
" 23650K .......... .......... .......... .......... .......... 41% 237M 0s\n",
" 23700K .......... .......... .......... .......... .......... 41% 211M 0s\n",
" 23750K .......... .......... .......... .......... .......... 41% 234M 0s\n",
" 23800K .......... .......... .......... .......... .......... 41% 237M 0s\n",
" 23850K .......... .......... .......... .......... .......... 41% 223M 0s\n",
" 23900K .......... .......... .......... .......... .......... 41% 120M 0s\n",
" 23950K .......... .......... .......... .......... .......... 42% 226M 0s\n",
" 24000K .......... .......... .......... .......... .......... 42% 281M 0s\n",
" 24050K .......... .......... .......... .......... .......... 42% 239M 0s\n",
" 24100K .......... .......... .......... .......... .......... 42% 379M 0s\n",
" 24150K .......... .......... .......... .......... .......... 42% 204M 0s\n",
" 24200K .......... .......... .......... .......... .......... 42% 351M 0s\n",
" 24250K .......... .......... .......... .......... .......... 42% 266M 0s\n",
" 24300K .......... .......... .......... .......... .......... 42% 270M 0s\n",
" 24350K .......... .......... .......... .......... .......... 42% 282M 0s\n",
" 24400K .......... .......... .......... .......... .......... 42% 261M 0s\n",
" 24450K .......... .......... .......... .......... .......... 42% 291M 0s\n",
" 24500K .......... .......... .......... .......... .......... 42% 305M 0s\n",
" 24550K .......... .......... .......... .......... .......... 43% 272M 0s\n",
" 24600K .......... .......... .......... .......... .......... 43% 347M 0s\n",
" 24650K .......... .......... .......... .......... .......... 43% 371M 0s\n",
" 24700K .......... .......... .......... .......... .......... 43% 388M 0s\n",
" 24750K .......... .......... .......... .......... .......... 43% 210M 0s\n",
" 24800K .......... .......... .......... .......... .......... 43% 268M 0s\n",
" 24850K .......... .......... .......... .......... .......... 43% 368M 0s\n",
" 24900K .......... .......... .......... .......... .......... 43% 376M 0s\n",
" 24950K .......... .......... .......... .......... .......... 43% 314M 0s\n",
" 25000K .......... .......... .......... .......... .......... 43% 369M 0s\n",
" 25050K .......... .......... .......... .......... .......... 43% 301M 0s\n",
" 25100K .......... .......... .......... .......... .......... 44% 353M 0s\n",
" 25150K .......... .......... .......... .......... .......... 44% 237M 0s\n",
" 25200K .......... .......... .......... .......... .......... 44% 235M 0s\n",
" 25250K .......... .......... .......... .......... .......... 44% 377M 0s\n",
" 25300K .......... .......... .......... .......... .......... 44% 370M 0s\n",
" 25350K .......... .......... .......... .......... .......... 44% 228M 0s\n",
" 25400K .......... .......... .......... .......... .......... 44% 337M 0s\n",
" 25450K .......... .......... .......... .......... .......... 44% 366M 0s\n",
" 25500K .......... .......... .......... .......... .......... 44% 372M 0s\n",
" 25550K .......... .......... .......... .......... .......... 44% 323M 0s\n",
" 25600K .......... .......... .......... .......... .......... 44% 360M 0s\n",
" 25650K .......... .......... .......... .......... .......... 45% 355M 0s\n",
" 25700K .......... .......... .......... .......... .......... 45% 236M 0s\n",
" 25750K .......... .......... .......... .......... .......... 45% 292M 0s\n",
" 25800K .......... .......... .......... .......... .......... 45% 281M 0s\n",
" 25850K .......... .......... .......... .......... .......... 45% 318M 0s\n",
" 25900K .......... .......... .......... .......... .......... 45% 371M 0s\n",
" 25950K .......... .......... .......... .......... .......... 45% 343M 0s\n",
" 26000K .......... .......... .......... .......... .......... 45% 168M 0s\n",
" 26050K .......... .......... .......... .......... .......... 45% 207M 0s\n",
" 26100K .......... .......... .......... .......... .......... 45% 213M 0s\n",
" 26150K .......... .......... .......... .......... .......... 45% 92.4M 0s\n",
" 26200K .......... .......... .......... .......... .......... 45% 217M 0s\n",
" 26250K .......... .......... .......... .......... .......... 46% 350M 0s\n",
" 26300K .......... .......... .......... .......... .......... 46% 278M 0s\n",
" 26350K .......... .......... .......... .......... .......... 46% 305M 0s\n",
" 26400K .......... .......... .......... .......... .......... 46% 255M 0s\n",
" 26450K .......... .......... .......... .......... .......... 46% 307M 0s\n",
" 26500K .......... .......... .......... .......... .......... 46% 342M 0s\n",
" 26550K .......... .......... .......... .......... .......... 46% 306M 0s\n",
" 26600K .......... .......... .......... .......... .......... 46% 361M 0s\n",
" 26650K .......... .......... .......... .......... .......... 46% 295M 0s\n",
" 26700K .......... .......... .......... .......... .......... 46% 166M 0s\n",
" 26750K .......... .......... .......... .......... .......... 46% 265M 0s\n",
" 26800K .......... .......... .......... .......... .......... 47% 293M 0s\n",
" 26850K .......... .......... .......... .......... .......... 47% 316M 0s\n",
" 26900K .......... .......... .......... .......... .......... 47% 350M 0s\n",
" 26950K .......... .......... .......... .......... .......... 47% 205M 0s\n",
" 27000K .......... .......... .......... .......... .......... 47% 179M 0s\n",
" 27050K .......... .......... .......... .......... .......... 47% 185M 0s\n",
" 27100K .......... .......... .......... .......... .......... 47% 196M 0s\n",
" 27150K .......... .......... .......... .......... .......... 47% 251M 0s\n",
" 27200K .......... .......... .......... .......... .......... 47% 279M 0s\n",
" 27250K .......... .......... .......... .......... .......... 47% 347M 0s\n",
" 27300K .......... .......... .......... .......... .......... 47% 360M 0s\n",
" 27350K .......... .......... .......... .......... .......... 47% 308M 0s\n",
" 27400K .......... .......... .......... .......... .......... 48% 360M 0s\n",
" 27450K .......... .......... .......... .......... .......... 48% 362M 0s\n",
" 27500K .......... .......... .......... .......... .......... 48% 248M 0s\n",
" 27550K .......... .......... .......... .......... .......... 48% 287M 0s\n",
" 27600K .......... .......... .......... .......... .......... 48% 347M 0s\n",
" 27650K .......... .......... .......... .......... .......... 48% 342M 0s\n",
" 27700K .......... .......... .......... .......... .......... 48% 365M 0s\n",
" 27750K .......... .......... .......... .......... .......... 48% 296M 0s\n",
" 27800K .......... .......... .......... .......... .......... 48% 233M 0s\n",
" 27850K .......... .......... .......... .......... .......... 48% 328M 0s\n",
" 27900K .......... .......... .......... .......... .......... 48% 127M 0s\n",
" 27950K .......... .......... .......... .......... .......... 49% 256M 0s\n",
" 28000K .......... .......... .......... .......... .......... 49% 133M 0s\n",
" 28050K .......... .......... .......... .......... .......... 49% 245M 0s\n",
" 28100K .......... .......... .......... .......... .......... 49% 370M 0s\n",
" 28150K .......... .......... .......... .......... .......... 49% 309M 0s\n",
" 28200K .......... .......... .......... .......... .......... 49% 366M 0s\n",
" 28250K .......... .......... .......... .......... .......... 49% 334M 0s\n",
" 28300K .......... .......... .......... .......... .......... 49% 284M 0s\n",
" 28350K .......... .......... .......... .......... .......... 49% 44.0M 0s\n",
" 28400K .......... .......... .......... .......... .......... 49% 49.2M 0s\n",
" 28450K .......... .......... .......... .......... .......... 49% 60.0M 0s\n",
" 28500K .......... .......... .......... .......... .......... 50% 61.0M 0s\n",
" 28550K .......... .......... .......... .......... .......... 50% 146M 0s\n",
" 28600K .......... .......... .......... .......... .......... 50% 277M 0s\n",
" 28650K .......... .......... .......... .......... .......... 50% 257M 0s\n",
" 28700K .......... .......... .......... .......... .......... 50% 288M 0s\n",
" 28750K .......... .......... .......... .......... .......... 50% 281M 0s\n",
" 28800K .......... .......... .......... .......... .......... 50% 303M 0s\n",
" 28850K .......... .......... .......... .......... .......... 50% 372M 0s\n",
" 28900K .......... .......... .......... .......... .......... 50% 356M 0s\n",
" 28950K .......... .......... .......... .......... .......... 50% 301M 0s\n",
" 29000K .......... .......... .......... .......... .......... 50% 369M 0s\n",
" 29050K .......... .......... .......... .......... .......... 50% 359M 0s\n",
" 29100K .......... .......... .......... .......... .......... 51% 239M 0s\n",
" 29150K .......... .......... .......... .......... .......... 51% 124M 0s\n",
" 29200K .......... .......... .......... .......... .......... 51% 262M 0s\n",
" 29250K .......... .......... .......... .......... .......... 51% 237M 0s\n",
" 29300K .......... .......... .......... .......... .......... 51% 231M 0s\n",
" 29350K .......... .......... .......... .......... .......... 51% 175M 0s\n",
" 29400K .......... .......... .......... .......... .......... 51% 294M 0s\n",
" 29450K .......... .......... .......... .......... .......... 51% 260M 0s\n",
" 29500K .......... .......... .......... .......... .......... 51% 282M 0s\n",
" 29550K .......... .......... .......... .......... .......... 51% 287M 0s\n",
" 29600K .......... .......... .......... .......... .......... 51% 256M 0s\n",
" 29650K .......... .......... .......... .......... .......... 52% 295M 0s\n",
" 29700K .......... .......... .......... .......... .......... 52% 327M 0s\n",
" 29750K .......... .......... .......... .......... .......... 52% 244M 0s\n",
" 29800K .......... .......... .......... .......... .......... 52% 351M 0s\n",
" 29850K .......... .......... .......... .......... .......... 52% 348M 0s\n",
" 29900K .......... .......... .......... .......... .......... 52% 285M 0s\n",
" 29950K .......... .......... .......... .......... .......... 52% 309M 0s\n",
" 30000K .......... .......... .......... .......... .......... 52% 374M 0s\n",
" 30050K .......... .......... .......... .......... .......... 52% 370M 0s\n",
" 30100K .......... .......... .......... .......... .......... 52% 235M 0s\n",
" 30150K .......... .......... .......... .......... .......... 52% 174M 0s\n",
" 30200K .......... .......... .......... .......... .......... 52% 127M 0s\n",
" 30250K .......... .......... .......... .......... .......... 53% 218M 0s\n",
" 30300K .......... .......... .......... .......... .......... 53% 304M 0s\n",
" 30350K .......... .......... .......... .......... .......... 53% 327M 0s\n",
" 30400K .......... .......... .......... .......... .......... 53% 260M 0s\n",
" 30450K .......... .......... .......... .......... .......... 53% 189M 0s\n",
" 30500K .......... .......... .......... .......... .......... 53% 339M 0s\n",
" 30550K .......... .......... .......... .......... .......... 53% 312M 0s\n",
" 30600K .......... .......... .......... .......... .......... 53% 374M 0s\n",
" 30650K .......... .......... .......... .......... .......... 53% 377M 0s\n",
" 30700K .......... .......... .......... .......... .......... 53% 231M 0s\n",
" 30750K .......... .......... .......... .......... .......... 53% 209M 0s\n",
" 30800K .......... .......... .......... .......... .......... 54% 241M 0s\n",
" 30850K .......... .......... .......... .......... .......... 54% 227M 0s\n",
" 30900K .......... .......... .......... .......... .......... 54% 334M 0s\n",
" 30950K .......... .......... .......... .......... .......... 54% 225M 0s\n",
" 31000K .......... .......... .......... .......... .......... 54% 350M 0s\n",
" 31050K .......... .......... .......... .......... .......... 54% 339M 0s\n",
" 31100K .......... .......... .......... .......... .......... 54% 364M 0s\n",
" 31150K .......... .......... .......... .......... .......... 54% 327M 0s\n",
" 31200K .......... .......... .......... .......... .......... 54% 371M 0s\n",
" 31250K .......... .......... .......... .......... .......... 54% 322M 0s\n",
" 31300K .......... .......... .......... .......... .......... 54% 293M 0s\n",
" 31350K .......... .......... .......... .......... .......... 54% 299M 0s\n",
" 31400K .......... .......... .......... .......... .......... 55% 301M 0s\n",
" 31450K .......... .......... .......... .......... .......... 55% 77.8M 0s\n",
" 31500K .......... .......... .......... .......... .......... 55% 253M 0s\n",
" 31550K .......... .......... .......... .......... .......... 55% 221M 0s\n",
" 31600K .......... .......... .......... .......... .......... 55% 257M 0s\n",
" 31650K .......... .......... .......... .......... .......... 55% 185M 0s\n",
" 31700K .......... .......... .......... .......... .......... 55% 155M 0s\n",
" 31750K .......... .......... .......... .......... .......... 55% 257M 0s\n",
" 31800K .......... .......... .......... .......... .......... 55% 297M 0s\n",
" 31850K .......... .......... .......... .......... .......... 55% 277M 0s\n",
" 31900K .......... .......... .......... .......... .......... 55% 341M 0s\n",
" 31950K .......... .......... .......... .......... .......... 56% 271M 0s\n",
" 32000K .......... .......... .......... .......... .......... 56% 272M 0s\n",
" 32050K .......... .......... .......... .......... .......... 56% 360M 0s\n",
" 32100K .......... .......... .......... .......... .......... 56% 357M 0s\n",
" 32150K .......... .......... .......... .......... .......... 56% 310M 0s\n",
" 32200K .......... .......... .......... .......... .......... 56% 156M 0s\n",
" 32250K .......... .......... .......... .......... .......... 56% 121M 0s\n",
" 32300K .......... .......... .......... .......... .......... 56% 271M 0s\n",
" 32350K .......... .......... .......... .......... .......... 56% 288M 0s\n",
" 32400K .......... .......... .......... .......... .......... 56% 347M 0s\n",
" 32450K .......... .......... .......... .......... .......... 56% 296M 0s\n",
" 32500K .......... .......... .......... .......... .......... 57% 297M 0s\n",
" 32550K .......... .......... .......... .......... .......... 57% 276M 0s\n",
" 32600K .......... .......... .......... .......... .......... 57% 362M 0s\n",
" 32650K .......... .......... .......... .......... .......... 57% 376M 0s\n",
" 32700K .......... .......... .......... .......... .......... 57% 348M 0s\n",
" 32750K .......... .......... .......... .......... .......... 57% 23.1M 0s\n",
" 32800K .......... .......... .......... .......... .......... 57% 102M 0s\n",
" 32850K .......... .......... .......... .......... .......... 57% 84.1M 0s\n",
" 32900K .......... .......... .......... .......... .......... 57% 142M 0s\n",
" 32950K .......... .......... .......... .......... .......... 57% 132M 0s\n",
" 33000K .......... .......... .......... .......... .......... 57% 149M 0s\n",
" 33050K .......... .......... .......... .......... .......... 57% 175M 0s\n",
" 33100K .......... .......... .......... .......... .......... 58% 161M 0s\n",
" 33150K .......... .......... .......... .......... .......... 58% 157M 0s\n",
" 33200K .......... .......... .......... .......... .......... 58% 166M 0s\n",
" 33250K .......... .......... .......... .......... .......... 58% 169M 0s\n",
" 33300K .......... .......... .......... .......... .......... 58% 216M 0s\n",
" 33350K .......... .......... .......... .......... .......... 58% 141M 0s\n",
" 33400K .......... .......... .......... .......... .......... 58% 158M 0s\n",
" 33450K .......... .......... .......... .......... .......... 58% 188M 0s\n",
" 33500K .......... .......... .......... .......... .......... 58% 184M 0s\n",
" 33550K .......... .......... .......... .......... .......... 58% 180M 0s\n",
" 33600K .......... .......... .......... .......... .......... 58% 204M 0s\n",
" 33650K .......... .......... .......... .......... .......... 59% 215M 0s\n",
" 33700K .......... .......... .......... .......... .......... 59% 174M 0s\n",
" 33750K .......... .......... .......... .......... .......... 59% 136M 0s\n",
" 33800K .......... .......... .......... .......... .......... 59% 209M 0s\n",
" 33850K .......... .......... .......... .......... .......... 59% 210M 0s\n",
" 33900K .......... .......... .......... .......... .......... 59% 197M 0s\n",
" 33950K .......... .......... .......... .......... .......... 59% 185M 0s\n",
" 34000K .......... .......... .......... .......... .......... 59% 205M 0s\n",
" 34050K .......... .......... .......... .......... .......... 59% 212M 0s\n",
" 34100K .......... .......... .......... .......... .......... 59% 183M 0s\n",
" 34150K .......... .......... .......... .......... .......... 59% 176M 0s\n",
" 34200K .......... .......... .......... .......... .......... 59% 218M 0s\n",
" 34250K .......... .......... .......... .......... .......... 60% 219M 0s\n",
" 34300K .......... .......... .......... .......... .......... 60% 187M 0s\n",
" 34350K .......... .......... .......... .......... .......... 60% 161M 0s\n",
" 34400K .......... .......... .......... .......... .......... 60% 173M 0s\n",
" 34450K .......... .......... .......... .......... .......... 60% 173M 0s\n",
" 34500K .......... .......... .......... .......... .......... 60% 169M 0s\n",
" 34550K .......... .......... .......... .......... .......... 60% 142M 0s\n",
" 34600K .......... .......... .......... .......... .......... 60% 183M 0s\n",
" 34650K .......... .......... .......... .......... .......... 60% 160M 0s\n",
" 34700K .......... .......... .......... .......... .......... 60% 192M 0s\n",
" 34750K .......... .......... .......... .......... .......... 60% 187M 0s\n",
" 34800K .......... .......... .......... .......... .......... 61% 213M 0s\n",
" 34850K .......... .......... .......... .......... .......... 61% 203M 0s\n",
" 34900K .......... .......... .......... .......... .......... 61% 205M 0s\n",
" 34950K .......... .......... .......... .......... .......... 61% 142M 0s\n",
" 35000K .......... .......... .......... .......... .......... 61% 215M 0s\n",
" 35050K .......... .......... .......... .......... .......... 61% 204M 0s\n",
" 35100K .......... .......... .......... .......... .......... 61% 181M 0s\n",
" 35150K .......... .......... .......... .......... .......... 61% 152M 0s\n",
" 35200K .......... .......... .......... .......... .......... 61% 202M 0s\n",
" 35250K .......... .......... .......... .......... .......... 61% 170M 0s\n",
" 35300K .......... .......... .......... .......... .......... 61% 163M 0s\n",
" 35350K .......... .......... .......... .......... .......... 61% 164M 0s\n",
" 35400K .......... .......... .......... .......... .......... 62% 108M 0s\n",
" 35450K .......... .......... .......... .......... .......... 62% 177M 0s\n",
" 35500K .......... .......... .......... .......... .......... 62% 176M 0s\n",
" 35550K .......... .......... .......... .......... .......... 62% 178M 0s\n",
" 35600K .......... .......... .......... .......... .......... 62% 179M 0s\n",
" 35650K .......... .......... .......... .......... .......... 62% 211M 0s\n",
" 35700K .......... .......... .......... .......... .......... 62% 257M 0s\n",
" 35750K .......... .......... .......... .......... .......... 62% 258M 0s\n",
" 35800K .......... .......... .......... .......... .......... 62% 192M 0s\n",
" 35850K .......... .......... .......... .......... .......... 62% 297M 0s\n",
" 35900K .......... .......... .......... .......... .......... 62% 365M 0s\n",
" 35950K .......... .......... .......... .......... .......... 63% 330M 0s\n",
" 36000K .......... .......... .......... .......... .......... 63% 369M 0s\n",
" 36050K .......... .......... .......... .......... .......... 63% 322M 0s\n",
" 36100K .......... .......... .......... .......... .......... 63% 311M 0s\n",
" 36150K .......... .......... .......... .......... .......... 63% 267M 0s\n",
" 36200K .......... .......... .......... .......... .......... 63% 283M 0s\n",
" 36250K .......... .......... .......... .......... .......... 63% 267M 0s\n",
" 36300K .......... .......... .......... .......... .......... 63% 276M 0s\n",
" 36350K .......... .......... .......... .......... .......... 63% 174M 0s\n",
" 36400K .......... .......... .......... .......... .......... 63% 283M 0s\n",
" 36450K .......... .......... .......... .......... .......... 63% 362M 0s\n",
" 36500K .......... .......... .......... .......... .......... 64% 287M 0s\n",
" 36550K .......... .......... .......... .......... .......... 64% 291M 0s\n",
" 36600K .......... .......... .......... .......... .......... 64% 363M 0s\n",
" 36650K .......... .......... .......... .......... .......... 64% 216M 0s\n",
" 36700K .......... .......... .......... .......... .......... 64% 280M 0s\n",
" 36750K .......... .......... .......... .......... .......... 64% 330M 0s\n",
" 36800K .......... .......... .......... .......... .......... 64% 368M 0s\n",
" 36850K .......... .......... .......... .......... .......... 64% 345M 0s\n",
" 36900K .......... .......... .......... .......... .......... 64% 368M 0s\n",
" 36950K .......... .......... .......... .......... .......... 64% 235M 0s\n",
" 37000K .......... .......... .......... .......... .......... 64% 313M 0s\n",
" 37050K .......... .......... .......... .......... .......... 64% 363M 0s\n",
" 37100K .......... .......... .......... .......... .......... 65% 366M 0s\n",
" 37150K .......... .......... .......... .......... .......... 65% 314M 0s\n",
" 37200K .......... .......... .......... .......... .......... 65% 358M 0s\n",
" 37250K .......... .......... .......... .......... .......... 65% 367M 0s\n",
" 37300K .......... .......... .......... .......... .......... 65% 298M 0s\n",
" 37350K .......... .......... .......... .......... .......... 65% 283M 0s\n",
" 37400K .......... .......... .......... .......... .......... 65% 59.0M 0s\n",
" 37450K .......... .......... .......... .......... .......... 65% 185M 0s\n",
" 37500K .......... .......... .......... .......... .......... 65% 274M 0s\n",
" 37550K .......... .......... .......... .......... .......... 65% 313M 0s\n",
" 37600K .......... .......... .......... .......... .......... 65% 333M 0s\n",
" 37650K .......... .......... .......... .......... .......... 66% 9.55M 0s\n",
" 37700K .......... .......... .......... .......... .......... 66% 56.6M 0s\n",
" 37750K .......... .......... .......... .......... .......... 66% 119M 0s\n",
" 37800K .......... .......... .......... .......... .......... 66% 194M 0s\n",
" 37850K .......... .......... .......... .......... .......... 66% 212M 0s\n",
" 37900K .......... .......... .......... .......... .......... 66% 203M 0s\n",
" 37950K .......... .......... .......... .......... .......... 66% 183M 0s\n",
" 38000K .......... .......... .......... .......... .......... 66% 211M 0s\n",
" 38050K .......... .......... .......... .......... .......... 66% 20.8M 0s\n",
" 38100K .......... .......... .......... .......... .......... 66% 242M 0s\n",
" 38150K .......... .......... .......... .......... .......... 66% 194M 0s\n",
" 38200K .......... .......... .......... .......... .......... 66% 251M 0s\n",
" 38250K .......... .......... .......... .......... .......... 67% 236M 0s\n",
" 38300K .......... .......... .......... .......... .......... 67% 230M 0s\n",
" 38350K .......... .......... .......... .......... .......... 67% 177M 0s\n",
" 38400K .......... .......... .......... .......... .......... 67% 310M 0s\n",
" 38450K .......... .......... .......... .......... .......... 67% 246M 0s\n",
" 38500K .......... .......... .......... .......... .......... 67% 243M 0s\n",
" 38550K .......... .......... .......... .......... .......... 67% 194M 0s\n",
" 38600K .......... .......... .......... .......... .......... 67% 199M 0s\n",
" 38650K .......... .......... .......... .......... .......... 67% 236M 0s\n",
" 38700K .......... .......... .......... .......... .......... 67% 278M 0s\n",
" 38750K .......... .......... .......... .......... .......... 67% 186M 0s\n",
" 38800K .......... .......... .......... .......... .......... 68% 222M 0s\n",
" 38850K .......... .......... .......... .......... .......... 68% 254M 0s\n",
" 38900K .......... .......... .......... .......... .......... 68% 217M 0s\n",
" 38950K .......... .......... .......... .......... .......... 68% 299M 0s\n",
" 39000K .......... .......... .......... .......... .......... 68% 349M 0s\n",
" 39050K .......... .......... .......... .......... .......... 68% 360M 0s\n",
" 39100K .......... .......... .......... .......... .......... 68% 246M 0s\n",
" 39150K .......... .......... .......... .......... .......... 68% 137M 0s\n",
" 39200K .......... .......... .......... .......... .......... 68% 277M 0s\n",
" 39250K .......... .......... .......... .......... .......... 68% 298M 0s\n",
" 39300K .......... .......... .......... .......... .......... 68% 211M 0s\n",
" 39350K .......... .......... .......... .......... .......... 69% 165M 0s\n",
" 39400K .......... .......... .......... .......... .......... 69% 244M 0s\n",
" 39450K .......... .......... .......... .......... .......... 69% 270M 0s\n",
" 39500K .......... .......... .......... .......... .......... 69% 214M 0s\n",
" 39550K .......... .......... .......... .......... .......... 69% 108M 0s\n",
" 39600K .......... .......... .......... .......... .......... 69% 201M 0s\n",
" 39650K .......... .......... .......... .......... .......... 69% 212M 0s\n",
" 39700K .......... .......... .......... .......... .......... 69% 186M 0s\n",
" 39750K .......... .......... .......... .......... .......... 69% 152M 0s\n",
" 39800K .......... .......... .......... .......... .......... 69% 214M 0s\n",
" 39850K .......... .......... .......... .......... .......... 69% 215M 0s\n",
" 39900K .......... .......... .......... .......... .......... 69% 143M 0s\n",
" 39950K .......... .......... .......... .......... .......... 70% 308M 0s\n",
" 40000K .......... .......... .......... .......... .......... 70% 342M 0s\n",
" 40050K .......... .......... .......... .......... .......... 70% 344M 0s\n",
" 40100K .......... .......... .......... .......... .......... 70% 277M 0s\n",
" 40150K .......... .......... .......... .......... .......... 70% 211M 0s\n",
" 40200K .......... .......... .......... .......... .......... 70% 206M 0s\n",
" 40250K .......... .......... .......... .......... .......... 70% 356M 0s\n",
" 40300K .......... .......... .......... .......... .......... 70% 371M 0s\n",
" 40350K .......... .......... .......... .......... .......... 70% 298M 0s\n",
" 40400K .......... .......... .......... .......... .......... 70% 367M 0s\n",
" 40450K .......... .......... .......... .......... .......... 70% 307M 0s\n",
" 40500K .......... .......... .......... .......... .......... 71% 343M 0s\n",
" 40550K .......... .......... .......... .......... .......... 71% 292M 0s\n",
" 40600K .......... .......... .......... .......... .......... 71% 362M 0s\n",
" 40650K .......... .......... .......... .......... .......... 71% 236M 0s\n",
" 40700K .......... .......... .......... .......... .......... 71% 230M 0s\n",
" 40750K .......... .......... .......... .......... .......... 71% 157M 0s\n",
" 40800K .......... .......... .......... .......... .......... 71% 175M 0s\n",
" 40850K .......... .......... .......... .......... .......... 71% 194M 0s\n",
" 40900K .......... .......... .......... .......... .......... 71% 176M 0s\n",
" 40950K .......... .......... .......... .......... .......... 71% 170M 0s\n",
" 41000K .......... .......... .......... .......... .......... 71% 210M 0s\n",
" 41050K .......... .......... .......... .......... .......... 71% 198M 0s\n",
" 41100K .......... .......... .......... .......... .......... 72% 197M 0s\n",
" 41150K .......... .......... .......... .......... .......... 72% 186M 0s\n",
" 41200K .......... .......... .......... .......... .......... 72% 165M 0s\n",
" 41250K .......... .......... .......... .......... .......... 72% 169M 0s\n",
" 41300K .......... .......... .......... .......... .......... 72% 197M 0s\n",
" 41350K .......... .......... .......... .......... .......... 72% 172M 0s\n",
" 41400K .......... .......... .......... .......... .......... 72% 200M 0s\n",
" 41450K .......... .......... .......... .......... .......... 72% 205M 0s\n",
" 41500K .......... .......... .......... .......... .......... 72% 204M 0s\n",
" 41550K .......... .......... .......... .......... .......... 72% 171M 0s\n",
" 41600K .......... .......... .......... .......... .......... 72% 198M 0s\n",
" 41650K .......... .......... .......... .......... .......... 73% 43.5M 0s\n",
" 41700K .......... .......... .......... .......... .......... 73% 287M 0s\n",
" 41750K .......... .......... .......... .......... .......... 73% 193M 0s\n",
" 41800K .......... .......... .......... .......... .......... 73% 16.8M 0s\n",
" 41850K .......... .......... .......... .......... .......... 73% 279M 0s\n",
" 41900K .......... .......... .......... .......... .......... 73% 359M 0s\n",
" 41950K .......... .......... .......... .......... .......... 73% 317M 0s\n",
" 42000K .......... .......... .......... .......... .......... 73% 13.9M 0s\n",
" 42050K .......... .......... .......... .......... .......... 73% 185M 0s\n",
" 42100K .......... .......... .......... .......... .......... 73% 206M 0s\n",
" 42150K .......... .......... .......... .......... .......... 73% 152M 0s\n",
" 42200K .......... .......... .......... .......... .......... 73% 166M 0s\n",
" 42250K .......... .......... .......... .......... .......... 74% 254M 0s\n",
" 42300K .......... .......... .......... .......... .......... 74% 228M 0s\n",
" 42350K .......... .......... .......... .......... .......... 74% 175M 0s\n",
" 42400K .......... .......... .......... .......... .......... 74% 184M 0s\n",
" 42450K .......... .......... .......... .......... .......... 74% 177M 0s\n",
" 42500K .......... .......... .......... .......... .......... 74% 165M 0s\n",
" 42550K .......... .......... .......... .......... .......... 74% 127M 0s\n",
" 42600K .......... .......... .......... .......... .......... 74% 168M 0s\n",
" 42650K .......... .......... .......... .......... .......... 74% 182M 0s\n",
" 42700K .......... .......... .......... .......... .......... 74% 143M 0s\n",
" 42750K .......... .......... .......... .......... .......... 74% 166M 0s\n",
" 42800K .......... .......... .......... .......... .......... 75% 201M 0s\n",
" 42850K .......... .......... .......... .......... .......... 75% 204M 0s\n",
" 42900K .......... .......... .......... .......... .......... 75% 353M 0s\n",
" 42950K .......... .......... .......... .......... .......... 75% 104M 0s\n",
" 43000K .......... .......... .......... .......... .......... 75% 289M 0s\n",
" 43050K .......... .......... .......... .......... .......... 75% 285M 0s\n",
" 43100K .......... .......... .......... .......... .......... 75% 303M 0s\n",
" 43150K .......... .......... .......... .......... .......... 75% 227M 0s\n",
" 43200K .......... .......... .......... .......... .......... 75% 198M 0s\n",
" 43250K .......... .......... .......... .......... .......... 75% 197M 0s\n",
" 43300K .......... .......... .......... .......... .......... 75% 256M 0s\n",
" 43350K .......... .......... .......... .......... .......... 76% 141M 0s\n",
" 43400K .......... .......... .......... .......... .......... 76% 162M 0s\n",
" 43450K .......... .......... .......... .......... .......... 76% 203M 0s\n",
" 43500K .......... .......... .......... .......... .......... 76% 307M 0s\n",
" 43550K .......... .......... .......... .......... .......... 76% 179M 0s\n",
" 43600K .......... .......... .......... .......... .......... 76% 173M 0s\n",
" 43650K .......... .......... .......... .......... .......... 76% 198M 0s\n",
" 43700K .......... .......... .......... .......... .......... 76% 182M 0s\n",
" 43750K .......... .......... .......... .......... .......... 76% 169M 0s\n",
" 43800K .......... .......... .......... .......... .......... 76% 301M 0s\n",
" 43850K .......... .......... .......... .......... .......... 76% 292M 0s\n",
" 43900K .......... .......... .......... .......... .......... 76% 297M 0s\n",
" 43950K .......... .......... .......... .......... .......... 77% 273M 0s\n",
" 44000K .......... .......... .......... .......... .......... 77% 226M 0s\n",
" 44050K .......... .......... .......... .......... .......... 77% 292M 0s\n",
" 44100K .......... .......... .......... .......... .......... 77% 314M 0s\n",
" 44150K .......... .......... .......... .......... .......... 77% 248M 0s\n",
" 44200K .......... .......... .......... .......... .......... 77% 238M 0s\n",
" 44250K .......... .......... .......... .......... .......... 77% 179M 0s\n",
" 44300K .......... .......... .......... .......... .......... 77% 210M 0s\n",
" 44350K .......... .......... .......... .......... .......... 77% 224M 0s\n",
" 44400K .......... .......... .......... .......... .......... 77% 268M 0s\n",
" 44450K .......... .......... .......... .......... .......... 77% 150M 0s\n",
" 44500K .......... .......... .......... .......... .......... 78% 237M 0s\n",
" 44550K .......... .......... .......... .......... .......... 78% 228M 0s\n",
" 44600K .......... .......... .......... .......... .......... 78% 253M 0s\n",
" 44650K .......... .......... .......... .......... .......... 78% 216M 0s\n",
" 44700K .......... .......... .......... .......... .......... 78% 137M 0s\n",
" 44750K .......... .......... .......... .......... .......... 78% 162M 0s\n",
" 44800K .......... .......... .......... .......... .......... 78% 229M 0s\n",
" 44850K .......... .......... .......... .......... .......... 78% 360M 0s\n",
" 44900K .......... .......... .......... .......... .......... 78% 194M 0s\n",
" 44950K .......... .......... .......... .......... .......... 78% 276M 0s\n",
" 45000K .......... .......... .......... .......... .......... 78% 315M 0s\n",
" 45050K .......... .......... .......... .......... .......... 78% 342M 0s\n",
" 45100K .......... .......... .......... .......... .......... 79% 371M 0s\n",
" 45150K .......... .......... .......... .......... .......... 79% 301M 0s\n",
" 45200K .......... .......... .......... .......... .......... 79% 232M 0s\n",
" 45250K .......... .......... .......... .......... .......... 79% 345M 0s\n",
" 45300K .......... .......... .......... .......... .......... 79% 344M 0s\n",
" 45350K .......... .......... .......... .......... .......... 79% 182M 0s\n",
" 45400K .......... .......... .......... .......... .......... 79% 195M 0s\n",
" 45450K .......... .......... .......... .......... .......... 79% 190M 0s\n",
" 45500K .......... .......... .......... .......... .......... 79% 278M 0s\n",
" 45550K .......... .......... .......... .......... .......... 79% 254M 0s\n",
" 45600K .......... .......... .......... .......... .......... 79% 369M 0s\n",
" 45650K .......... .......... .......... .......... .......... 80% 363M 0s\n",
" 45700K .......... .......... .......... .......... .......... 80% 305M 0s\n",
" 45750K .......... .......... .......... .......... .......... 80% 266M 0s\n",
" 45800K .......... .......... .......... .......... .......... 80% 221M 0s\n",
" 45850K .......... .......... .......... .......... .......... 80% 200M 0s\n",
" 45900K .......... .......... .......... .......... .......... 80% 225M 0s\n",
" 45950K .......... .......... .......... .......... .......... 80% 134M 0s\n",
" 46000K .......... .......... .......... .......... .......... 80% 270M 0s\n",
" 46050K .......... .......... .......... .......... .......... 80% 344M 0s\n",
" 46100K .......... .......... .......... .......... .......... 80% 246M 0s\n",
" 46150K .......... .......... .......... .......... .......... 80% 304M 0s\n",
" 46200K .......... .......... .......... .......... .......... 81% 197M 0s\n",
" 46250K .......... .......... .......... .......... .......... 81% 336M 0s\n",
" 46300K .......... .......... .......... .......... .......... 81% 355M 0s\n",
" 46350K .......... .......... .......... .......... .......... 81% 289M 0s\n",
" 46400K .......... .......... .......... .......... .......... 81% 329M 0s\n",
" 46450K .......... .......... .......... .......... .......... 81% 331M 0s\n",
" 46500K .......... .......... .......... .......... .......... 81% 291M 0s\n",
" 46550K .......... .......... .......... .......... .......... 81% 324M 0s\n",
" 46600K .......... .......... .......... .......... .......... 81% 356M 0s\n",
" 46650K .......... .......... .......... .......... .......... 81% 377M 0s\n",
" 46700K .......... .......... .......... .......... .......... 81% 371M 0s\n",
" 46750K .......... .......... .......... .......... .......... 81% 235M 0s\n",
" 46800K .......... .......... .......... .......... .......... 82% 42.1M 0s\n",
" 46850K .......... .......... .......... .......... .......... 82% 148M 0s\n",
" 46900K .......... .......... .......... .......... .......... 82% 325M 0s\n",
" 46950K .......... .......... .......... .......... .......... 82% 312M 0s\n",
" 47000K .......... .......... .......... .......... .......... 82% 355M 0s\n",
" 47050K .......... .......... .......... .......... .......... 82% 97.9M 0s\n",
" 47100K .......... .......... .......... .......... .......... 82% 217M 0s\n",
" 47150K .......... .......... .......... .......... .......... 82% 160M 0s\n",
" 47200K .......... .......... .......... .......... .......... 82% 135M 0s\n",
" 47250K .......... .......... .......... .......... .......... 82% 205M 0s\n",
" 47300K .......... .......... .......... .......... .......... 82% 228M 0s\n",
" 47350K .......... .......... .......... .......... .......... 83% 213M 0s\n",
" 47400K .......... .......... .......... .......... .......... 83% 183M 0s\n",
" 47450K .......... .......... .......... .......... .......... 83% 332M 0s\n",
" 47500K .......... .......... .......... .......... .......... 83% 373M 0s\n",
" 47550K .......... .......... .......... .......... .......... 83% 320M 0s\n",
" 47600K .......... .......... .......... .......... .......... 83% 290M 0s\n",
" 47650K .......... .......... .......... .......... .......... 83% 341M 0s\n",
" 47700K .......... .......... .......... .......... .......... 83% 221M 0s\n",
" 47750K .......... .......... .......... .......... .......... 83% 190M 0s\n",
" 47800K .......... .......... .......... .......... .......... 83% 314M 0s\n",
" 47850K .......... .......... .......... .......... .......... 83% 360M 0s\n",
" 47900K .......... .......... .......... .......... .......... 83% 376M 0s\n",
" 47950K .......... .......... .......... .......... .......... 84% 323M 0s\n",
" 48000K .......... .......... .......... .......... .......... 84% 20.6M 0s\n",
" 48050K .......... .......... .......... .......... .......... 84% 232M 0s\n",
" 48100K .......... .......... .......... .......... .......... 84% 297M 0s\n",
" 48150K .......... .......... .......... .......... .......... 84% 264M 0s\n",
" 48200K .......... .......... .......... .......... .......... 84% 294M 0s\n",
" 48250K .......... .......... .......... .......... .......... 84% 39.5M 0s\n",
" 48300K .......... .......... .......... .......... .......... 84% 211M 0s\n",
" 48350K .......... .......... .......... .......... .......... 84% 149M 0s\n",
" 48400K .......... .......... .......... .......... .......... 84% 154M 0s\n",
" 48450K .......... .......... .......... .......... .......... 84% 237M 0s\n",
" 48500K .......... .......... .......... .......... .......... 85% 243M 0s\n",
" 48550K .......... .......... .......... .......... .......... 85% 216M 0s\n",
" 48600K .......... .......... .......... .......... .......... 85% 264M 0s\n",
" 48650K .......... .......... .......... .......... .......... 85% 188M 0s\n",
" 48700K .......... .......... .......... .......... .......... 85% 244M 0s\n",
" 48750K .......... .......... .......... .......... .......... 85% 219M 0s\n",
" 48800K .......... .......... .......... .......... .......... 85% 212M 0s\n",
" 48850K .......... .......... .......... .......... .......... 85% 176M 0s\n",
" 48900K .......... .......... .......... .......... .......... 85% 306M 0s\n",
" 48950K .......... .......... .......... .......... .......... 85% 221M 0s\n",
" 49000K .......... .......... .......... .......... .......... 85% 204M 0s\n",
" 49050K .......... .......... .......... .......... .......... 85% 271M 0s\n",
" 49100K .......... .......... .......... .......... .......... 86% 187M 0s\n",
" 49150K .......... .......... .......... .......... .......... 86% 207M 0s\n",
" 49200K .......... .......... .......... .......... .......... 86% 247M 0s\n",
" 49250K .......... .......... .......... .......... .......... 86% 294M 0s\n",
" 49300K .......... .......... .......... .......... .......... 86% 226M 0s\n",
" 49350K .......... .......... .......... .......... .......... 86% 188M 0s\n",
" 49400K .......... .......... .......... .......... .......... 86% 300M 0s\n",
" 49450K .......... .......... .......... .......... .......... 86% 340M 0s\n",
" 49500K .......... .......... .......... .......... .......... 86% 358M 0s\n",
" 49550K .......... .......... .......... .......... .......... 86% 287M 0s\n",
" 49600K .......... .......... .......... .......... .......... 86% 206M 0s\n",
" 49650K .......... .......... .......... .......... .......... 87% 341M 0s\n",
" 49700K .......... .......... .......... .......... .......... 87% 350M 0s\n",
" 49750K .......... .......... .......... .......... .......... 87% 329M 0s\n",
" 49800K .......... .......... .......... .......... .......... 87% 319M 0s\n",
" 49850K .......... .......... .......... .......... .......... 87% 273M 0s\n",
" 49900K .......... .......... .......... .......... .......... 87% 286M 0s\n",
" 49950K .......... .......... .......... .......... .......... 87% 244M 0s\n",
" 50000K .......... .......... .......... .......... .......... 87% 354M 0s\n",
" 50050K .......... .......... .......... .......... .......... 87% 345M 0s\n",
" 50100K .......... .......... .......... .......... .......... 87% 373M 0s\n",
" 50150K .......... .......... .......... .......... .......... 87% 326M 0s\n",
" 50200K .......... .......... .......... .......... .......... 88% 372M 0s\n",
" 50250K .......... .......... .......... .......... .......... 88% 305M 0s\n",
" 50300K .......... .......... .......... .......... .......... 88% 369M 0s\n",
" 50350K .......... .......... .......... .......... .......... 88% 317M 0s\n",
" 50400K .......... .......... .......... .......... .......... 88% 375M 0s\n",
" 50450K .......... .......... .......... .......... .......... 88% 319M 0s\n",
" 50500K .......... .......... .......... .......... .......... 88% 335M 0s\n",
" 50550K .......... .......... .......... .......... .......... 88% 331M 0s\n",
" 50600K .......... .......... .......... .......... .......... 88% 260M 0s\n",
" 50650K .......... .......... .......... .......... .......... 88% 265M 0s\n",
" 50700K .......... .......... .......... .......... .......... 88% 148M 0s\n",
" 50750K .......... .......... .......... .......... .......... 88% 142M 0s\n",
" 50800K .......... .......... .......... .......... .......... 89% 325M 0s\n",
" 50850K .......... .......... .......... .......... .......... 89% 351M 0s\n",
" 50900K .......... .......... .......... .......... .......... 89% 338M 0s\n",
" 50950K .......... .......... .......... .......... .......... 89% 320M 0s\n",
" 51000K .......... .......... .......... .......... .......... 89% 342M 0s\n",
" 51050K .......... .......... .......... .......... .......... 89% 368M 0s\n",
" 51100K .......... .......... .......... .......... .......... 89% 27.0M 0s\n",
" 51150K .......... .......... .......... .......... .......... 89% 182M 0s\n",
" 51200K .......... .......... .......... .......... .......... 89% 159M 0s\n",
" 51250K .......... .......... .......... .......... .......... 89% 325M 0s\n",
" 51300K .......... .......... .......... .......... .......... 89% 334M 0s\n",
" 51350K .......... .......... .......... .......... .......... 90% 319M 0s\n",
" 51400K .......... .......... .......... .......... .......... 90% 338M 0s\n",
" 51450K .......... .......... .......... .......... .......... 90% 100M 0s\n",
" 51500K .......... .......... .......... .......... .......... 90% 215M 0s\n",
" 51550K .......... .......... .......... .......... .......... 90% 126M 0s\n",
" 51600K .......... .......... .......... .......... .......... 90% 208M 0s\n",
" 51650K .......... .......... .......... .......... .......... 90% 43.7M 0s\n",
" 51700K .......... .......... .......... .......... .......... 90% 154M 0s\n",
" 51750K .......... .......... .......... .......... .......... 90% 145M 0s\n",
" 51800K .......... .......... .......... .......... .......... 90% 217M 0s\n",
" 51850K .......... .......... .......... .......... .......... 90% 200M 0s\n",
" 51900K .......... .......... .......... .......... .......... 90% 221M 0s\n",
" 51950K .......... .......... .......... .......... .......... 91% 115M 0s\n",
" 52000K .......... .......... .......... .......... .......... 91% 159M 0s\n",
" 52050K .......... .......... .......... .......... .......... 91% 159M 0s\n",
" 52100K .......... .......... .......... .......... .......... 91% 235M 0s\n",
" 52150K .......... .......... .......... .......... .......... 91% 223M 0s\n",
" 52200K .......... .......... .......... .......... .......... 91% 304M 0s\n",
" 52250K .......... .......... .......... .......... .......... 91% 299M 0s\n",
" 52300K .......... .......... .......... .......... .......... 91% 307M 0s\n",
" 52350K .......... .......... .......... .......... .......... 91% 229M 0s\n",
" 52400K .......... .......... .......... .......... .......... 91% 296M 0s\n",
" 52450K .......... .......... .......... .......... .......... 91% 185M 0s\n",
" 52500K .......... .......... .......... .......... .......... 92% 169M 0s\n",
" 52550K .......... .......... .......... .......... .......... 92% 183M 0s\n",
" 52600K .......... .......... .......... .......... .......... 92% 149M 0s\n",
" 52650K .......... .......... .......... .......... .......... 92% 239M 0s\n",
" 52700K .......... .......... .......... .......... .......... 92% 248M 0s\n",
" 52750K .......... .......... .......... .......... .......... 92% 237M 0s\n",
" 52800K .......... .......... .......... .......... .......... 92% 298M 0s\n",
" 52850K .......... .......... .......... .......... .......... 92% 229M 0s\n",
" 52900K .......... .......... .......... .......... .......... 92% 233M 0s\n",
" 52950K .......... .......... .......... .......... .......... 92% 169M 0s\n",
" 53000K .......... .......... .......... .......... .......... 92% 88.6M 0s\n",
" 53050K .......... .......... .......... .......... .......... 92% 53.7M 0s\n",
" 53100K .......... .......... .......... .......... .......... 93% 139M 0s\n",
" 53150K .......... .......... .......... .......... .......... 93% 217M 0s\n",
" 53200K .......... .......... .......... .......... .......... 93% 287M 0s\n",
" 53250K .......... .......... .......... .......... .......... 93% 209M 0s\n",
" 53300K .......... .......... .......... .......... .......... 93% 195M 0s\n",
" 53350K .......... .......... .......... .......... .......... 93% 269M 0s\n",
" 53400K .......... .......... .......... .......... .......... 93% 313M 0s\n",
" 53450K .......... .......... .......... .......... .......... 93% 267M 0s\n",
" 53500K .......... .......... .......... .......... .......... 93% 260M 0s\n",
" 53550K .......... .......... .......... .......... .......... 93% 157M 0s\n",
" 53600K .......... .......... .......... .......... .......... 93% 267M 0s\n",
" 53650K .......... .......... .......... .......... .......... 94% 301M 0s\n",
" 53700K .......... .......... .......... .......... .......... 94% 281M 0s\n",
" 53750K .......... .......... .......... .......... .......... 94% 144M 0s\n",
" 53800K .......... .......... .......... .......... .......... 94% 212M 0s\n",
" 53850K .......... .......... .......... .......... .......... 94% 310M 0s\n",
" 53900K .......... .......... .......... .......... .......... 94% 299M 0s\n",
" 53950K .......... .......... .......... .......... .......... 94% 261M 0s\n",
" 54000K .......... .......... .......... .......... .......... 94% 219M 0s\n",
" 54050K .......... .......... .......... .......... .......... 94% 310M 0s\n",
" 54100K .......... .......... .......... .......... .......... 94% 310M 0s\n",
" 54150K .......... .......... .......... .......... .......... 94% 279M 0s\n",
" 54200K .......... .......... .......... .......... .......... 95% 281M 0s\n",
" 54250K .......... .......... .......... .......... .......... 95% 309M 0s\n",
" 54300K .......... .......... .......... .......... .......... 95% 250M 0s\n",
" 54350K .......... .......... .......... .......... .......... 95% 254M 0s\n",
" 54400K .......... .......... .......... .......... .......... 95% 318M 0s\n",
" 54450K .......... .......... .......... .......... .......... 95% 248M 0s\n",
" 54500K .......... .......... .......... .......... .......... 95% 245M 0s\n",
" 54550K .......... .......... .......... .......... .......... 95% 158M 0s\n",
" 54600K .......... .......... .......... .......... .......... 95% 297M 0s\n",
" 54650K .......... .......... .......... .......... .......... 95% 307M 0s\n",
" 54700K .......... .......... .......... .......... .......... 95% 292M 0s\n",
" 54750K .......... .......... .......... .......... .......... 95% 40.0M 0s\n",
" 54800K .......... .......... .......... .......... .......... 96% 135M 0s\n",
" 54850K .......... .......... .......... .......... .......... 96% 217M 0s\n",
" 54900K .......... .......... .......... .......... .......... 96% 261M 0s\n",
" 54950K .......... .......... .......... .......... .......... 96% 127M 0s\n",
" 55000K .......... .......... .......... .......... .......... 96% 196M 0s\n",
" 55050K .......... .......... .......... .......... .......... 96% 343M 0s\n",
" 55100K .......... .......... .......... .......... .......... 96% 363M 0s\n",
" 55150K .......... .......... .......... .......... .......... 96% 245M 0s\n",
" 55200K .......... .......... .......... .......... .......... 96% 312M 0s\n",
" 55250K .......... .......... .......... .......... .......... 96% 118M 0s\n",
" 55300K .......... .......... .......... .......... .......... 96% 237M 0s\n",
" 55350K .......... .......... .......... .......... .......... 97% 281M 0s\n",
" 55400K .......... .......... .......... .......... .......... 97% 313M 0s\n",
" 55450K .......... .......... .......... .......... .......... 97% 270M 0s\n",
" 55500K .......... .......... .......... .......... .......... 97% 359M 0s\n",
" 55550K .......... .......... .......... .......... .......... 97% 285M 0s\n",
" 55600K .......... .......... .......... .......... .......... 97% 357M 0s\n",
" 55650K .......... .......... .......... .......... .......... 97% 360M 0s\n",
" 55700K .......... .......... .......... .......... .......... 97% 347M 0s\n",
" 55750K .......... .......... .......... .......... .......... 97% 8.13M 0s\n",
" 55800K .......... .......... .......... .......... .......... 97% 42.2M 0s\n",
" 55850K .......... .......... .......... .......... .......... 97% 53.0M 0s\n",
" 55900K .......... .......... .......... .......... .......... 97% 152M 0s\n",
" 55950K .......... .......... .......... .......... .......... 98% 245M 0s\n",
" 56000K .......... .......... .......... .......... .......... 98% 330M 0s\n",
" 56050K .......... .......... .......... .......... .......... 98% 338M 0s\n",
" 56100K .......... .......... .......... .......... .......... 98% 362M 0s\n",
" 56150K .......... .......... .......... .......... .......... 98% 276M 0s\n",
" 56200K .......... .......... .......... .......... .......... 98% 320M 0s\n",
" 56250K .......... .......... .......... .......... .......... 98% 274M 0s\n",
" 56300K .......... .......... .......... .......... .......... 98% 333M 0s\n",
" 56350K .......... .......... .......... .......... .......... 98% 285M 0s\n",
" 56400K .......... .......... .......... .......... .......... 98% 343M 0s\n",
" 56450K .......... .......... .......... .......... .......... 98% 364M 0s\n",
" 56500K .......... .......... .......... .......... .......... 99% 239M 0s\n",
" 56550K .......... .......... .......... .......... .......... 99% 285M 0s\n",
" 56600K .......... .......... .......... .......... .......... 99% 329M 0s\n",
" 56650K .......... .......... .......... .......... .......... 99% 282M 0s\n",
" 56700K .......... .......... .......... .......... .......... 99% 324M 0s\n",
" 56750K .......... .......... .......... .......... .......... 99% 307M 0s\n",
" 56800K .......... .......... .......... .......... .......... 99% 267M 0s\n",
" 56850K .......... .......... .......... .......... .......... 99% 290M 0s\n",
" 56900K .......... .......... .......... .......... .......... 99% 370M 0s\n",
" 56950K .......... .......... .......... .......... .......... 99% 287M 0s\n",
" 57000K .......... .......... .......... .......... .......... 99% 342M 0s\n",
" 57050K .......... .......... .......... .......... ........ 100% 342M=0.4s\n",
"\n",
"2022-09-16 19:45:23 (155 MB/s) - ‘Miniconda3-4.5.4-Linux-x86_64.sh’ saved [58468498/58468498]\n",
"\n",
"Python 3.6.5 :: Anaconda, Inc.\n"
]
}
],
"source": [
"%%bash\n",
"# Download the Miniconda installer and install it non-interactively into /usr/local.\n",
"set -euo pipefail  # stop at the first failing command instead of carrying on\n",
"MINICONDA_INSTALLER_SCRIPT=Miniconda3-4.5.4-Linux-x86_64.sh\n",
"MINICONDA_PREFIX=/usr/local\n",
"# -nv prints one summary line instead of thousands of progress lines;\n",
"# -O overwrites an earlier download instead of saving a numbered duplicate.\n",
"wget -nv -O \"$MINICONDA_INSTALLER_SCRIPT\" \"https://repo.anaconda.com/miniconda/$MINICONDA_INSTALLER_SCRIPT\"\n",
"chmod +x \"$MINICONDA_INSTALLER_SCRIPT\"\n",
"# -b: batch mode (no prompts), -f: no error if the prefix already exists, -p: install prefix\n",
"\"./$MINICONDA_INSTALLER_SCRIPT\" -b -f -p \"$MINICONDA_PREFIX\""
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "g-8IlDZAd7Jx",
"outputId": "d90d7c98-994a-427d-f597-71eb5d4aef88"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Solving environment: - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- 
\b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\bdone\n",
"\n",
"\n",
"==> WARNING: A newer version of conda exists. <==\n",
" current version: 4.5.4\n",
" latest version: 4.14.0\n",
"\n",
"Please update conda by running\n",
"\n",
" $ conda update -n base conda\n",
"\n",
"\n",
"\n",
"## Package Plan ##\n",
"\n",
" environment location: /usr/local\n",
"\n",
" added / updated specs: \n",
" - viennarna\n",
"\n",
"\n",
"The following packages will be downloaded:\n",
"\n",
" package | build\n",
" ---------------------------|-----------------\n",
" sqlite-3.39.2 | h5082296_0 1.5 MB\n",
" wheel-0.37.1 | pyhd3eb1b0_0 31 KB\n",
" xz-5.2.5 | h7f8727e_1 389 KB\n",
" libgcc-ng-11.2.0 | h1234567_1 8.5 MB\n",
" python-3.7.13 | h12debd9_0 53.5 MB\n",
" ncurses-6.3 | h5eee18b_3 1.1 MB\n",
" pip-22.1.2 | py37h06a4308_0 2.9 MB\n",
" viennarna-2.4.14 | py37h8b12597_0 14.1 MB bioconda\n",
" libffi-3.3 | he6710b0_2 54 KB\n",
" ld_impl_linux-64-2.38 | h1181459_1 732 KB\n",
" setuptools-63.4.1 | py37h06a4308_0 1.4 MB\n",
" readline-8.1.2 | h7f8727e_1 423 KB\n",
" zlib-1.2.12 | h5eee18b_3 124 KB\n",
" tk-8.6.12 | h1ccaba5_0 3.3 MB\n",
" ca-certificates-2022.07.19 | h06a4308_0 131 KB\n",
" certifi-2022.6.15 | py37h06a4308_0 156 KB\n",
" libstdcxx-ng-11.2.0 | h1234567_1 6.1 MB\n",
" openssl-1.1.1q | h7f8727e_0 3.8 MB\n",
" ------------------------------------------------------------\n",
" Total: 98.1 MB\n",
"\n",
"The following NEW packages will be INSTALLED:\n",
"\n",
" ld_impl_linux-64: 2.38-h1181459_1 \n",
" viennarna: 2.4.14-py37h8b12597_0 bioconda\n",
"\n",
"The following packages will be UPDATED:\n",
"\n",
" ca-certificates: 2018.03.07-0 --> 2022.07.19-h06a4308_0 \n",
" certifi: 2018.4.16-py36_0 --> 2022.6.15-py37h06a4308_0\n",
" libffi: 3.2.1-hd88cf55_4 --> 3.3-he6710b0_2 \n",
" libgcc-ng: 7.2.0-hdf63c60_3 --> 11.2.0-h1234567_1 \n",
" libstdcxx-ng: 7.2.0-hdf63c60_3 --> 11.2.0-h1234567_1 \n",
" ncurses: 6.1-hf484d3e_0 --> 6.3-h5eee18b_3 \n",
" openssl: 1.0.2o-h20670df_0 --> 1.1.1q-h7f8727e_0 \n",
" pip: 10.0.1-py36_0 --> 22.1.2-py37h06a4308_0 \n",
" python: 3.6.5-hc3d631a_2 --> 3.7.13-h12debd9_0 \n",
" readline: 7.0-ha6073c6_4 --> 8.1.2-h7f8727e_1 \n",
" setuptools: 39.2.0-py36_0 --> 63.4.1-py37h06a4308_0 \n",
" sqlite: 3.23.1-he433501_0 --> 3.39.2-h5082296_0 \n",
" tk: 8.6.7-hc745277_3 --> 8.6.12-h1ccaba5_0 \n",
" wheel: 0.31.1-py36_0 --> 0.37.1-pyhd3eb1b0_0 \n",
" xz: 5.2.4-h14c3975_4 --> 5.2.5-h7f8727e_1 \n",
" zlib: 1.2.11-ha838bed_2 --> 1.2.12-h5eee18b_3 \n",
"\n",
"\n",
"Downloading and Extracting Packages\n",
"sqlite-3.39.2 | 1.5 MB | : 100% 1.0/1 [00:00<00:00, 2.94it/s] \n",
"wheel-0.37.1 | 31 KB | : 100% 1.0/1 [00:00<00:00, 20.12it/s]\n",
"xz-5.2.5 | 389 KB | : 100% 1.0/1 [00:00<00:00, 5.52it/s] \n",
"libgcc-ng-11.2.0 | 8.5 MB | : 100% 1.0/1 [00:01<00:00, 1.59s/it] \n",
"python-3.7.13 | 53.5 MB | : 100% 1.0/1 [00:10<00:00, 10.24s/it] \n",
"ncurses-6.3 | 1.1 MB | : 100% 1.0/1 [00:01<00:00, 1.07s/it] \n",
"pip-22.1.2 | 2.9 MB | : 100% 1.0/1 [00:01<00:00, 1.26s/it] \n",
"viennarna-2.4.14 | 14.1 MB | : 100% 1.0/1 [00:03<00:00, 3.84s/it] \n",
"libffi-3.3 | 54 KB | : 100% 1.0/1 [00:00<00:00, 18.44it/s]\n",
"ld_impl_linux-64-2.3 | 732 KB | : 100% 1.0/1 [00:00<00:00, 5.07it/s] \n",
"setuptools-63.4.1 | 1.4 MB | : 100% 1.0/1 [00:00<00:00, 1.53it/s] \n",
"readline-8.1.2 | 423 KB | : 100% 1.0/1 [00:00<00:00, 5.77it/s] \n",
"zlib-1.2.12 | 124 KB | : 100% 1.0/1 [00:00<00:00, 14.88it/s]\n",
"tk-8.6.12 | 3.3 MB | : 100% 1.0/1 [00:00<00:00, 1.11it/s] \n",
"ca-certificates-2022 | 131 KB | : 100% 1.0/1 [00:00<00:00, 17.71it/s]\n",
"certifi-2022.6.15 | 156 KB | : 100% 1.0/1 [00:00<00:00, 16.72it/s]\n",
"libstdcxx-ng-11.2.0 | 6.1 MB | : 100% 1.0/1 [00:01<00:00, 1.26s/it] \n",
"openssl-1.1.1q | 3.8 MB | : 100% 1.0/1 [00:00<00:00, 1.16it/s] \n",
"Preparing transaction: | \b\b/ \b\b- \b\b\\ \b\b| \b\bdone\n",
"Verifying transaction: - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\bdone\n",
"Executing transaction: \\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\bdone\n"
]
}
],
"source": [
"!conda install -c bioconda -y viennarna"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"id": "9PIMhlfJgp4M"
},
"outputs": [],
"source": [
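"# import RNA  # ViennaRNA Python bindings (optional, left disabled); the RNAfold command-line tool is called below instead"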
]
},
{
"cell_type": "markdown",
"source": [
"#### Now we can build functions for making calculations"
],
"metadata": {
"id": "zebI95yk77lH"
}
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"id": "wSDcrIwJh6en"
},
"outputs": [],
"source": [
"import subprocess\n",
"def rna_fold_rnafold(frag, temperature):\n",
" args = [\"RNAfold\", \"-p\", \"-T\", str(temperature)]\n",
" fc = subprocess.run(args, input=str(frag), check=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
" out = str(fc.stdout)\n",
" test = out.splitlines()\n",
" structure = test[1].split()[0]\n",
" centroid = test[3].split()[0]\n",
" MFE = test[1].split(\" \", 1)[1]\n",
" try:\n",
" MFE = float(re.sub('[()]', '', MFE))\n",
" except:\n",
" print(\"Error parsing MFE values\", test)\n",
" ED = float(test[4].split()[-1])\n",
"\n",
" return (structure, centroid, MFE, ED)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"id": "N7d2GiWVYoXg"
},
"outputs": [],
"source": [
"def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):\n",
" def input_function(): # inner function, this will be returned\n",
" ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df)) # create tf.data.Dataset object with data and its label\n",
" if shuffle:\n",
" ds = ds.shuffle(1000) # randomize order of data\n",
" ds = ds.batch(batch_size).repeat(num_epochs) # split dataset into batches of 32 and repeat process for number of epochs\n",
" return ds # return a batch of the dataset\n",
" return input_function # return a function object for use\n",
"\n"
]
},
{
"cell_type": "markdown",
"source": [
"### Making a single prediction using model. You can input any sequence you want after \"frag\""
],
"metadata": {
"id": "oPLejdZq4sjg"
}
},
{
"cell_type": "code",
"source": [
"#vegfa frag: \n",
"frag = \"GACTCTGCGCAGAGCACTTTGGGTCCGGAGGGCGAGACTCCGGCGGAAGCATTCCCGGGCGGGTGACCCAGCACGGTCCCTCTTGGAATTGGATTCGCCATTTTATTTTTCTTGCTGCTAAATCACCGAGCCCGGAAGATTAGAGAGTTTTATTTCTGGGATTCCTGTAGACACACCCACCCACATACATACATTTATATATATATATATTATATATATATAAAAATAAATATCTCTATTTTATATATATAAAATATATATATTCTTTTTTTAAATTAACAGTGCTAATGTTATTGGTGTCTTCACTGGATGTATTTGACTGCTGTGGACTTGAGTTGGGAGGGGAATGTTC\"\n",
"#rag = input('Input sequence for testing:')\n",
"frag = frag.replace(\"T\", \"U\")\n",
"\n",
"\n",
"# Calculate frag features\n",
"GCpercent = get_gc_content(frag)\n",
"CGratio = get_cg_ratio(frag)\n",
"AUratio = get_au_ratio(frag)\n",
"di_freqs = get_di_freqs(frag)\n",
"\n",
"# use function (rna_fold_rnafold) to get MFE (can use python binding method if available)\n",
"structure, centroid, MFE, ED = rna_fold_rnafold(frag, 37)\n",
"MFE = MFE.replace(\")\", \"\")\n",
"MFE = float(MFE.replace(\"(\", \"\"))\n",
"MFE = float(MFE)\n",
"\n",
"# put features in a list\n",
"full_features = [float(len(frag)), float(GCpercent), float(CGratio), float(AUratio)]\n",
"\n",
"# Append the dinucleotide frequencies\n",
"for freq in di_freqs:\n",
" full_features.append(freq)\n",
"\n",
"# convert feature list to pandas dataframe AND TRANSPOSE to give correct shape (1,20)\n",
"full_predict = pd.DataFrame(full_features).transpose()\n",
"full_predict.columns = [\"Length\", \"GCpercent\",\"CGratio\", \"AUratio\", \"AA\",\"AU\",\"AG\",\"AC\",\"UA\",\"UU\",\"UG\",\"UC\",\"GA\",\"GU\",\"GG\",\"GC\",\"CA\",\"CU\",\"CG\", \"CC\"]\n",
"\n",
"# Predict mean MFE and standard deviation based on features\n",
"meanMFE_result = mean_mfe_model.predict(full_predict)\n",
"stddev_result = std_dev_model.predict(full_predict)\n",
"\n",
"# Extract values and calculate z-score\n",
"meanMFE = meanMFE_result[0][0]\n",
"stddev = stddev_result[0][0]\n",
"zscore = round((MFE-meanMFE)/(stddev), 2)\n",
"print(f'\\n\\n\\nSequence analyzed:\\n{frag}\\n\\nSequence Length: {str(len(frag))} nt\\nMeanMFE prediction: {round(float(meanMFE), 2)}\\nPredicted MFE Standard Deviation: {round(float(stddev), 2)}\\nRNAfold MFE: {round(float(MFE), 2)}\\n\\nCalculated z-score: {round(zscore, 2)}')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LuD-58X74ukh",
"outputId": "c2b9f3c4-9e06-44ee-9053-f84b07da4abe"
},
"execution_count": 104,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Error parsing MFE values ['GACUCUGCGCAGAGCACUUUGGGUCCGGAGGGCGAGACUCCGGCGGAAGCAUUCCCGGGCGGGUGACCCAGCACGGUCCCUCUUGGAAUUGGAUUCGCCAUUUUAUUUUUCUUGCUGCUAAAUCACCGAGCCCGGAAGAUUAGAGAGUUUUAUUUCUGGGAUUCCUGUAGACACACCCACCCACAUACAUACAUUUAUAUAUAUAUAUAUUAUAUAUAUAUAAAAAUAAAUAUCUCUAUUUUAUAUAUAUAAAAUAUAUAUAUUCUUUUUUUAAAUUAACAGUGCUAAUGUUAUUGGUGUCUUCACUGGAUGUAUUUGACUGCUGUGGACUUGAGUUGGGAGGGGAAUGUUC', '((((((((.....)))....)))))(((((.......))))).....(((((((((((((.(((((..(((((.((.......((((((.((.....))))))))....)).))))).....)))))..))))((((..(((((((......)))))))..))))(((....)))((((((((((..((..((..((((((((((((((((.(((((((((((((.((........))))))))))))))).))))))))))).............(((((.......)))))..((((....))))..)))))..))..)).)))))......).))))..))))))))). (-95.00)', ',,(((({(..,,,)}}.,,,}||}((((((.......)))))},.||{((((((((((((.(((((..{((((.((.......{{(((.{((.....)))})))}.....}.))})),,...)))))..))))((((..(((((((......)))))))..)))),{,....}},(((({(((((,,,({{(({{({,,,(((((((((((.(((((((((((({{((........))))))))))))))).))))))))))).............{{(((,,.....})))),}|(((....)))))))))))}.))..,,.)))))......),}))).,))))))))}. [-103.06]', '.........................(((((.......)))))......((((((((((((.(((((..(((((..........(((((.(((.....)))))))).......))))).....)))))..))))((((..(((((((......)))))))..))))..........(((..(((((...............(((((((((((.(((((((((((..(((........))).))))))))))).))))))))))).............(((((.......)))))...(((....))).................))))).........)))..)))))))).. {-77.30 d=65.02}', ' frequency of mfe structure in ensemble 2.1041e-06; ensemble diversity 98.73 ']\n",
"\n",
"\n",
"\n",
"Sequence analyzed:\n",
"GACUCUGCGCAGAGCACUUUGGGUCCGGAGGGCGAGACUCCGGCGGAAGCAUUCCCGGGCGGGUGACCCAGCACGGUCCCUCUUGGAAUUGGAUUCGCCAUUUUAUUUUUCUUGCUGCUAAAUCACCGAGCCCGGAAGAUUAGAGAGUUUUAUUUCUGGGAUUCCUGUAGACACACCCACCCACAUACAUACAUUUAUAUAUAUAUAUAUUAUAUAUAUAUAAAAAUAAAUAUCUCUAUUUUAUAUAUAUAAAAUAUAUAUAUUCUUUUUUUAAAUUAACAGUGCUAAUGUUAUUGGUGUCUUCACUGGAUGUAUUUGACUGCUGUGGACUUGAGUUGGGAGGGGAAUGUUC\n",
"\n",
"Sequence Length: 352 nt\n",
"MeanMFE prediction: -63.41\n",
"Predicted MFE Standard Deviation: 5.09\n",
"RNAfold MFE: -95.0\n",
"\n",
"Calculated z-score: -6.2\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Questions or suggestions?\n",
"\n",
"Email:\n",
"\n",
"- Ryan J. Andrews: ryan.j.rna@gmail.com\n",
"- Warren B. Rouse: wbrouse@iastate.edu\n",
"- Walter N. Moss: wmoss@iastate.edu\n"
],
"metadata": {
"id": "YlcioH8vBF8y"
}
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"collapsed_sections": [
"bMr7MPVmoiHf",
"xchnxAsaKKqO",
"SIpsyJITPcbG",
"Ak_TMAzGOIFq",
"_JAG5JYytnTr",
"vStvuHK_t1hs",
"NixZHwDBGPid",
"q03hC506k96D"
],
"machine_shape": "hm",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 0
}