{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\" \n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "#Import all the necessary packages\n", "import pandas as pd\n", "import numpy as np\n", "import keras as k\n", "import tensorflow as tf\n", "from sklearn.preprocessing import MinMaxScaler\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "from matplotlib import rcParams, cycler\n", "from sklearn.model_selection import train_test_split\n", "from tensorflow.keras.callbacks import EarlyStopping\n", "from tensorflow.keras.callbacks import ModelCheckpoint\n", "from tensorflow.keras.callbacks import ReduceLROnPlateau\n", "from tensorflow.keras.models import Model, load_model\n", "from tensorboard.plugins.hparams import api as hp\n", "from tensorflow.keras import regularizers\n", "from pickle import dump\n", "from sklearn.model_selection import KFold\n", "import tensorflowjs as tfjs\n", "import keract\n", "import datetime\n", "import math" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[name: \"/device:CPU:0\"\n", "device_type: \"CPU\"\n", "memory_limit: 268435456\n", "locality {\n", "}\n", "incarnation: 3595000761477948485\n", ", name: \"/device:GPU:0\"\n", "device_type: \"GPU\"\n", "memory_limit: 4937233203\n", "locality {\n", " bus_id: 1\n", " links {\n", " }\n", "}\n", "incarnation: 668913008403759742\n", "physical_device_desc: \"device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0, compute capability: 6.1\"\n", "]\n" ] } ], "source": [ "from tensorflow.python.client import device_lib \n", "print(device_lib.list_local_devices())" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Hyperparameters\n", "BATCH_SIZE = 128\n", "LEARNING_RATE = 0.001\n", "DROPOUT = 0.004\n", "VALIDATION_SPLIT = 0.1\n", "OPTIMIZER = 'adam'\n", "\n", "LAYER_1 = 900\n", "LAYER_2 = 150\n", "LAYER_3 = 700\n", "LAYER_4 = 550\n", "LAYER_5 = 950\n", "LAYER_6 = 950" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Create a model\n", "def create_model():\n", " model = tf.keras.Sequential() \n", " model.add(tf.keras.layers.Dense(128, input_dim=len(keys) - 1, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", " model.add(tf.keras.layers.Dense(1))\n", "\n", " def rmsle(y_true, y_pred):\n", " y_pred_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)\n", " y_true_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)\n", " return K.sqrt(K.mean(K.square(y_pred_log - y_true_log), axis = -1))\n", "\n", " model.compile(loss='mean_squared_error', \n", " optimizer=tf.keras.optimizers.Adam(lr=LEARNING_RATE, beta_1=0.9, beta_2=0.999, clipnorm=1.0)\n", " )\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Note: median values were scaled by multiplying by 0.0000011173 and adding -0.111732\n", "4043\n", "4043\n", "3226\n", "817\n" ] } ], "source": [ "# Load the dataset\n", "dataset = pd.read_csv(\"./Data/dataset_final.csv\")\n", "\n", "# Get keys\n", "keys = dataset.keys()\n", "\n", "# Target key\n", "target = 'deptFreePrice'\n", "\n", "# Initialize scaler\n", "scaler = MinMaxScaler(feature_range=(0, 1))\n", "\n", "# Scale both the training inputs and outputs\n", "scaled_train = scaler.fit_transform(dataset[keys])\n", "\n", "print(\"Note: median values were scaled by multiplying by {:.10f} and adding {:.6f}\".format(scaler.scale_[6], scaler.min_[6]))\n", "\n", "# Store the values to scale price back to understandable form\n", "multiplied_by = scaler.scale_[6]\n", "added = scaler.min_[6]\n", "\n", "scalerData = {'scale_': scaler.scale_, 'min_': scaler.min_}\n", "scalerDf = pd.DataFrame(scalerData, columns = ['scale_', 'min_'])\n", "\n", "\n", "scaled_train = pd.DataFrame(scaled_train, columns=keys)\n", "\n", "# Divide dataset into training and testing sets\n", "msk = np.random.rand(len(scaled_train)) < 0.8\n", "\n", "X, Y = scaled_train.drop(target, axis=1).values, scaled_train[target].values\n", "X_train_2 =scaled_train[msk].values\n", "\n", "X_train, X_test = scaled_train[msk].drop(target, axis=1).values, scaled_train[~msk].drop(target, axis=1).values\n", "Y_train, Y_test = scaled_train[msk][target].values, scaled_train[~msk][target].values\n", "\n", "print(len(X))\n", "print(len(Y))\n", "print(len(X_train))\n", "print(len(X_test))" ] }, { "cell_type": "code", "execution_count": 192, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_4\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "dense_44 (Dense) (None, 128) 5504 \n", "_________________________________________________________________\n", "dense_45 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_46 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_47 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_48 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_49 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_50 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_51 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_52 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_53 (Dense) (None, 128) 16512 \n", "_________________________________________________________________\n", "dense_54 (Dense) (None, 1) 129 \n", "=================================================================\n", "Total params: 154,241\n", "Trainable params: 154,241\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "None\n", "Train on 2903 samples, validate on 323 samples\n", "Epoch 1/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0312 \n", "Epoch 00001: val_loss improved from inf to 0.02629, saving model to best_model.h5\n", "2903/2903 [==============================] - 1s 266us/sample - loss: 0.0282 - val_loss: 0.0263\n", "Epoch 2/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0116\n", "Epoch 00002: val_loss improved from 0.02629 to 0.01875, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 44us/sample - loss: 0.0111 - val_loss: 0.0188\n", "Epoch 3/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 0.0064\n", "Epoch 00003: val_loss improved from 0.01875 to 0.01634, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 43us/sample - loss: 0.0065 - val_loss: 0.0163\n", "Epoch 4/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0049\n", "Epoch 00004: val_loss improved from 0.01634 to 0.01453, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 42us/sample - loss: 0.0050 - val_loss: 0.0145\n", "Epoch 5/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0039\n", "Epoch 00005: val_loss improved from 0.01453 to 0.01223, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 40us/sample - loss: 0.0038 - val_loss: 0.0122\n", "Epoch 6/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0033\n", "Epoch 00006: val_loss improved from 0.01223 to 0.01057, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 42us/sample - loss: 0.0032 - val_loss: 0.0106\n", "Epoch 7/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 0.0032\n", "Epoch 00007: val_loss did not improve from 0.01057\n", "2903/2903 [==============================] - 0s 31us/sample - loss: 0.0034 - val_loss: 0.0114\n", "Epoch 8/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0031\n", "Epoch 00008: val_loss did not improve from 0.01057\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 0.0031 - val_loss: 0.0124\n", "Epoch 9/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0029\n", "Epoch 00009: val_loss did not improve from 0.01057\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 0.0030 - val_loss: 0.0118\n", "Epoch 10/2000\n", "1408/2903 [=============>................] - ETA: 0s - loss: 0.0024\n", "Epoch 00010: val_loss did not improve from 0.01057\n", "2903/2903 [==============================] - 0s 37us/sample - loss: 0.0027 - val_loss: 0.0113\n", "Epoch 11/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0029\n", "Epoch 00011: val_loss did not improve from 0.01057\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 0.0027 - val_loss: 0.0108\n", "Epoch 12/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 0.0022\n", "Epoch 00012: val_loss improved from 0.01057 to 0.01029, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 51us/sample - loss: 0.0024 - val_loss: 0.0103\n", "Epoch 13/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 0.0021\n", "Epoch 00013: val_loss did not improve from 0.01029\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 0.0022 - val_loss: 0.0146\n", "Epoch 14/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0023\n", "Epoch 00014: val_loss did not improve from 0.01029\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 0.0023 - val_loss: 0.0117\n", "Epoch 15/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0023\n", "Epoch 00015: val_loss did not improve from 0.01029\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 0.0023 - val_loss: 0.0107\n", "Epoch 16/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0019\n", "Epoch 00016: val_loss improved from 0.01029 to 0.00999, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 51us/sample - loss: 0.0022 - val_loss: 0.0100\n", "Epoch 17/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 0.0023\n", "Epoch 00017: val_loss did not improve from 0.00999\n", "2903/2903 [==============================] - 0s 37us/sample - loss: 0.0024 - val_loss: 0.0103\n", "Epoch 18/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 0.0021\n", "Epoch 00018: val_loss improved from 0.00999 to 0.00956, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 49us/sample - loss: 0.0021 - val_loss: 0.0096\n", "Epoch 19/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0022\n", "Epoch 00019: val_loss did not improve from 0.00956\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 0.0023 - val_loss: 0.0108\n", "Epoch 20/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0022\n", "Epoch 00020: val_loss did not improve from 0.00956\n", "2903/2903 [==============================] - 0s 30us/sample - loss: 0.0022 - val_loss: 0.0111\n", "Epoch 21/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0015\n", "Epoch 00021: val_loss improved from 0.00956 to 0.00909, saving model to best_model.h5\n", "2903/2903 [==============================] - 0s 46us/sample - loss: 0.0020 - val_loss: 0.0091\n", "Epoch 22/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0019\n", "Epoch 00022: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 0.0019 - val_loss: 0.0097\n", "Epoch 23/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0017\n", "Epoch 00023: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 0.0017 - val_loss: 0.0117\n", "Epoch 24/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 0.0021\n", "Epoch 00024: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 29us/sample - loss: 0.0021 - val_loss: 0.0097\n", "Epoch 25/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0017\n", "Epoch 00025: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 0.0016 - val_loss: 0.0095\n", "Epoch 26/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0015\n", "Epoch 00026: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 0.0015 - val_loss: 0.0114\n", "Epoch 27/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0015\n", "Epoch 00027: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 26us/sample - loss: 0.0015 - val_loss: 0.0117\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 28/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0017\n", "Epoch 00028: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 0.0018 - val_loss: 0.0117\n", "Epoch 29/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 0.0016\n", "Epoch 00029: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 31us/sample - loss: 0.0017 - val_loss: 0.0105\n", "Epoch 30/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0015\n", "Epoch 00030: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 0.0016 - val_loss: 0.0103\n", "Epoch 31/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 0.0016\n", "Epoch 00031: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 36us/sample - loss: 0.0016 - val_loss: 0.0099\n", "Epoch 32/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0014\n", "Epoch 00032: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 0.0015 - val_loss: 0.0110\n", "Epoch 33/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 0.0017\n", "Epoch 00033: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 39us/sample - loss: 0.0016 - val_loss: 0.0106\n", "Epoch 34/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0013\n", "Epoch 00034: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 0.0013 - val_loss: 0.0114\n", "Epoch 35/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0012\n", "Epoch 00035: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 0.0013 - val_loss: 0.0102\n", "Epoch 36/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0013\n", "Epoch 00036: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 0.0013 - val_loss: 0.0096\n", "Epoch 37/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0012 \n", "Epoch 00037: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 0.0013 - val_loss: 0.0102\n", "Epoch 38/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0014 \n", "Epoch 00038: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 0.0014 - val_loss: 0.0104\n", "Epoch 39/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0015\n", "Epoch 00039: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 0.0015 - val_loss: 0.0110\n", "Epoch 40/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0014\n", "Epoch 00040: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 0.0014 - val_loss: 0.0109\n", "Epoch 41/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0013\n", "Epoch 00041: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 0.0013 - val_loss: 0.0105\n", "Epoch 42/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 0.0013\n", "Epoch 00042: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 30us/sample - loss: 0.0012 - val_loss: 0.0112\n", "Epoch 43/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0011 \n", "Epoch 00043: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 0.0011 - val_loss: 0.0113\n", "Epoch 44/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0013 \n", "Epoch 00044: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 0.0013 - val_loss: 0.0118\n", "Epoch 45/2000\n", "2560/2903 [=========================>....] - ETA: 0s - loss: 0.0013\n", "Epoch 00045: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 25us/sample - loss: 0.0013 - val_loss: 0.0114\n", "Epoch 46/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 0.0011 \n", "Epoch 00046: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 29us/sample - loss: 0.0011 - val_loss: 0.0113\n", "Epoch 47/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0011\n", "Epoch 00047: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 29us/sample - loss: 0.0011 - val_loss: 0.0109\n", "Epoch 48/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0011 \n", "Epoch 00048: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 26us/sample - loss: 0.0011 - val_loss: 0.0111\n", "Epoch 49/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 0.0011 \n", "Epoch 00049: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 0.0011 - val_loss: 0.0118\n", "Epoch 50/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0010\n", "Epoch 00050: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 0.0011 - val_loss: 0.0117\n", "Epoch 51/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 9.9550e-04\n", "Epoch 00051: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 29us/sample - loss: 0.0011 - val_loss: 0.0111\n", "Epoch 52/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 0.0011 \n", "Epoch 00052: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 0.0011 - val_loss: 0.0105\n", "Epoch 53/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 9.7608e-04\n", "Epoch 00053: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 9.7248e-04 - val_loss: 0.0121\n", "Epoch 54/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 9.1915e-04\n", "Epoch 00054: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 31us/sample - loss: 9.8284e-04 - val_loss: 0.0116\n", "Epoch 55/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0010\n", "Epoch 00055: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 0.0010 - val_loss: 0.0115\n", "Epoch 56/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 9.5331e-04\n", "Epoch 00056: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 9.7758e-04 - val_loss: 0.0111\n", "Epoch 57/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 9.8361e-04\n", "Epoch 00057: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 9.6959e-04 - val_loss: 0.0113\n", "Epoch 58/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 9.1950e-04\n", "Epoch 00058: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 9.5159e-04 - val_loss: 0.0105\n", "Epoch 59/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 8.8970e-04\n", "Epoch 00059: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 36us/sample - loss: 0.0010 - val_loss: 0.0116\n", "Epoch 60/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 8.1791e-04\n", "Epoch 00060: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 36us/sample - loss: 9.2781e-04 - val_loss: 0.0114\n", "Epoch 61/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 9.4657e-04\n", "Epoch 00061: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 38us/sample - loss: 9.3655e-04 - val_loss: 0.0119\n", "Epoch 62/2000\n", "1664/2903 [================>.............] - ETA: 0s - loss: 0.0011 \n", "Epoch 00062: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 39us/sample - loss: 0.0011 - val_loss: 0.0132\n", "Epoch 63/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 0.0013\n", "Epoch 00063: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 40us/sample - loss: 0.0013 - val_loss: 0.0110\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 64/2000\n", "1280/2903 [============>.................] - ETA: 0s - loss: 8.7640e-04\n", "Epoch 00064: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 42us/sample - loss: 8.4999e-04 - val_loss: 0.0104\n", "Epoch 65/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 7.9611e-04\n", "Epoch 00065: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 8.7371e-04 - val_loss: 0.0108\n", "Epoch 66/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 8.4635e-04\n", "Epoch 00066: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 9.1774e-04 - val_loss: 0.0123\n", "Epoch 67/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 8.8140e-04\n", "Epoch 00067: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 8.7677e-04 - val_loss: 0.0115\n", "Epoch 68/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 8.1287e-04\n", "Epoch 00068: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 26us/sample - loss: 8.2816e-04 - val_loss: 0.0119\n", "Epoch 69/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 9.5661e-04\n", "Epoch 00069: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 9.4845e-04 - val_loss: 0.0111\n", "Epoch 70/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 9.4214e-04\n", "Epoch 00070: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 9.2418e-04 - val_loss: 0.0115\n", "Epoch 71/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 8.8652e-04\n", "Epoch 00071: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 30us/sample - loss: 9.3996e-04 - val_loss: 0.0120\n", "Epoch 72/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 0.0015\n", "Epoch 00072: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 0.0014 - val_loss: 0.0117\n", "Epoch 73/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 9.7117e-04\n", "Epoch 00073: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 9.7685e-04 - val_loss: 0.0109\n", "Epoch 74/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 8.1636e-04\n", "Epoch 00074: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 31us/sample - loss: 8.3132e-04 - val_loss: 0.0116\n", "Epoch 75/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 8.1337e-04\n", "Epoch 00075: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 9.9363e-04 - val_loss: 0.0125\n", "Epoch 76/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 0.0013\n", "Epoch 00076: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 0.0012 - val_loss: 0.0114\n", "Epoch 77/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 7.2394e-04\n", "Epoch 00077: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 30us/sample - loss: 7.4719e-04 - val_loss: 0.0113\n", "Epoch 78/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 7.8732e-04\n", "Epoch 00078: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 8.3259e-04 - val_loss: 0.0106\n", "Epoch 79/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 7.6195e-04\n", "Epoch 00079: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 8.6152e-04 - val_loss: 0.0128\n", "Epoch 80/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 8.4719e-04\n", "Epoch 00080: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 8.5586e-04 - val_loss: 0.0123\n", "Epoch 81/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 7.7491e-04\n", "Epoch 00081: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 31us/sample - loss: 8.3792e-04 - val_loss: 0.0107\n", "Epoch 82/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 7.1667e-04\n", "Epoch 00082: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 7.4843e-04 - val_loss: 0.0114\n", "Epoch 83/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 7.4752e-04\n", "Epoch 00083: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 7.1481e-04 - val_loss: 0.0117\n", "Epoch 84/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 6.5041e-04\n", "Epoch 00084: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 7.3454e-04 - val_loss: 0.0126\n", "Epoch 85/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 7.7535e-04\n", "Epoch 00085: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 36us/sample - loss: 7.4483e-04 - val_loss: 0.0114\n", "Epoch 86/2000\n", "1664/2903 [================>.............] - ETA: 0s - loss: 7.5964e-04\n", "Epoch 00086: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 37us/sample - loss: 7.8158e-04 - val_loss: 0.0122\n", "Epoch 87/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 7.6849e-04\n", "Epoch 00087: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 37us/sample - loss: 7.2185e-04 - val_loss: 0.0135\n", "Epoch 88/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 6.5576e-04\n", "Epoch 00088: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 36us/sample - loss: 6.4484e-04 - val_loss: 0.0107\n", "Epoch 89/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 6.7173e-04\n", "Epoch 00089: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 6.7766e-04 - val_loss: 0.0119\n", "Epoch 90/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 5.9617e-04\n", "Epoch 00090: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 29us/sample - loss: 6.3005e-04 - val_loss: 0.0116\n", "Epoch 91/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 7.2313e-04\n", "Epoch 00091: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 7.1930e-04 - val_loss: 0.0124\n", "Epoch 92/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 7.0330e-04\n", "Epoch 00092: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 7.2412e-04 - val_loss: 0.0112\n", "Epoch 93/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 6.7305e-04\n", "Epoch 00093: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 30us/sample - loss: 7.0767e-04 - val_loss: 0.0108\n", "Epoch 94/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 6.9092e-04\n", "Epoch 00094: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 29us/sample - loss: 6.9017e-04 - val_loss: 0.0115\n", "Epoch 95/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 6.5161e-04\n", "Epoch 00095: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 6.3312e-04 - val_loss: 0.0114\n", "Epoch 96/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 6.4123e-04\n", "Epoch 00096: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 29us/sample - loss: 6.2545e-04 - val_loss: 0.0115\n", "Epoch 97/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 5.6344e-04\n", "Epoch 00097: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 6.0680e-04 - val_loss: 0.0108\n", "Epoch 98/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 8.2574e-04\n", "Epoch 00098: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 26us/sample - loss: 8.1689e-04 - val_loss: 0.0110\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 99/2000\n", "2560/2903 [=========================>....] - ETA: 0s - loss: 7.3464e-04\n", "Epoch 00099: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 25us/sample - loss: 7.2157e-04 - val_loss: 0.0132\n", "Epoch 100/2000\n", "2560/2903 [=========================>....] - ETA: 0s - loss: 7.1252e-04\n", "Epoch 00100: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 25us/sample - loss: 7.1788e-04 - val_loss: 0.0112\n", "Epoch 101/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 6.5079e-04\n", "Epoch 00101: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 6.2962e-04 - val_loss: 0.0123\n", "Epoch 102/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 6.0506e-04\n", "Epoch 00102: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 32us/sample - loss: 6.0545e-04 - val_loss: 0.0112\n", "Epoch 103/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 6.0205e-04\n", "Epoch 00103: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 6.2116e-04 - val_loss: 0.0119\n", "Epoch 104/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 6.0589e-04\n", "Epoch 00104: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 6.4509e-04 - val_loss: 0.0117\n", "Epoch 105/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 6.4410e-04\n", "Epoch 00105: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 6.4799e-04 - val_loss: 0.0111\n", "Epoch 106/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 6.1937e-04\n", "Epoch 00106: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 6.1239e-04 - val_loss: 0.0120\n", "Epoch 107/2000\n", "1664/2903 [================>.............] - ETA: 0s - loss: 6.6325e-04\n", "Epoch 00107: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 36us/sample - loss: 6.1345e-04 - val_loss: 0.0113\n", "Epoch 108/2000\n", "1792/2903 [=================>............] - ETA: 0s - loss: 5.1688e-04\n", "Epoch 00108: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 35us/sample - loss: 5.3364e-04 - val_loss: 0.0125\n", "Epoch 109/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 6.6596e-04\n", "Epoch 00109: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 6.7963e-04 - val_loss: 0.0115\n", "Epoch 110/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 6.4278e-04\n", "Epoch 00110: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 6.0299e-04 - val_loss: 0.0112\n", "Epoch 111/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 5.3297e-04\n", "Epoch 00111: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 5.5112e-04 - val_loss: 0.0115\n", "Epoch 112/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 5.8987e-04\n", "Epoch 00112: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 34us/sample - loss: 6.0769e-04 - val_loss: 0.0130\n", "Epoch 113/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 5.5111e-04\n", "Epoch 00113: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 5.6370e-04 - val_loss: 0.0121\n", "Epoch 114/2000\n", "1920/2903 [==================>...........] - ETA: 0s - loss: 4.8050e-04\n", "Epoch 00114: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 4.8669e-04 - val_loss: 0.0116\n", "Epoch 115/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 4.4327e-04\n", "Epoch 00115: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 30us/sample - loss: 4.9087e-04 - val_loss: 0.0120\n", "Epoch 116/2000\n", "2304/2903 [======================>.......] - ETA: 0s - loss: 6.1409e-04\n", "Epoch 00116: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 28us/sample - loss: 6.5732e-04 - val_loss: 0.0120\n", "Epoch 117/2000\n", "2176/2903 [=====================>........] - ETA: 0s - loss: 6.1762e-04\n", "Epoch 00117: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 31us/sample - loss: 5.7927e-04 - val_loss: 0.0121\n", "Epoch 118/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 5.1383e-04\n", "Epoch 00118: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 33us/sample - loss: 5.3133e-04 - val_loss: 0.0120\n", "Epoch 119/2000\n", "2048/2903 [====================>.........] - ETA: 0s - loss: 5.2612e-04\n", "Epoch 00119: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 30us/sample - loss: 5.3072e-04 - val_loss: 0.0115\n", "Epoch 120/2000\n", "2432/2903 [========================>.....] - ETA: 0s - loss: 4.7557e-04\n", "Epoch 00120: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 27us/sample - loss: 4.9869e-04 - val_loss: 0.0116\n", "Epoch 121/2000\n", "2560/2903 [=========================>....] - ETA: 0s - loss: 4.7564e-04\n", "Epoch 00121: val_loss did not improve from 0.00909\n", "2903/2903 [==============================] - 0s 26us/sample - loss: 4.7991e-04 - val_loss: 0.0120\n", "Epoch 00121: early stopping\n" ] } ], "source": [ "# Callbacks for the model\n", "es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)\n", "mc = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', verbose=1, save_best_only=True)\n", "reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.90,\n", " patience=10, min_lr=0.000001)\n", "n_split = 10\n", "h = []\n", "e = []\n", "j = 0\n", "badIndexes = {}\n", "#for train_index,test_index in KFold(n_split, True, 1).split(X_train_2):\n", " #x_train,x_test=X[train_index],X[test_index]\n", " #y_train,y_test=Y[train_index],Y[test_index]\n", " \n", " #model=create_model()\n", " #h.append(model.fit(x_train, y_train, batch_size=16, verbose=1,epochs=1000,callbacks=[mc]))\n", " #e.append({'Fold': j, 'MSE': model.evaluate(x_test,y_test)})\n", " #j += 1\n", " #if(j==2):\n", " # badIndexes = {'x':train_index, 'y': test_index}\n", " #print('Model evaluation ',model.evaluate(x_test,y_test))\n", "\n", "#log_dir = os.path.join(\"./logs/scalars/\")\n", "#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch=0)\n", "\n", "# Create and run the model\n", "model = create_model()\n", "print(model.summary())\n", "h = model.fit(\n", " X_train,\n", " Y_train,\n", " validation_split=VALIDATION_SPLIT,\n", " batch_size=BATCH_SIZE,\n", " epochs=2000,\n", " shuffle=True,\n", " verbose=1,\n", " callbacks=[es, mc, reduce_lr \n", " #tensorboard_callback\n", " ]\n", ")\n", "\n", "# Save the best model for later use\n", "best_model_org = load_model('best_model.h5')\n" ] }, { "cell_type": "code", "execution_count": 196, "metadata": {}, "outputs": [], "source": [ "#Best model found by using WANDB developer tool\n", "best_model = load_model('./models/wandb/model-best.h5')\n", "best_model_2 = load_model('./models/wandb/model-best_2.h5')\n", "best_model_3 = load_model('./models/wandb/model-best_3.h5')\n", "best_model_4 = load_model('./models/wandb/model-best_4.h5')\n", "best_model_5 = load_model('./models/wandb/model-best_5.h5')" ] }, { "cell_type": "code", "execution_count": 197, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE for the data set is: 0.001658357523372472\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#Print the training history\n", "fig, ax = plt.subplots(figsize=(20, 10))\n", "lines = ax.plot(h.history['loss'], label='Loss')\n", "lines = ax.plot(h.history['val_loss'], label='val_loss')\n", "ax.legend(loc='upper right',fontsize=15)\n", "\n", "test_error_rate = best_model.evaluate(X_train, Y_train, verbose=0)\n", "print(\"MSE for the data set is: {}\".format(test_error_rate))" ] }, { "cell_type": "code", "execution_count": 211, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "23320.873046875\n", "8.325565338134766\n", "33232.23302758935\n", "(817,)\n", "(817, 1)\n", "0.9483329544524007\n" ] } ], "source": [ "predictors = keys.drop(target)\n", "\n", "# Make predictions from test set\n", "prediction = best_model.predict(X_test)\n", "\n", "MAE = 0\n", "RME = 0\n", "RMSE = 0\n", "\n", "pred_p = []\n", "pred_p2 = []\n", "pred_p3 = []\n", "org_p = []\n", "\n", "# Scale target values back to normal\n", "for p in Y_test:\n", " y_2 = p\n", " y_2 -= added\n", " y_2 /= multiplied_by\n", " org_p.append(y_2)\n", " \n", "org_p = np.asarray(org_p)\n", "\n", "# Scale predicted values back to normal\n", "for p in prediction:\n", " y_2 = p\n", " y_2 -= added\n", " y_2 /= multiplied_by\n", " pred_p.append(y_2)\n", " \n", "# Calculate metrics for sensitivity analysis\n", "for i in range(len(pred_p)):\n", " MAE += abs(pred_p[i] - org_p[i])\n", " RME += abs(pred_p[i] - org_p[i]) / org_p[i] * 100\n", " RMSE += (pred_p[i] - org_p[i])**2\n", " \n", "\n", "MAE = float(MAE / len(pred_p))\n", "RME = float(RME / len(pred_p))\n", "RMSE = math.sqrt(RMSE / len(pred_p))\n", "print(MAE)\n", "print(RME)\n", "print(RMSE)\n", "newArray = np.asarray(pred_p)\n", "#print(\"\\nRMSPE for the data set is: {0:.2f}%\".format(error_p))\n", "\n", "print(org_p.shape)\n", "print(newArray.shape)\n", "\n", "# Calculate r squared\n", "org_p = org_p.reshape((817,))\n", "newArray = newArray.reshape((817,))\n", "correlation_matrix = np.corrcoef(org_p, newArray)\n", "correlation_xy = correlation_matrix[0,1]\n", "r_squared = correlation_xy**2\n", "\n", "print(r_squared)" ] }, { "cell_type": "code", "execution_count": 212, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "314070.84\n", "283000.0\n", "(817,)\n", "1\n", "Min value - 115826.23 Min original - 101000.0\n", "Max value - 960688.75 Max original - 990000.0\n", "Mean - 311228.25 Mean original - 305065.43772337824\n", "Std - 139710.78 Std original - 143663.35221658647\n", "DIF: -0.00027964749506399506\n", "\n", "MSE for the data set is: 0.0014\n" ] }, { "data": { "text/plain": [ "(array([ 0., 200000., 400000., 600000., 800000., 1000000.,\n", " 1200000.]), )" ] }, "execution_count": 212, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from matplotlib import rcParams, cycler\n", "\n", "print(\"Min value -\", min(newArray), \" Min original - \", min(org_p))\n", "print(\"Max value -\", max(newArray), \" Max original - \", max(org_p))\n", "print(\"Mean -\", np.nanmean(newArray), \" Mean original - \", np.nanmean(org_p))\n", "print(\"Std -\", np.nanstd(newArray), \" Std original - \", np.nanstd(org_p))\n", "\n", "error_rate = best_model.evaluate(X_test, Y_test, verbose=0)\n", "print(\"DIF: \", (error_rate - test_error_rate))\n", "print(\"\\nMSE for the data set is: {0:.4f}\".format(error_rate))\n", "\n", "cmap = plt.cm.coolwarm\n", "rcParams['axes.prop_cycle'] = cycler(color=cmap(np.linspace(0, 1, 2)))\n", "\n", "# Print the predictions and targeted prices\n", "fig, ax = plt.subplots(figsize=(15, 15))\n", "lines = ax.plot(org_p, org_p, label='Predicted', linewidth=2)\n", "ax.scatter(org_p, newArray, color='r')\n", "ax.set_xlabel('Original price',fontsize=18)\n", "ax.set_ylabel('Predicted price',fontsize=18)\n", "\n", "txt = '\\n'.join((\"MAE: {0:.0f} €\".format(MAE), \"RME: {0:.2f}%\".format(RME), \"RMSE: {0:.0f} €\".format(RMSE), \"R²: {0:.2f}\".format(r_squared)))\n", "ax.text(0.01, 0.91, txt, fontsize=16,\n", " verticalalignment='bottom', transform=ax.transAxes)\n", "\n", "plt.xticks(fontsize=14)\n", "plt.yticks(fontsize=14)" ] }, { "cell_type": "code", "execution_count": 214, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[18329.39915531517, 9.115760385440007, 24365.823791975272, 0.8800033819288181], [20606.306053314696, 8.154917373656778, 28222.32272844262, 0.9437376308301402], [25804.80205195783, 8.119345543883703, 38430.00922171203, 0.9349564534440795], [29743.508049242424, 9.045208528737414, 38703.161362382496, 0.9530557636322816], [24786.3566015625, 5.292388211234174, 35173.41986743031, 0.9525933284681801], [45571.90401785716, 14.961291596888406, 63119.48403356005, 0.8913788826248188]]\n" ] } ], "source": [ "# Calculate sensitivity analysis for each room type\n", "rooms = []\n", "pred_sep = [[], [], [], [], [], []]\n", "org_sep = [[], [], [], [], [], []]\n", "stats = []\n", "for i in range(len(X_test)):\n", " scaled = math.floor((X_test[i][3] - scaler.min_[3]) / scaler.scale_[3])\n", " rooms.append(scaled)\n", " \n", "for i in range(len(newArray)):\n", " if rooms[i] == 1:\n", " pred_sep[0].append(newArray[i])\n", " org_sep[0].append(org_p[i])\n", " elif rooms[i] == 2:\n", " pred_sep[1].append(newArray[i])\n", " org_sep[1].append(org_p[i])\n", " elif rooms[i] == 3:\n", " pred_sep[2].append(newArray[i])\n", " org_sep[2].append(org_p[i])\n", " elif rooms[i] == 4:\n", " pred_sep[3].append(newArray[i])\n", " org_sep[3].append(org_p[i])\n", " elif rooms[i] == 5:\n", " pred_sep[4].append(newArray[i])\n", " org_sep[4].append(org_p[i])\n", " elif rooms[i] == 6:\n", " pred_sep[5].append(newArray[i])\n", " org_sep[5].append(org_p[i])\n", " \n", "\n", "for n in range(len(pred_sep)):\n", " for i in range(len(pred_sep[n])):\n", " MAE += abs(pred_sep[n][i] - org_sep[n][i])\n", " RME += abs(pred_sep[n][i] / org_sep[n][i] - 1) * 100\n", " RMSE += (pred_sep[n][i] - org_sep[n][i])**2\n", "\n", " correlation_matrix = np.corrcoef(org_sep[n], pred_sep[n])\n", " correlation_xy = correlation_matrix[0,1]\n", " r_squared = correlation_xy**2\n", " MAE = MAE / len(pred_sep[n])\n", " RME = RME / len(pred_sep[n])\n", " RMSE = math.sqrt(RMSE / len(pred_sep[n]))\n", " stats.append([MAE,RME,RMSE,r_squared])\n", " MAE, RME, RMSE = 0, 0, 0\n", "\n", "print(stats)" ] }, { "cell_type": "code", "execution_count": 216, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "\n", "# Draw the graph for each room type\n", "# Red points are predictions and blue line is targeted price\n", "fig2, ax2 = plt.subplots(2, 3, figsize=(15, 10))\n", "ax2[0, 0].plot(org_sep[0], org_sep[0], label='Predicted', linewidth=2)\n", "ax2[0, 0].scatter(org_sep[0], pred_sep[0], color='r')\n", "ax2[0, 0].set_title('Studio')\n", "ax2[0, 1].plot(org_sep[1], org_sep[1], label='Predicted', linewidth=2)\n", "ax2[0, 1].scatter(org_sep[1], pred_sep[1], color='r')\n", "ax2[0, 1].set_title('Two rooms')\n", "\n", "ax2[0, 2].plot(org_sep[2], org_sep[2], label='Predicted', linewidth=2)\n", "ax2[0, 2].scatter(org_sep[2], pred_sep[2], color='r')\n", "ax2[0, 2].set_title('Three rooms')\n", "\n", "ax2[1, 0].plot(org_sep[3], org_sep[3], label='Predicted', linewidth=2)\n", "ax2[1, 0].scatter(org_sep[3], pred_sep[3], color='r')\n", "ax2[1, 0].set_title('Four rooms')\n", "\n", "ax2[1, 1].plot(org_sep[4], org_sep[4], label='Predicted', linewidth=2)\n", "ax2[1, 1].scatter(org_sep[4], pred_sep[4], color='r')\n", "ax2[1, 1].set_title('Five rooms')\n", "\n", "ax2[1, 2].plot(org_sep[5], org_sep[5], label='Predicted', linewidth=2)\n", "ax2[1, 2].scatter(org_sep[5], pred_sep[5], color='r')\n", "ax2[1, 2].set_title('Six or more')\n", "\n", "txt = '\\n'.join((\"MAE: {0:.0f} €\".format(stats[0][0]), \"RME: {0:.2f}%\".format(stats[0][1]), \"RMSE: {0:.0f} €\".format(stats[0][2]), \"R²: {0:.2f}\".format(stats[0][3])))\n", "ax2[1, 2].text(-2.6, 1.7, txt, fontsize=14,\n", " verticalalignment='bottom', transform=ax.transAxes)\n", "\n", "txt = '\\n'.join((\"MAE: {0:.0f} €\".format(stats[1][0]), \"RME: {0:.2f}%\".format(stats[1][1]), \"RMSE: {0:.0f} €\".format(stats[1][2]), \"R²: {0:.2f}\".format(stats[1][3])))\n", "ax2[1, 2].text(-1.4, 1.7, txt, fontsize=14,\n", " verticalalignment='bottom', transform=ax.transAxes)\n", "\n", "txt = '\\n'.join((\"MAE: {0:.0f} €\".format(stats[2][0]), \"RME: {0:.2f}%\".format(stats[2][1]), \"RMSE: {0:.0f} €\".format(stats[2][2]), \"R²: {0:.2f}\".format(stats[2][3])))\n", "ax2[1, 2].text(-0.2, 1.7, txt, fontsize=14,\n", " verticalalignment='bottom', transform=ax.transAxes)\n", "\n", "txt = '\\n'.join((\"MAE: {0:.0f} €\".format(stats[3][0]), \"RME: {0:.2f}%\".format(stats[3][1]), \"RMSE: {0:.0f} €\".format(stats[3][2]), \"R²: {0:.2f}\".format(stats[3][3])))\n", "ax2[1, 2].text(-2.6, 0.5, txt, fontsize=14,\n", " verticalalignment='bottom', transform=ax.transAxes)\n", "\n", "txt = '\\n'.join((\"MAE: {0:.0f} €\".format(stats[4][0]), \"RME: {0:.2f}%\".format(stats[4][1]), \"RMSE: {0:.0f} €\".format(stats[4][2]), \"R²: {0:.2f}\".format(stats[4][3])))\n", "ax2[1, 2].text(-1.4, 0.5, txt, fontsize=14,\n", " verticalalignment='bottom', transform=ax.transAxes)\n", "\n", "txt = '\\n'.join((\"MAE: {0:.0f} €\".format(stats[5][0]), \"RME: {0:.2f}%\".format(stats[5][1]), \"RMSE: {0:.0f} €\".format(stats[5][2]), \"R²: {0:.2f}\".format(stats[5][3])))\n", "ax2[1, 2].text(-0.2, 0.5, txt, fontsize=14,\n", " verticalalignment='bottom', transform=ax.transAxes)\n", "\n", "\n", "plt.xticks(fontsize=10)\n", "plt.yticks(fontsize=10)\n", "\n", "for ax in ax2.flat:\n", " ax.set(xlabel='Original price', ylabel='Predicted price')\n", "\n", "# Hide x labels and tick labels for top plots and y ticks for right plots.\n", "for ax in ax2.flat:\n", " ax.label_outer()" ] }, { "cell_type": "code", "execution_count": 186, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\users\\jussi kalliola\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\tensorflowjs\\converters\\keras_h5_conversion.py:122: H5pyDeprecationWarning: The default file mode will change to 'r' (read-only) in h5py 3.0. To suppress this warning, pass the mode you need to h5py.File(), or set the global default h5.get_config().default_file_mode, or set the environment variable H5PY_DEFAULT_READONLY=1. Available modes are: 'r', 'r+', 'w', 'w-'/'x', 'a'. See the docs for details.\n", " return h5py.File(h5file)\n" ] } ], "source": [ "#tfjs.converters.save_keras_model(best_model, './models/wandb/')\n", "#tfjs.converters.save_keras_model(model, './models/etuovi_model13/')\n", "#scalerDf.to_csv(r'.\\models\\wandb\\scaler.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6rc1" } }, "nbformat": 4, "nbformat_minor": 2 }