In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib as mpl
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
import seaborn as sns
# Global matplotlib defaults for every figure in this notebook:
# 12x8 inch canvas and axis grid lines switched off.
mpl.rcParams.update({'figure.figsize': (12, 8), 'axes.grid': False})
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [None]:
# Read only columns 0, 3 and 5 of the workbook. read_excel already hands back
# a DataFrame here, so no extra wrapping is needed.
dataset = pd.read_excel("JKweather.xlsx", usecols=[0, 3, 5])
dataset

In [None]:
# Keep only the rows recorded at station 'Sgr'. The explicit copy makes the
# subset an independent frame, so later column assignments do not refer back
# to `dataset`.
is_sgr = dataset['Station name'] == 'Sgr'
dfs = dataset.loc[is_sgr].copy()
dfs

In [None]:
# Convert the 'Date' column to pandas datetimes.
dfs = dfs.assign(Date=pd.to_datetime(dfs['Date']))
dfs

In [None]:
# Use the parsed dates as the row index.
df_indexed = dfs.set_index(keys='Date')
df_indexed

In [None]:
# The station label is constant after filtering, so remove it.
timeseries = df_indexed.drop(columns=['Station name'])
timeseries

In [None]:
# Exponentially weighted mean of 'Tmin' with smoothing factor alpha=0.5,
# stored alongside the raw values as 'Tmin_ema'.
tmin_ema = timeseries["Tmin"].ewm(alpha=0.5).mean()
timeseries = timeseries.assign(Tmin_ema=tmin_ema)
timeseries

In [None]:
# Remove the raw 'Tmin' column now that the smoothed 'Tmin_ema' exists.
timeseries = timeseries.drop(columns=['Tmin'])
timeseries

In [None]:
# Total number of missing cells across the whole frame.
timeseries.isna().sum().sum()

In [None]:
# Independent snapshot of the series; `timeseries` gains extra helper columns
# in the following cell, while `ts` stays a clean single-column frame.
ts = timeseries.copy(deep=True)
ts

In [None]:
# Add calendar helper columns derived from the datetime index: the numeric
# year and the abbreviated month name (e.g. 'Jan').
timeseries = timeseries.assign(
    year=timeseries.index.year,
    month=timeseries.index.strftime('%b'),
)
# `unique` has to be *called*; without the parentheses `years` would hold the
# bound method object rather than the array of distinct years.
years = timeseries['year'].unique()
timeseries

In [None]:
# Bar plot of Tmin_ema grouped by year (seaborn's default aggregation).
sns.barplot(data=timeseries, x='year', y='Tmin_ema')

In [None]:
# Bar plot of Tmin_ema grouped by month label (seaborn's default aggregation).
sns.barplot(data=timeseries, x='month', y='Tmin_ema')

In [None]:
import statsmodels.api as sm
# Additive decomposition of the smoothed series with a period of 7 observations.
decomposed_data = ts['Tmin_ema']
decomposition = sm.tsa.seasonal_decompose(
    decomposed_data,
    model='additive',
    period=7,
)

# Keep the trend and residual components for plotting.
trend, residual = decomposition.trend, decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the original series, its trend and its residual as stacked panels.
# The subplot grid has exactly one row per panel, so no empty slot is left
# at the bottom of the figure.
panels = [
    (decomposed_data, 'Original', 'red'),
    (trend, 'Trend', 'green'),
    (residual, 'Residual', 'black'),
]
plt.figure(figsize=(10, 8))
for row, (series, label, color) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    plt.plot(series, label=label, color=color)
    plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
# Line plot of the smoothed series.
plt.plot(ts,color='brown')

In [None]:
# Lag plot of the smoothed series (pandas' default lag), a quick visual check
# for autocorrelation.
lag_plot(ts)
plt.show()

In [None]:
# Difference the series and drop the leading row(s) that have no predecessor.
# Note: the value is passed to `diff` as `periods`, i.e. the lag between the
# two observations being subtracted.
order_of_differencing = 1
differenced_data = ts.diff(periods=order_of_differencing).dropna()
differenced_data

In [None]:
import statsmodels.api as sm
# Additive decomposition of the differenced series with a period of 7 observations.
differenced_decomposed_data = differenced_data['Tmin_ema']
decomposition = sm.tsa.seasonal_decompose(
    differenced_decomposed_data,
    model='additive',
    period=7,
)

# Keep the trend and residual components for plotting.
trend, residual = decomposition.trend, decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the differenced series, its trend and its residual as stacked panels.
# The subplot grid has exactly one row per panel, so no empty slot is left
# at the bottom of the figure.
panels = [
    (differenced_decomposed_data, 'Original', 'red'),
    (trend, 'Trend', 'green'),
    (residual, 'Residual', 'black'),
]
plt.figure(figsize=(10, 8))
for row, (series, label, color) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    plt.plot(series, label=label, color=color)
    plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
# Line plot of the differenced series.
plt.plot(differenced_data,color='brown')

In [None]:
# Lag plot of the differenced series (pandas' default lag).
lag_plot(differenced_data)
plt.show()

In [None]:
# Differenced values as an (n, 1) column array, the 2-D layout passed to the
# scaler below.
Tmin_values = differenced_data[['Tmin_ema']].to_numpy()
Tmin_values

In [None]:
# Normalize the data to the range (-1, 1).
# The scaler is fitted on the chronological training portion only (the first
# 80% of rows, the same boundary the split cell uses), so the min/max of the
# held-out test period do not leak into training. The fitted scaler is then
# applied to the whole series; test values may fall slightly outside (-1, 1).
n_fit = int(len(Tmin_values) * 0.8)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(Tmin_values[:n_fit])
scaled_Tmin = scaler.transform(Tmin_values)
scaled_Tmin

In [None]:
# Chronological split: the first 80% of rows form the training set and the
# remaining rows form the test set.
train_size = int(len(scaled_Tmin) * 0.8)
train_data = scaled_Tmin[:train_size]
test_data = scaled_Tmin[train_size:]

In [None]:
# Shape of the training split.
train_data.shape

In [None]:
# Shape of the test split.
test_data.shape

In [None]:
# Inspect the scaled training values.
train_data

In [None]:
# Inspect the scaled test values.
test_data

In [None]:
# Prepare the data for LSTM input
def create_sequences(scaled_Tmin, seq_length):
    """Build sliding-window samples for one-step-ahead forecasting.

    Each sample pairs ``seq_length`` consecutive rows (the input window)
    with the row that immediately follows them (the target).

    Parameters
    ----------
    scaled_Tmin : array-like
        Sequence of observations ordered in time; the first axis is time.
    seq_length : int
        Number of consecutive observations per input window (>= 0).

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` has shape ``(n, seq_length, ...)`` and ``y`` has shape
        ``(n, ...)`` where ``n = max(len(scaled_Tmin) - seq_length, 0)`` and
        ``...`` are the trailing dimensions of the input. When the input is
        too short to form a single window, correctly shaped empty arrays are
        returned instead of shapeless ``(0,)`` arrays.

    Raises
    ------
    ValueError
        If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError("seq_length must be non-negative")

    data = np.asarray(scaled_Tmin)
    n_samples = max(len(data) - seq_length, 0)

    # The target of window i is row i + seq_length, i.e. everything from
    # seq_length onwards; slicing also yields a well-shaped empty result.
    y = data[seq_length:].copy()

    if n_samples == 0:
        # Keep the window and feature dimensions so downstream shape checks
        # still see the expected rank.
        X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        return X, y

    X = np.array([data[i:i + seq_length] for i in range(n_samples)])
    return X, y

seq_length = 7  # Length of input sequences
# Windows are built separately for each split, so no window mixes training
# and test rows.
train_X, train_y = create_sequences(train_data, seq_length)
test_X, test_y = create_sequences(test_data, seq_length)

In [None]:
# Shape of the training input windows.
train_X.shape

In [None]:
# Shape of the training targets.
train_y.shape

In [None]:
# Inspect the training input windows.
train_X

In [None]:
# Inspect the training targets.
train_y

In [None]:
# Shape of the test input windows.
test_X.shape

In [None]:
# Shape of the test targets.
test_y.shape

In [None]:
# Inspect the test input windows.
test_X

In [None]:
# Inspect the test targets.
test_y

In [None]:
# Baseline network: two stacked 50-unit LSTM layers followed by a single-unit
# dense output, trained with mean squared error.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    LSTM(50),
    Dense(1),
])
model.compile(loss='mse')
model.summary()

In [None]:
# Fit the baseline LSTM; epochs and batch size are left at the Keras defaults.
model.fit(x=train_X, y=train_y, verbose=1)

# Predict the target for every test window.
predictions = model.predict(x=test_X)

In [None]:
# Inspect the raw model predictions.
predictions

In [None]:
# Wrap the predictions in a DataFrame and give its first column a
# descriptive name.
predictions = pd.DataFrame(predictions)
predictions = predictions.rename(
    columns={predictions.columns[0]: 'Predicted_Tmin'}
)
predictions

In [None]:
# Wrap the test targets in a DataFrame and give its first column a
# descriptive name. From here on `test_y` is a DataFrame, not an array.
test_y = pd.DataFrame(test_y)
test_y = test_y.rename(columns={test_y.columns[0]: 'Actual_Tmin'})
test_y

In [None]:
# Place actual and predicted values side by side, aligned on the row index.
ActualVsPredicted = pd.concat(objs=[test_y, predictions], axis='columns')
ActualVsPredicted

In [None]:
# Draw the actual and predicted columns as lines over the row index.
plt.plot(ActualVsPredicted)

In [None]:
# Calculate evaluation metrics.
# All values are measured on the scaled, differenced series that the model
# was trained on, not in the original temperature units.
mae = mean_absolute_error(test_y,predictions)
mse = mean_squared_error(test_y,predictions)
# RMSE is derived from the MSE directly: the `squared=False` flag of
# mean_squared_error is deprecated and absent from recent scikit-learn
# releases, whereas the square root works on every version.
rmse = np.sqrt(mse)
# NOTE(review): the targets can be very close to zero, which inflates MAPE;
# interpret this figure with care.
mape = mean_absolute_percentage_error(test_y,predictions)

# Print the evaluation results
print('Mean Absolute Error (MAE):', mae)
print('Mean Squared Error (MSE):', mse)
print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Percentage Error (MAPE):', mape)

In [None]:
# Absolute error of each prediction.
signed_error = ActualVsPredicted['Actual_Tmin'] - ActualVsPredicted['Predicted_Tmin']
ActualVsPredicted['diff'] = signed_error.abs()
ActualVsPredicted

In [None]:
# Plot the absolute error of each test prediction.
plt.plot(ActualVsPredicted['diff'])

TUNING

TUNING NUMBER OF EPOCHS

In [None]:
# Define your range of epochs to try
epochs_range = [50, 100, 150, 200, 250, 300]

best_test_loss = float('inf')
best_epochs = None

# NOTE(review): candidates are ranked by their loss on the test set, so the
# test metrics reported for the chosen setting are optimistic. Consider
# selecting on a validation slice of the training data instead.
# Iterate over the epochs range, training a fresh model for each candidate.
for epochs in epochs_range:
    print(f"Testing epochs: {epochs}")
    # Create and compile the LSTM model
    model2 = Sequential([
        LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
        LSTM(50),
        Dense(1),
    ])
    model2.compile(loss='mse')

    # Train the model
    model2.fit(train_X, train_y, epochs=epochs, verbose=0)

    # Evaluate the model on the test set
    test_loss = model2.evaluate(test_X, test_y)

    # Check if the current model has the best test loss
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_epochs = epochs

# Report the outcome of the search so the chosen value is visible in the output.
print(f"Best epochs: {best_epochs} (test loss: {best_test_loss})")

TUNING BATCH SIZE

In [None]:
# Define a list of batch sizes to try
batch_sizes = [16, 32, 64, 128]
best_test_loss = float('inf')
best_batch_size = None

# NOTE(review): candidates are ranked by their loss on the test set (which is
# also passed as validation data), so the test metrics reported for the
# chosen setting are optimistic.
# Iterate over each batch size, training a fresh model for each candidate.
for batch_size in batch_sizes:
    print(f"Testing batch size: {batch_size}")

    # Create an LSTM model
    model3 = Sequential([
        LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
        LSTM(50),
        Dense(1),
    ])
    model3.compile(loss='mse')

    # Train the model (number of epochs left at the Keras default)
    model3.fit(train_X, train_y, batch_size=batch_size, validation_data=(test_X, test_y), verbose=0)

    # Evaluate the model
    test_loss = model3.evaluate(test_X, test_y)

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_batch_size = batch_size

# Report the outcome of the search so the chosen value is visible in the output.
print(f"Best batch size: {best_batch_size} (test loss: {best_test_loss})")

TUNING OPTIMIZER

In [None]:
from tensorflow.keras.optimizers import Adam, RMSprop, SGD

# Define a list of optimizers to try (passed to Keras by name)
optimizers = ['SGD', 'RMSprop', 'Adam']

best_test_loss = float('inf')
best_optimizer = None

# NOTE(review): candidates are ranked by their loss on the test set (which is
# also passed as validation data), so the test metrics reported for the
# chosen setting are optimistic.
# Iterate over each optimizer, training a fresh model for each candidate.
for optimizer in optimizers:
    print(f"Testing optimizer: {optimizer}")

    # Create an LSTM model
    model4 = Sequential([
        LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
        LSTM(50),
        Dense(1),
    ])
    model4.compile(optimizer=optimizer, loss='mse')

    # Train the model (number of epochs left at the Keras default)
    model4.fit(train_X, train_y,validation_data=(test_X, test_y), verbose=0)

    # Evaluate the model
    test_loss = model4.evaluate(test_X, test_y)

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_optimizer = optimizer

# Report the outcome of the search so the chosen value is visible in the output.
print(f"Best optimizer: {best_optimizer} (test loss: {best_test_loss})")


TUNING ACTIVATION FUNCTION

In [None]:
# Define a list of activation functions to try
activation_functions = ['relu', 'tanh', 'sigmoid']

best_test_loss = float('inf')
best_activation_function = None

# NOTE(review): candidates are ranked by their loss on the test set (which is
# also passed as validation data), so the test metrics reported for the
# chosen setting are optimistic.
# Iterate over each activation function, training a fresh model for each.
for activation_function in activation_functions:
    print(f"Testing activation_function: {activation_function}")

    # Create an LSTM model whose two LSTM layers use the candidate activation
    model5 = Sequential([
        LSTM(50, activation=activation_function, return_sequences=True, input_shape=(seq_length, 1)),
        LSTM(50, activation=activation_function),
        Dense(1),
    ])
    model5.compile(optimizer='RMSprop', loss='mse')

    # Train the model (number of epochs left at the Keras default)
    model5.fit(train_X, train_y,validation_data=(test_X, test_y), verbose=0)

    # Evaluate the model
    test_loss = model5.evaluate(test_X, test_y)

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_activation_function = activation_function

# Report the outcome of the search so the chosen value is visible in the output.
print(f"Best activation function: {best_activation_function} (test loss: {best_test_loss})")


MODEL BUILDING AFTER TUNING

In [None]:
# Final network: same two-layer LSTM architecture as the baseline, with tanh
# activations and the Adam optimizer set explicitly.
tuned_model = Sequential([
    LSTM(50, activation='tanh', return_sequences=True, input_shape=(seq_length, 1)),
    LSTM(50, activation='tanh'),
    Dense(1),
])
tuned_model.compile(optimizer='Adam', loss='mse')
tuned_model.summary()

In [None]:
# Fit the tuned LSTM for 250 epochs with mini-batches of 32 samples.
tuned_model.fit(x=train_X, y=train_y, epochs=250, batch_size=32, verbose=1)

# Predict the target for every test window.
new_predictions = tuned_model.predict(x=test_X)

In [None]:
# Wrap the tuned model's predictions in a DataFrame and give its first column
# a descriptive name.
new_predictions = pd.DataFrame(new_predictions)
new_predictions = new_predictions.rename(
    columns={new_predictions.columns[0]: 'new_Predicted_Tmin'}
)

In [None]:
# Place actual values and the tuned model's predictions side by side,
# aligned on the row index.
ActualVsNewPredicted = pd.concat(objs=[test_y, new_predictions], axis='columns')
ActualVsNewPredicted

In [None]:
# Draw the actual and newly predicted columns as lines over the row index.
plt.plot(ActualVsNewPredicted)

In [None]:
# Calculate evaluation metrics for the tuned model.
# All values are measured on the scaled, differenced series that the model
# was trained on, not in the original temperature units.
actual = ActualVsNewPredicted['Actual_Tmin']
predicted = ActualVsNewPredicted['new_Predicted_Tmin']

mae = mean_absolute_error(actual, predicted)
mse = mean_squared_error(actual, predicted)
# RMSE is derived from the MSE directly: the `squared=False` flag of
# mean_squared_error is deprecated and absent from recent scikit-learn
# releases, whereas the square root works on every version.
rmse = np.sqrt(mse)
# NOTE(review): the targets can be very close to zero, which inflates MAPE;
# interpret this figure with care.
mape = mean_absolute_percentage_error(actual, predicted)

# Print the evaluation results
print('Mean Absolute Error (MAE):', mae)
print('Mean Squared Error (MSE):', mse)
print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Percentage Error (MAPE):', mape)

In [None]:
# Absolute error of each prediction made by the tuned model.
signed_error = ActualVsNewPredicted['Actual_Tmin'] - ActualVsNewPredicted['new_Predicted_Tmin']
ActualVsNewPredicted['diff'] = signed_error.abs()
ActualVsNewPredicted

In [None]:
# Plot the absolute error of each test prediction from the tuned model.
plt.plot(ActualVsNewPredicted['diff'])