In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib as mpl
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
import seaborn as sns
# Notebook-wide figure defaults: 12x8 inch canvas, no background grid.
mpl.rcParams.update({'figure.figsize': (12, 8), 'axes.grid': False})
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [None]:
dataset=pd.read_excel("JKweather.xlsx", usecols=[0,3,4,5,6])
dataset=pd.DataFrame(dataset)
dataset

In [None]:
dfs = dataset[dataset['Station name'] == 'Sgr']
dfs = pd.DataFrame(dfs)
dfs

In [None]:
dfs['Date']=pd.to_datetime(dfs['Date'])
dfs

In [None]:
df_indexed=dfs.set_index('Date')
df_indexed

In [None]:
timeseries=df_indexed.drop('Station name',axis=1)
timeseries

In [None]:
timeseries = timeseries.assign(Rainfall_ema=timeseries["Rainfall"].ewm(alpha=0.1).mean(),
                               Tmin_ema=timeseries["Tmin"].ewm(alpha=0.1).mean(),
                               Tmax_ema=timeseries["Tmax"].ewm(alpha=0.1).mean(),
                               )
timeseries

In [None]:
cols_to_drop=['Tmin', 'Tmax', 'Rainfall']
timeseries=timeseries.drop(cols_to_drop,axis=1)
timeseries

In [None]:
timeseries.isnull().sum().sum()

In [None]:
ts=timeseries.copy()
ts= pd.DataFrame(ts)
ts

In [None]:
timeseries['year']=[d.year for d in timeseries.index]
timeseries['month']=[d.strftime('%b') for d in timeseries.index]
years=timeseries['year'].unique
timeseries=pd.DataFrame(timeseries)
timeseries

In [None]:
sns.barplot(x='year',y='Rainfall_ema',data=timeseries)

In [None]:
sns.barplot(x='year',y='Tmin_ema',data=timeseries)

In [None]:
sns.barplot(x='year',y='Tmax_ema',data=timeseries)

In [None]:
sns.barplot(x='month',y='Rainfall_ema',data=timeseries)

In [None]:
sns.barplot(x='month',y='Tmin_ema',data=timeseries)

In [None]:
sns.barplot(x='month',y='Tmax_ema',data=timeseries)

In [None]:
import statsmodels.api as sm
decomposed_Rainfall = ts['Rainfall_ema']
# Perform time series decomposition
decomposition = sm.tsa.seasonal_decompose(decomposed_Rainfall, model='additive',period=7)

# Extract the components
trend = decomposition.trend
residual = decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the components
plt.figure(figsize=(10, 8))
plt.subplot(4, 1, 1)
plt.plot(decomposed_Rainfall, label='Original',color='red')
plt.legend(loc='best')
plt.subplot(4, 1, 2)
plt.plot(trend, label='Trend',color='green')
plt.legend(loc='best')
plt.subplot(4, 1, 3)
plt.plot(residual, label='Residual',color='black')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
import statsmodels.api as sm
decomposed_Tmin = ts['Tmin_ema']
# Perform time series decomposition
decomposition = sm.tsa.seasonal_decompose(decomposed_Tmin, model='additive',period=7)

# Extract the components
trend = decomposition.trend
residual = decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the components
plt.figure(figsize=(10, 8))
plt.subplot(4, 1, 1)
plt.plot(decomposed_Tmin, label='Original',color='red')
plt.legend(loc='best')
plt.subplot(4, 1, 2)
plt.plot(trend, label='Trend',color='green')
plt.legend(loc='best')
plt.subplot(4, 1, 3)
plt.plot(residual, label='Residual',color='black')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
import statsmodels.api as sm
decomposed_Tmax = ts['Tmax_ema']
# Perform time series decomposition
decomposition = sm.tsa.seasonal_decompose(decomposed_Tmax, model='additive',period=7)

# Extract the components
trend = decomposition.trend
residual = decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the components
plt.figure(figsize=(10, 8))
plt.subplot(4, 1, 1)
plt.plot(decomposed_Tmax, label='Original',color='red')
plt.legend(loc='best')
plt.subplot(4, 1, 2)
plt.plot(trend, label='Trend',color='green')
plt.legend(loc='best')
plt.subplot(4, 1, 3)
plt.plot(residual, label='Residual',color='black')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
# Plotting all three variables with different colors
plt.plot(ts['Rainfall_ema'], color='blue', label='Rainfall')

# Set the plot title and labels
plt.title('Rainfall over the years')
plt.xlabel('Time')
plt.ylabel('Rain')

# Add a legend to the plot
plt.legend()

# Show the plot
plt.show()


In [None]:
# Plotting all three variables with different colors
plt.plot(ts['Tmin_ema'], color='blue', label='Tmin')

# Set the plot title and labels
plt.title('Minimum Temperature over the years')
plt.xlabel('Time')
plt.ylabel('Tmin')

# Add a legend to the plot
plt.legend()

# Show the plot
plt.show()


In [None]:
# Plot the smoothed maximum temperature over time
plt.plot(ts['Tmax_ema'], color='green', label='Tmax')

# Set the plot title and labels
plt.title('Maximum Temperature over the years')
plt.xlabel('Time')
plt.ylabel('Tmax')  # the plotted quantity is Tmax, not rain

# Add a legend to the plot
plt.legend()

# Show the plot
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

lag_plot(ts['Rainfall_ema'], c='blue', alpha=0.5, ax=ax ,label='Rainfall')
plt.legend()
plt.title('Lag Plot of Rainfalls')
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

lag_plot(ts['Tmin_ema'], c='red', alpha=0.5, ax=ax ,label='Tmin')
plt.legend()
plt.title('Lag Plot of Minimum Temperature')
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

lag_plot(ts['Tmax_ema'], c='green', alpha=0.5, ax=ax ,label='Tmax')
plt.legend()
plt.title('Lag Plot of Maximum Temperature')
plt.show()


In [None]:
# Difference every column against the previous row and drop the leading NaN row.
# The value is passed to DataFrame.diff as its `periods` argument.
order_of_differencing = 1
differenced_data = ts.diff(periods=order_of_differencing).dropna()
differenced_data

In [None]:
import statsmodels.api as sm
differenced_decomposed_Rainfall = differenced_data['Rainfall_ema']
# Perform time series decomposition
decomposition = sm.tsa.seasonal_decompose(differenced_decomposed_Rainfall, model='additive',period=7)

# Extract the components
trend = decomposition.trend
residual = decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the components
plt.figure(figsize=(10, 8))
plt.subplot(4, 1, 1)
plt.plot(differenced_decomposed_Rainfall, label='Original',color='red')
plt.legend(loc='best')
plt.subplot(4, 1, 2)
plt.plot(trend, label='Trend',color='green')
plt.legend(loc='best')
plt.subplot(4, 1, 3)
plt.plot(residual, label='Residual',color='black')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
import statsmodels.api as sm
differenced_decomposed_Tmin = differenced_data['Tmin_ema']
# Perform time series decomposition
decomposition = sm.tsa.seasonal_decompose(differenced_decomposed_Tmin, model='additive',period=7)

# Extract the components
trend = decomposition.trend
residual = decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the components
plt.figure(figsize=(10, 8))
plt.subplot(4, 1, 1)
plt.plot(differenced_decomposed_Tmin, label='Original',color='red')
plt.legend(loc='best')
plt.subplot(4, 1, 2)
plt.plot(trend, label='Trend',color='green')
plt.legend(loc='best')
plt.subplot(4, 1, 3)
plt.plot(residual, label='Residual',color='black')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
import statsmodels.api as sm
differenced_decomposed_Tmax = differenced_data['Tmax_ema']
# Perform time series decomposition
decomposition = sm.tsa.seasonal_decompose(differenced_decomposed_Tmax, model='additive',period=7)

# Extract the components
trend = decomposition.trend
residual = decomposition.resid

In [None]:
import matplotlib.pyplot as plt

# Plot the components
plt.figure(figsize=(10, 8))
plt.subplot(4, 1, 1)
plt.plot(differenced_decomposed_Tmax, label='Original',color='red')
plt.legend(loc='best')
plt.subplot(4, 1, 2)
plt.plot(trend, label='Trend',color='green')
plt.legend(loc='best')
plt.subplot(4, 1, 3)
plt.plot(residual, label='Residual',color='black')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
# Differenced smoothed rainfall drawn as a single line against the date index.
fig, ax = plt.subplots()
ax.plot(differenced_data['Rainfall_ema'], color='blue', label='Rainfall')

# Title and axis labels
ax.set(title='Rainfall over the years', xlabel='Time', ylabel='Rain')

# Legend for the single series
ax.legend()

# Render the figure
plt.show()


In [None]:
# Plotting all three variables with different colors
plt.plot(differenced_data['Tmin_ema'], color='blue', label='Rainfall')

# Set the plot title and labels
plt.title('Minimum temperature over the years')
plt.xlabel('Time')
plt.ylabel('Rain')

# Add a legend to the plot
plt.legend()

# Show the plot
plt.show()


In [None]:
# Plotting all three variables with different colors
plt.plot(differenced_data['Tmax_ema'], color='blue', label='Rainfall')

# Set the plot title and labels
plt.title('Maximum Temperature over the years')
plt.xlabel('Time')
plt.ylabel('Rain')

# Add a legend to the plot
plt.legend()

# Show the plot
plt.show()


In [None]:
# Lag plot of the differenced smoothed rainfall, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))

lag_plot(differenced_data['Rainfall_ema'], ax=ax, c='blue', alpha=0.5, label='Rainfall')
ax.legend()
ax.set_title('Lag Plot of Rainfall')
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

lag_plot(differenced_data['Tmin_ema'], c='blue', alpha=0.5, ax=ax ,label='Rainfall')
plt.legend()
plt.title('Lag Plot of Minimum Temperature')
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

lag_plot(differenced_data['Tmax_ema'], c='blue', alpha=0.5, ax=ax ,label='Rainfall')
plt.legend()
plt.title('Lag Plot of Maximum Temperature')
plt.show()


In [None]:
values = differenced_data.values.reshape(-1, 3)
values

In [None]:
values.shape

In [None]:
# Normalize the data
scaler = MinMaxScaler(feature_range=(-1, 3))
scaled_values= scaler.fit_transform(values)
scaled_values

In [None]:
scaled_values.shape

In [None]:
# Split the data into training and testing sets
train_size = int(len(scaled_values) * 0.8)
train_data, test_data = scaled_values[:train_size], scaled_values[train_size:]

In [None]:
train_data.shape

In [None]:
test_data.shape

In [None]:
# Prepare the data for LSTM input
def create_sequences(scaled_values, seq_length):
    """Slice a series into overlapping windows and their next-step targets.

    Args:
        scaled_values: Sequence indexed along its first axis (one row per step).
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[k]`` holds rows
        ``k .. k + seq_length - 1`` and ``y[k]`` is row ``k + seq_length``.
        Both arrays are empty when the input has no more than ``seq_length`` rows.
    """
    window_starts = range(len(scaled_values) - seq_length)
    windows = [scaled_values[start:start + seq_length] for start in window_starts]
    targets = [scaled_values[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

seq_length = 7  # Length of input sequences
train_X, train_y = create_sequences(train_data, seq_length)
test_X, test_y = create_sequences(test_data, seq_length)

In [None]:
train_X.shape

In [None]:
train_y.shape

In [None]:
train_X

In [None]:
train_y

In [None]:
test_X.shape

In [None]:
test_y.shape

In [None]:
test_X

In [None]:
test_y

In [None]:
# Baseline network: two stacked LSTM layers and a Dense output layer with
# one unit per target column. The input shape is read from the training windows.
n_steps, n_features = train_X.shape[1:]
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(n_steps, n_features)),
    LSTM(50),
    Dense(train_y.shape[1]),  # Number of target variables
])

# Compile the model
model.compile(loss='mse', optimizer='adam')
model.summary()

In [None]:
# Train the LSTM model
model.fit(train_X, train_y,verbose=1)

# Make predictions on test data
predictions = model.predict(test_X)

In [None]:
predictions.shape

In [None]:
predictions

In [None]:
predictions=pd.DataFrame(predictions)
# Define the new column names
new_names = ['Predicted_Rainfall','Predicted_Tmin', 'Predicted_Tmax']
# Rename the columns
predictions.rename(columns=dict(zip(predictions.columns, new_names)), inplace=True)
predictions

In [None]:
test_y=pd.DataFrame(test_y)
new_names = ['Actual Rainfall','Actual Tmin', 'Actual Tmax']
# Rename the columns
test_y.rename(columns=dict(zip(test_y.columns, new_names)), inplace=True)
test_y

In [None]:
ActualVsPredicted = pd.concat([test_y, predictions],axis=1)
ActualVsPredicted

In [None]:
# Calculate evaluation metrics (on the scaled, differenced values)
mae = mean_absolute_error(test_y, predictions)
mse = mean_squared_error(test_y, predictions)
# RMSE without the `squared=False` flag, which scikit-learn deprecated in 1.4
# and removed in 1.6. Taking the root of each output's MSE and averaging the
# roots reproduces what the flag computed for multi-output targets.
rmse = np.sqrt(mean_squared_error(test_y, predictions, multioutput='raw_values')).mean()
mape = mean_absolute_percentage_error(test_y, predictions)

# Print the evaluation results
print('Mean Absolute Error (MAE):', mae)
print('Mean Squared Error (MSE):', mse)
print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Percentage Error (MAPE):', mape)

TUNING

TUNING NUMBER OF EPOCHS

In [None]:
# Define your range of epochs to try
epochs_range = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]

best_test_loss = float('inf')
best_epochs = None

# Iterate over the epochs range
for epochs in epochs_range:
    print(f"Testing epochs: {epochs}")
    # Build a fresh model for every setting so runs do not share trained weights.
    model2 = Sequential()
    model2.add(LSTM(50, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
    model2.add(LSTM(50))
    model2.add(Dense(train_y.shape[1]))  # Number of target variables
    # Optimizer spelled out: with no argument compile() uses the Keras
    # default, 'rmsprop', so this makes the existing behaviour visible.
    model2.compile(optimizer='rmsprop', loss='mse')

    # Train the model
    history = model2.fit(train_X, train_y, epochs=epochs, verbose=0)

    # Evaluate the model on the test set
    # NOTE(review): selecting hyper-parameters on the test set makes the final
    # test metrics optimistic; a separate validation split would avoid that.
    test_loss = model2.evaluate(test_X, test_y)

    # Check if the current model has the best test loss
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_epochs = epochs

# Report the outcome of the search.
print(f"Best epochs: {best_epochs} (test loss: {best_test_loss})")

TUNING BATCH SIZE

In [None]:
# Define a list of batch sizes to try
batch_sizes = [16, 32, 64, 128]
best_test_loss = float('inf')
best_batch_size = None
# Iterate over each batch size
for batch_size in batch_sizes:
    print(f"Testing batch size: {batch_size}")

    # Create an LSTM model with the same architecture as the baseline:
    # input shape taken from the training windows, one output unit per target.
    model3 = Sequential()
    model3.add(LSTM(50, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
    model3.add(LSTM(50))
    # A single output unit did not match the multi-column targets.
    model3.add(Dense(train_y.shape[1]))
    # Optimizer spelled out: 'rmsprop' is what compile() uses by default.
    model3.compile(optimizer='rmsprop', loss='mse')

    # Train the model
    model3.fit(train_X, train_y, batch_size=batch_size, validation_data=(test_X, test_y), verbose=0)

    # Evaluate the model
    test_loss = model3.evaluate(test_X, test_y)

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_batch_size = batch_size

# Report the outcome of the search.
print(f"Best batch size: {best_batch_size} (test loss: {best_test_loss})")

TUNING OPTIMIZER

In [None]:
from tensorflow.keras.optimizers import Adam, RMSprop, SGD

# Define a list of optimizers to try
optimizers = ['SGD', 'RMSprop', 'Adam']

best_test_loss = float('inf')
best_optimizer = None

# Iterate over each optimizer
for optimizer in optimizers:
    print(f"Testing optimizer: {optimizer}")

    # Create an LSTM model with the same architecture as the baseline:
    # input shape taken from the training windows, one output unit per target.
    model4 = Sequential()
    model4.add(LSTM(50, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
    model4.add(LSTM(50))
    # A single output unit did not match the multi-column targets.
    model4.add(Dense(train_y.shape[1]))
    # Pass the candidate to compile(); without it every iteration trained
    # with the same default optimizer and the comparison was meaningless.
    model4.compile(optimizer=optimizer, loss='mse')

    # Train the model
    model4.fit(train_X, train_y, validation_data=(test_X, test_y), verbose=0)

    # Evaluate the model
    test_loss = model4.evaluate(test_X, test_y)

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_optimizer = optimizer

# Report the outcome of the search.
print(f"Best optimizer: {best_optimizer} (test loss: {best_test_loss})")


TUNING ACTIVATION FUNCTION

In [None]:
# Define a list of activation functions to try
activation_functions = ['relu', 'tanh', 'sigmoid']

best_test_loss = float('inf')
best_activation_function = None

# Iterate over each activation function
for activation_function in activation_functions:
    print(f"Testing activation_function: {activation_function}")

    # Create an LSTM model whose LSTM layers use the candidate activation.
    # Without passing it, every iteration trained an identical network.
    model5 = Sequential()
    model5.add(LSTM(50, activation=activation_function, return_sequences=True,
                    input_shape=(train_X.shape[1], train_X.shape[2])))
    model5.add(LSTM(50, activation=activation_function))
    # One output unit per target column (a single unit did not match the targets).
    model5.add(Dense(train_y.shape[1]))
    # Optimizer named explicitly. The cell previously read `optimizer`, a loop
    # variable left over from the optimizer-tuning cell, whose final value
    # was 'Adam'.
    model5.compile(optimizer='Adam', loss='mse')

    # Train the model
    model5.fit(train_X, train_y, validation_data=(test_X, test_y), verbose=0)

    # Evaluate the model
    test_loss = model5.evaluate(test_X, test_y)

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        best_activation_function = activation_function

# Report the outcome of the search.
print(f"Best activation function: {best_activation_function} (test loss: {best_test_loss})")


MODEL BUILDING AFTER TUNING

In [None]:
tuned_model = Sequential()
tuned_model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
tuned_model.add(LSTM(50, activation='relu'))
tuned_model.add(Dense(train_y.shape[1]))  # Number of target variables

# Compile the model
tuned_model.compile(optimizer='SGD', loss='mse')
tuned_model.summary()

In [None]:
# Train the LSTM model
tuned_model.fit(train_X, train_y, epochs=50, batch_size=16, verbose=1)

# Make predictions on test data
new_predictions = tuned_model.predict(test_X)

In [None]:
# Calculate evaluation metrics (on the scaled, differenced values)
mae = mean_absolute_error(test_y, new_predictions)
mse = mean_squared_error(test_y, new_predictions)
# RMSE without the `squared=False` flag, which scikit-learn deprecated in 1.4
# and removed in 1.6. Taking the root of each output's MSE and averaging the
# roots reproduces what the flag computed for multi-output targets.
rmse = np.sqrt(mean_squared_error(test_y, new_predictions, multioutput='raw_values')).mean()
mape = mean_absolute_percentage_error(test_y, new_predictions)

# Print the evaluation results
print('Mean Absolute Error (MAE):', mae)
print('Mean Squared Error (MSE):', mse)
print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Percentage Error (MAPE):', mape)

In [None]:
new_predictions=pd.DataFrame(new_predictions)
column_index = 0
current_name = new_predictions.columns[column_index]
# Define the new column name
new_name = 'new_Predicted_Tmin'
# Rename the column
new_predictions.rename(columns={current_name: new_name}, inplace=True)

In [None]:
ActualVsNewPredicted = pd.concat([test_y, new_predictions],axis=1)
ActualVsNewPredicted