# Local IDS Using LSTM (BoT-IoT Dataset)

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Ensure TensorFlow uses the GPU(s) if available.
# Memory growth makes TensorFlow allocate GPU memory on demand instead of
# reserving all of it up front.
print("Checking GPU availability...")
physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:
    try:
        # The memory-growth setting has to be the same on every visible GPU,
        # so enable it on all of them rather than only on the first device.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
            print(f"GPU Available: {gpu}")
    except RuntimeError as e:
        # Per the TensorFlow docs this is raised when the runtime has already
        # been initialized; keep going with the default allocation strategy.
        print(f"GPU Memory Growth Could Not Be Set: {e}")
else:
    print("No GPU found. Running on CPU.")

# ------------------------------------
# Load & Preprocess the BoT-IoT Dataset
# ------------------------------------
# Produces: bot_iot_df (merged, cleaned frame), label_encoder, scaler,
# X / y (scaled features and one-hot labels for all rows) and the
# X_train / X_test / y_train / y_test split, with X_* shaped for the LSTM.

# Path to dataset folder
dataset_folder = "/kaggle/input/bot-iot"

# List all CSV files in the folder. os.listdir() returns entries in arbitrary
# order, so sort them to keep the merged row order (and therefore the seeded
# train/test split) reproducible between runs.
file_paths = sorted(
    os.path.join(dataset_folder, file)
    for file in os.listdir(dataset_folder)
    if file.endswith(".csv")
)
if not file_paths:
    raise FileNotFoundError(f"No CSV files found in '{dataset_folder}'.")

# Load and combine all BoT-IoT datasets
all_dfs = []
for file in file_paths:
    df = pd.read_csv(file)
    df["Dataset"] = file  # Track dataset source
    all_dfs.append(df)

# Merge all datasets
bot_iot_df = pd.concat(all_dfs, ignore_index=True)

# Standardize column names
bot_iot_df.columns = bot_iot_df.columns.str.strip().str.replace(' ', '_').str.replace('/', '_')

# Print available columns
print("Available Columns:", bot_iot_df.columns.tolist())

# Define Correct Feature List
selected_features = [
    "Flow_Duration", "Tot_Fwd_Pkts", "Tot_Bwd_Pkts", "Fwd_Pkt_Len_Mean",
    "Bwd_Pkt_Len_Mean", "Flow_Byts_s", "Flow_Pkts_s", "Bwd_Pkts_s",
    "Bwd_Pkt_Len_Max", "SDN_Priority"
]

# Check for missing columns before processing (the target column included,
# so a missing "Label" fails here with a clear message instead of a KeyError).
required_cols = selected_features + ["Label"]
missing_cols = [col for col in required_cols if col not in bot_iot_df.columns]
if missing_cols:
    raise ValueError(f"Missing Columns: {missing_cols}. Update 'selected_features' list to match dataset.")

# Drop rows that cannot be used for training: infinite values make
# StandardScaler raise, and NaN values would propagate into the network.
bot_iot_df[selected_features] = bot_iot_df[selected_features].replace([np.inf, -np.inf], np.nan)
rows_before = len(bot_iot_df)
bot_iot_df = bot_iot_df.dropna(subset=required_cols).reset_index(drop=True)
rows_dropped = rows_before - len(bot_iot_df)
if rows_dropped:
    print(f"Dropped {rows_dropped} rows with missing or infinite values.")
if bot_iot_df.empty:
    raise ValueError("No usable rows left after removing missing/infinite values.")

# Encode Labels
label_encoder = LabelEncoder()
bot_iot_df["Label"] = label_encoder.fit_transform(bot_iot_df["Label"])

# Train-Test Split (on row indices, so the scaler can be fitted on the
# training rows only before any scaled data is produced).
features = bot_iot_df[selected_features].to_numpy(dtype=np.float64)
row_indices = np.arange(len(features))
train_idx, test_idx = train_test_split(row_indices, test_size=0.2, random_state=42)

# Standardize numeric features. Fitting on the training rows only keeps the
# test-set mean/variance out of the preprocessing (no data leakage).
scaler = StandardScaler()
scaler.fit(features[train_idx])
X = scaler.transform(features)
y = tf.keras.utils.to_categorical(
    bot_iot_df["Label"], num_classes=len(label_encoder.classes_)
)  # One-hot encode labels

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Reshape inputs to be 3D for LSTM (batch_size, time_steps=1, features)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# ------------------------------------
# Build the LSTM-Based Local IDS Model
# ------------------------------------

# Input width and output width are taken from the prepared arrays:
# one input per selected feature, one output unit per one-hot label column.
n_features = X.shape[1]
n_classes = y.shape[1]

layer_stack = [
    # First recurrent stage: emits the full sequence for the next LSTM.
    LSTM(64, return_sequences=True, input_shape=(1, n_features)),
    Dropout(0.2),
    BatchNormalization(),
    # Second recurrent stage: emits only the final output vector.
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    BatchNormalization(),
    # Dense classification head; softmax gives multi-class probabilities.
    Dense(64, activation="relu"),
    Dropout(0.3),
    Dense(n_classes, activation="softmax"),
]
lstm_model = Sequential(layer_stack)

# Compile with Adam and categorical cross-entropy (labels are one-hot).
adam_optimizer = Adam(learning_rate=0.001)
lstm_model.compile(
    loss="categorical_crossentropy",
    optimizer=adam_optimizer,
    metrics=["accuracy"],
)

# ------------------------------------
# Train the Model (Using GPU Acceleration)
# ------------------------------------
# NOTE: the held-out test split is also passed as validation data, so the
# per-epoch validation metrics are computed on the same rows that the final
# evaluation uses.
print("Training LSTM-Based Local IDS...")
history = lstm_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=25,
    validation_data=(X_test, y_test),
)

# ------------------------------------
# Evaluate & Compare with Paper's Results
# ------------------------------------
# Predict on test data: take the most probable class per row and convert the
# one-hot ground truth back to integer class ids.
y_pred = np.argmax(lstm_model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)

# Use the full, fixed set of encoded classes for every report below. Without
# an explicit `labels` list scikit-learn infers the classes from the data, so
# a class missing from the test split would make classification_report raise
# (size mismatch with target_names) and would shift the heatmap tick labels.
# Names are converted to str because the report formats them as text.
class_names = [str(name) for name in label_encoder.classes_]
class_ids = np.arange(len(class_names))

# Print Accuracy & Classification Report
accuracy = accuracy_score(y_true, y_pred)
print(f"Local IDS Model Accuracy: {accuracy * 100:.2f}%")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

# Confusion Matrix (rows = actual class, columns = predicted class)
plt.figure(figsize=(8, 6))
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Local IDS LSTM Model")
plt.show()
