# Spectral Attention Block with Classification 

In [1]:
import os
import numpy as np
import pandas as pd
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models

# Function to read labels from xlsx
def read_labels(xlsx_path):
    try:
        labels_df = pd.read_excel(xlsx_path)
        if 'Labels' in labels_df.columns:
            labels = {idx: label for idx, label in enumerate(labels_df['Labels'], start=1)}
            return labels
        else:
            print(f"Error: 'Labels' column not found in {xlsx_path}.")
            return {}
    except Exception as e:
        print(f"Error reading {xlsx_path}: {e}")
        return {}

# Base directory path where the session folders are located
base_directory = '/kaggle/input/seed-iv-dataset/Seed iv'

# List of session folders
session_folders = [
    "individual_trials_session_1_padded",
   "individual_trials_session_2_padded",
   "individual_trials_session_3_padded"
]

# Example paths to your Excel files
xlsx_paths = [
    '/kaggle/input/seed-iv-dataset/Seed iv/session1_labels.xlsx',
    '/kaggle/input/seed-iv-dataset/Seed iv/session2_labels.xlsx',
    '/kaggle/input/seed-iv-dataset/Seed iv/session3_labels.xlsx'
]

# Initialize a dictionary to store labels
all_labels = {}

# Iterate over each Excel file to read labels
for idx, xlsx_path in enumerate(xlsx_paths, start=1):
    session_name = f"Session_{idx}"
    labels = read_labels(xlsx_path)
    all_labels[session_name] = labels

# Function to load EEG data (simplified example)
def load_eeg_data(mat_file_path):
    mat_data = loadmat(mat_file_path)
    eeg_data = mat_data['merged_data']  # Assuming 'merged_data' is the key in your .mat file
    return eeg_data

# Function to prepare dataset for training
def prepare_dataset(base_directory, session_folders, all_labels):
    X = []
    y = []

    for session_folder in session_folders:
        session_path = os.path.join(base_directory, session_folder)

        for participant_num in range(1, 16):
            participant_folder = f"Participant_{participant_num}"
            participant_path = os.path.join(session_path, participant_folder)

            for trial_num in range(1, 25):
                mat_file = f"Trial_{trial_num}.mat"
                mat_file_path = os.path.join(participant_path, mat_file)

                eeg_data = load_eeg_data(mat_file_path)

                # Example of extracting label
                session_name = f"Session_{session_folders.index(session_folder) + 1}"
                labels = all_labels.get(session_name, {})
                label = labels.get(trial_num, None)

                if label is not None:
                    X.append(eeg_data) 
                    y.append(label)

    X = np.array(X)
    y = np.array(y)

    return X, y

# Prepare dataset
X, y = prepare_dataset(base_directory, session_folders, all_labels)

2024-09-19 12:21:27.592959: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-19 12:21:27.593132: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-19 12:21:27.787376: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.utils.class_weight import compute_class_weight


# Encoding class labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

num_samples, channels, epochs, features = X.shape
X_reshaped = X.reshape(num_samples, channels * epochs * features)  # Flatten to 2D

# Step 3: Define the preprocessing pipeline
preprocessing_pipeline = ImbPipeline(steps=[
    ('imputation', SimpleImputer(strategy='mean')),
    ('scaling', StandardScaler())
])

# Apply preprocessing pipeline to training data
X_preprocessed = preprocessing_pipeline.fit_transform(X_reshaped)

X_preprocessed_reshaped = X_preprocessed.reshape(num_samples, channels, epochs, features)

# Step 1: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed_reshaped, y, test_size=0.2, stratify=y, random_state=42)

# Reshape data to 2D for SMOTE
num_samples, channels, epochs, features = X_train.shape
X_train_reshaped = X_train.reshape(num_samples, channels * epochs * features)  # Flatten to 2D

# Apply SMOTE
smote = SMOTE(random_state=42)
X_train_balanced_reshaped, y_train_balanced = smote.fit_resample(X_train_reshaped, y_train)

# Reshape back to original dimensions
num_samples_balanced = X_train_balanced_reshaped.shape[0]
X_train_balanced = X_train_balanced_reshaped.reshape(num_samples_balanced, channels, epochs, features)

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM, GRU
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(GRU(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(GRU(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Create the complete model with only the Spectral Attention Block
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    # Flatten the output
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 4
    rnn_units_spectral = 256
    seq_len_spectral = 64
    rnn_units = 128
    mlp_units = 128
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral, rnn_units, mlp_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

   

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train_balanced, y_train_balanced,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()


Epoch 1/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m612s[0m 22s/step - accuracy: 0.4009 - loss: 3.5626 - val_accuracy: 0.5463 - val_loss: 2.9793 - learning_rate: 1.0000e-04
Epoch 2/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m604s[0m 22s/step - accuracy: 0.5974 - loss: 2.0336 - val_accuracy: 0.7130 - val_loss: 1.8917 - learning_rate: 1.0000e-04
Epoch 3/50
[1m 8/27[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m6:29[0m 21s/step - accuracy: 0.7346 - loss: 1.7563

In [None]:
#s-3 (Fully Functionaing and working)
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM, GRU
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(GRU(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(GRU(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Ensemble Layer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units, mlp_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'ensemble': EnsembleLayer(embed_size, hidden_dim, rnn_units, mlp_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['ensemble'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units, mlp_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 5
    rnn_units_spectral = 256
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 5
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 256
    mlp_units = 64
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    
    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train_balanced, y_train_balanced,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

Epoch 1/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1477s[0m 53s/step - accuracy: 0.4834 - loss: 3.0842 - val_accuracy: 0.6250 - val_loss: 2.3271 - learning_rate: 1.0000e-04
Epoch 2/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1431s[0m 52s/step - accuracy: 0.6286 - loss: 1.9773 - val_accuracy: 0.7222 - val_loss: 1.8502 - learning_rate: 1.0000e-04
Epoch 3/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1391s[0m 52s/step - accuracy: 0.7030 - loss: 1.7680 - val_accuracy: 0.7361 - val_loss: 1.6652 - learning_rate: 1.0000e-04
Epoch 4/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1384s[0m 51s/step - accuracy: 0.7391 - loss: 1.6197 - val_accuracy: 0.7176 - val_loss: 1.6999 - learning_rate: 1.0000e-04
Epoch 5/50
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1394s[0m 52s/step - accuracy: 0.7776 - loss: 1.5826 - val_accuracy: 0.8009 - val_loss: 1.5857 - learning_rate: 1.0000e-04
Epoch 6/50
[1m14/27[0m [32m━━━━━━━━━━[0m

In [None]:
#s-2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns       
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Ensemble Layer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units, mlp_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'ensemble': EnsembleLayer(embed_size, hidden_dim, rnn_units, mlp_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['ensemble'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units, mlp_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 5
    rnn_units_spectral = 256
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 5
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 256
    mlp_units = 64
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    
    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train_balanced, y_train_balanced,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
#s-1
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Ensemble Layer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units, mlp_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'ensemble': EnsembleLayer(embed_size, hidden_dim, rnn_units, mlp_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['ensemble'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units, mlp_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 5
    rnn_units_spectral = 256
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 5
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 256
    mlp_units = 64
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    
    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train_balanced, y_train_balanced,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
#s1,2,3
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Spatial Attention Block with RNN
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'rnn': RNNLayer(embed_size, hidden_dim, rnn_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['rnn'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Updated Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 5
    num_heads_spectral = 4  # Increased heads for potentially better attention
    rnn_units_spectral = 256
    seq_len_spectral = 64
    num_layers_spatial = 5 
    num_heads_spatial = 4  # Increased heads for potentially better attention
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 128  # RNN units for the Spatial Attention Block
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Example data loading (replace with actual data loading code)
    # X_train, X_test, y_train, y_test = ...
  #  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize data if necessary
  #  X_train = X_train / np.max(X_train, axis=(0,1,2), keepdims=True)
 #   X_test = X_test / np.max(X_test, axis=(0,1,2), keepdims=True)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train_balanced, y_train_balanced,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()


In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_standardized = scaler.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)

In [None]:
#s1
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Ensemble Layer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units, mlp_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'ensemble': EnsembleLayer(embed_size, hidden_dim, rnn_units, mlp_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['ensemble'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units, mlp_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 4
    rnn_units_spectral = 128
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 4
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 128
    mlp_units = 64
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Example data loading (replace with actual data loading code)
    # X_train, X_test, y_train, y_test = ...
    # Placeholder data (replace with actual data)
    #X = np.random.random((1000, 62, 64, 20))  # Example input data
    #y = np.random.randint(0, num_classes, 1000)  # Example labels
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize data if necessary
    X_train = X_train / np.max(X_train, axis=(0,1,2), keepdims=True)
    X_test = X_test / np.max(X_test, axis=(0,1,2), keepdims=True)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)

In [None]:
from imblearn.over_sampling import SMOTE

# Initialize SMOTE
smote = SMOTE()

# Fit and transform the training data
X_resampled, y_resampled = smote.fit_resample(X.reshape(X.shape[0], -1), y)

# Reshape back to original shape if needed
X_resampled = X_resampled.reshape(-1, 62, 64, 20)


In [None]:
#s1
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Ensemble Layer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units, mlp_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'ensemble': EnsembleLayer(embed_size, hidden_dim, rnn_units, mlp_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['ensemble'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units, mlp_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 4
    rnn_units_spectral = 128
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 4
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 128
    mlp_units = 64
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Example data loading (replace with actual data loading code)
    # X_train, X_test, y_train, y_test = ...
    # Placeholder data (replace with actual data)
    #X = np.random.random((1000, 62, 64, 20))  # Example input data
    #y = np.random.randint(0, num_classes, 1000)  # Example labels
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize data if necessary
    X_train = X_train / np.max(X_train, axis=(0,1,2), keepdims=True)
    X_test = X_test / np.max(X_test, axis=(0,1,2), keepdims=True)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
#2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Ensemble Layer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units, mlp_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'ensemble': EnsembleLayer(embed_size, hidden_dim, rnn_units, mlp_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['ensemble'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units, mlp_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 4
    rnn_units_spectral = 128
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 4
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 128
    mlp_units = 64
    num_classes = 4

    # Create and compile model
    model2 = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes)

    model2.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize data if necessary
    X_train = X_train / np.max(X_train, axis=(0,1,2), keepdims=True)
    X_test = X_test / np.max(X_test, axis=(0,1,2), keepdims=True)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model2.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model2.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
#3
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Ensemble Layer (RNN + MLP)
class EnsembleLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units, mlp_units):
        super(EnsembleLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense_rnn = Dense(embed_size, activation='relu')
        
        self.mlp = tf.keras.Sequential([
            Dense(mlp_units, activation='relu'),
            Dense(embed_size, activation='relu')
        ])
        
        self.final_dense = Dense(embed_size, activation='relu')

    def call(self, x):
        # Process through RNN path
        rnn_out = self.rnn(x)
        rnn_out = self.dense_rnn(rnn_out)
        
        # Process through MLP path
        mlp_out = self.mlp(x)
        
        # Combine both paths
        combined = tf.concat([rnn_out, mlp_out], axis=-1)
        combined = self.final_dense(combined)
        
        return combined

# Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Ensemble Layer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units, mlp_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'ensemble': EnsembleLayer(embed_size, hidden_dim, rnn_units, mlp_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['ensemble'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units, mlp_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Example data dimensions and parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 3
    num_heads_spectral = 4
    rnn_units_spectral = 128
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 4
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 128
    mlp_units = 64
    num_classes = 4

    # Create and compile model
    model3 = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, mlp_units, num_classes)

    model3.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize data if necessary
    X_train = X_train / np.max(X_train, axis=(0,1,2), keepdims=True)
    X_test = X_test / np.max(X_test, axis=(0,1,2), keepdims=True)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model3.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model3.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

 
# Reshape to 2D: (samples * channels * epochs, features)
X_reshaped = X.reshape(-1, X.shape[-1])

# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_reshaped)

# Fit PCA
pca = PCA()
pca.fit(X_scaled)

# Calculate explained variance ratio and cumulative variance ratio
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_variance_ratio = np.cumsum(explained_variance_ratio)

# Plot cumulative explained variance
plt.figure(figsize=(8, 6))
plt.plot(cumulative_variance_ratio, marker='o')
plt.axhline(y=0.90, color='r', linestyle='--', label='90% Variance')
plt.axhline(y=0.95, color='g', linestyle='--', label='95% Variance')
plt.title('Cumulative Explained Variance')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.legend()
plt.grid(True)
plt.show()

# Find the number of components to explain 90% of the variance
n_components_90 = np.argmax(cumulative_variance_ratio >= 0.90) + 1
print(f'Number of components to explain 90% variance: {n_components_90}')

# Find the number of components to explain 95% of the variance
n_components_95 = np.argmax(cumulative_variance_ratio >= 0.95) + 1
print(f'Number of components to explain 95% variance: {n_components_95}')

# Apply PCA transformation
pca = PCA(n_components=n_components_95)  # You can use n_components_95 if needed
X_pca = pca.fit_transform(X_scaled)

# Reshape PCA output back to 4D-like structure
X_pca_reshaped = X_pca.reshape(X.shape[0], X.shape[1], X.shape[2], -1)

print(f"Original shape: {X.shape}")
print(f"PCA transformed shape: {X_pca_reshaped.shape}")


In [None]:
#s- 3
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, Bidirectional, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = Bidirectional(LSTM(rnn_units, return_sequences=True))
        self.dense = Dense(embed_size, activation='relu')

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Spatial Attention Block with RNN
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'rnn': RNNLayer(embed_size, hidden_dim, rnn_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['rnn'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Updated Spectral Attention Block with RNNLayer
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, rnn_units, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Trainable Positional Encoding
        self.positional_encoding = self.add_weight(
            shape=(seq_len, input_dim),
            initializer='uniform',
            trainable=True,
            name='positional_encoding'
        )

        # Multi-Head Attention Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]

        # RNN Layers
        self.rnn_layers = [
            RNNLayer(input_dim, rnn_units, rnn_units) for _ in range(num_layers)
        ]
        
        # Layer Normalizations
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_rnn = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            
            rnn_output = self.rnn_layers[l](x)
            x = self.layer_norms_rnn[l](rnn_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Parameters
    input_shape = (62, 64, 5)
    num_layers_spectral = 3
    num_heads_spectral = 4  # Increased heads for potentially better attention
    rnn_units_spectral = 128
    seq_len_spectral = 64
    num_layers_spatial = 3
    num_heads_spatial = 4  # Increased heads for potentially better attention
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 128  # RNN units for the Spatial Attention Block
    num_classes = 4

    # Create and compile model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, rnn_units_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.0001, weight_decay=1e-5),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Example data loading (replace with actual data loading code)
    # X_train, X_test, y_train, y_test = ...
    X_train, X_test, y_train, y_test = train_test_split(X_pca_reshaped, y, test_size=0.2, random_state=42)

    # Normalize data if necessary
    X_train = X_train / np.max(X_train, axis=(0,1,2), keepdims=True)
    X_test = X_test / np.max(X_test, axis=(0,1,2), keepdims=True)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Spectral Attention Block
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, mlp_dim, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Positional Encoding
        position = tf.range(seq_len, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, input_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / input_dim))
        positional_encoding = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
        if input_dim % 2 != 0:
            positional_encoding = positional_encoding[:, :input_dim]
        self.positional_encoding = tf.Variable(positional_encoding, trainable=False, name='positional_encoding')

        # MHA and MLP Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]
        self.mlp_layers = [
            tf.keras.Sequential([
                layers.Dense(mlp_dim, activation='relu', kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)),
                layers.Dense(input_dim, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
            ]) for _ in range(num_layers)
        ]
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_mlp = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1]) 
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            mlp_output = self.mlp_layers[l](x)
            x = self.layer_norms_mlp[l](mlp_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# FeedForward Layer
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim):
        super(FeedForward, self).__init__()
        self.fc1 = Dense(hidden_dim, activation='relu')
        self.fc2 = Dense(embed_size)
        
    def call(self, x):
        return self.fc2(self.fc1(x))

# Spatial Attention Block
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, sequence_length, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'mlp': FeedForward(embed_size, hidden_dim),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        pos_encoding = tf.cast(self.positional_encoding, dtype=x.dtype)
        x = x + pos_encoding
        
        for layer in self.layers:
            x_res = x
            x = layer['mha'](query=x, value=x, key=x)
            x = layer['ln1'](x + x_res)
            x_res = x
            x = layer['mlp'](x)
            x = layer['ln2'](x + x_res)
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    # Apply Spectral Attention Block
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral)(inputs)
    
    # Apply Spatial Attention Block
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral)(x)
    
    # Classification Head
    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Define parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 1
    num_heads_spectral = 2
    mlp_dim_spectral = 128
    seq_len_spectral = 64
    num_layers_spatial = 1
    num_heads_spatial = 2
    hidden_dim_spatial = 128
    num_channels_spatial = 62
    num_classes = 4

    # Create the model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, num_classes)

    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Load and preprocess your data here
    # X_train, X_test, y_train, y_test = ...
    
     # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Transformer Layer
class TransformerLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, num_heads, ff_dim):
        super(TransformerLayer, self).__init__()
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_size)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_size)
        ])
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.dropout = tf.keras.layers.Dropout(0.1)

    def call(self, x):
        # Attention
        attn_output = self.attention(query=x, value=x, key=x)
        attn_output = self.dropout(attn_output)
        out1 = self.layernorm1(x + attn_output)
        
        # Feed Forward
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout(ffn_output)
        out2 = self.layernorm2(out1 + ffn_output)
        
        return out2

# Spectral Attention Block
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, mlp_dim, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        position = tf.range(seq_len, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, input_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / input_dim))
        positional_encoding = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
        if input_dim % 2 != 0:
            positional_encoding = positional_encoding[:, :input_dim]
        self.positional_encoding = tf.Variable(positional_encoding, trainable=False, name='positional_encoding')

        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]
        self.mlp_layers = [
            tf.keras.Sequential([
                layers.Dense(mlp_dim, activation='relu', kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)),
                layers.Dense(input_dim, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
            ]) for _ in range(num_layers)
        ]
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_mlp = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            mlp_output = self.mlp_layers[l](x)
            x = self.layer_norms_mlp[l](mlp_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Spatial Attention Block with Transformer
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, num_heads_transformer, ff_dim):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'transformer': TransformerLayer(embed_size, num_heads_transformer, ff_dim),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['transformer'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, num_heads_transformer, ff_dim, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, num_heads_transformer, ff_dim)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    input_shape = (62, 64, 20)
    num_layers_spectral = 1
    num_heads_spectral = 2
    mlp_dim_spectral = 256
    seq_len_spectral = 64
    num_layers_spatial = 1
    num_heads_spatial = 2
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    num_heads_transformer = 4  # Number of heads for the Transformer
    ff_dim = 128  # Feed-forward dimension for the Transformer
    num_classes = 4

    # Create the model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, num_heads_transformer, ff_dim, num_classes)

    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Example data loading (replace with your actual data)
    X = np.random.rand(100, *input_shape)  # Dummy data
    y = np.random.randint(0, num_classes, 100)  # Dummy labels
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=64,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    print(classification_report(y_test, y_pred))
    
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    cm = confusion_matrix(y_test, y_pred)
    
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()


In [None]:
# Session 1,2,3- RNN

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention, SimpleRNN
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# RNN Layer
class RNNLayer(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim, rnn_units):
        super(RNNLayer, self).__init__()
        self.rnn = SimpleRNN(rnn_units, return_sequences=True)
        self.dense = Dense(embed_size)

    def call(self, x):
        x = self.rnn(x)
        x = self.dense(x)
        return x

# Spatial Attention Block with RNN
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length, rnn_units):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'rnn': RNNLayer(embed_size, hidden_dim, rnn_units),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        sequence_length = tf.shape(x)[2]
        embed_size = tf.shape(x)[3]
        
        x_reshaped = tf.reshape(x, (batch_size * sequence_length, num_channels, embed_size))
        pos_encoding = tf.tile(self.positional_encoding[tf.newaxis, :, :], [batch_size * sequence_length, 1, 1])
        x_reshaped += pos_encoding
        
        for layer in self.layers:
            x_res = x_reshaped
            x_reshaped = layer['mha'](query=x_reshaped, value=x_reshaped, key=x_reshaped)
            x_reshaped = layer['ln1'](x_reshaped + x_res)
            
            x_res = x_reshaped
            x_reshaped = layer['rnn'](x_reshaped)
            x_reshaped = layer['ln2'](x_reshaped + x_res)
        
        x_reshaped = tf.reshape(x_reshaped, (batch_size, sequence_length, num_channels, embed_size))
        x = tf.transpose(x_reshaped, perm=[0, 2, 1, 3])
        
        return x

# Spectral Attention Block (unchanged)
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, mlp_dim, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        position = tf.range(seq_len, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, input_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / input_dim))
        positional_encoding = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
        if input_dim % 2 != 0:
            positional_encoding = positional_encoding[:, :input_dim]
        self.positional_encoding = tf.Variable(positional_encoding, trainable=False, name='positional_encoding')

        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]
        self.mlp_layers = [
            tf.keras.Sequential([
                layers.Dense(mlp_dim, activation='relu', kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)),
                layers.Dense(input_dim, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
            ]) for _ in range(num_layers)
        ]
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_mlp = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            mlp_output = self.mlp_layers[l](x)
            x = self.layer_norms_mlp[l](mlp_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral)(inputs)
    
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral, rnn_units)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    input_shape = (62, 64, 20)
    num_layers_spectral = 1
    num_heads_spectral = 2
    mlp_dim_spectral = 256
    seq_len_spectral = 64
    num_layers_spatial = 1
    num_heads_spatial = 2
    hidden_dim_spatial = 256
    num_channels_spatial = 62
    rnn_units = 256  # Set the number of units for the RNN
    num_classes = 4

    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, rnn_units, num_classes)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Example data loading
    # X_train, X_test, y_train, y_test = ...
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=64,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    print(classification_report(y_test, y_pred))
    
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    cm = confusion_matrix(y_test, y_pred)
    
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Define the Spectral Attention Block
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, mlp_dim, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len  # Number of frames or time points
        self.input_dim = input_dim  # Dimension of frequency domain features

        # Positional Encoding for frequency domain features
        position = tf.range(seq_len, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, input_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / input_dim))
        positional_encoding = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
        if input_dim % 2 != 0:  # Handle odd input_dim
            positional_encoding = positional_encoding[:, :input_dim]
        self.positional_encoding = tf.Variable(positional_encoding, trainable=False, name='positional_encoding')

        # Multi-Head Attention and MLP layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]
        self.mlp_layers = [
            tf.keras.Sequential([
                layers.Dense(mlp_dim, activation='relu', kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)),
                layers.Dense(input_dim, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
            ]) for _ in range(num_layers)
        ]
        
        # Layer Normalization for MHA and MLP
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_mlp = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        # Add positional encoding to the input tensor
        x += self.positional_encoding
        
        # Store attention weights for visualization
        all_attention_weights = []

        # Get the dynamic shape of the input tensor
        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        # Reshape input to [batch_size * channels, seq_len, input_dim]
        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            # Multi-Head Attention
            x_transposed = tf.transpose(x, perm=[0, 2, 1]) 
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)

            # Transpose back to original shape
            x = tf.transpose(x_transposed, perm=[0, 2, 1]) 

            # Multi-Layer Perceptron
            mlp_output = self.mlp_layers[l](x)
            x = self.layer_norms_mlp[l](mlp_output + x)

        # Reshape back to (batch_size, num_channels, seq_len, input_dim)
        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        
        return output, all_attention_weights  # Return attention weights

# Create the complete model with the Spectral Attention Block
def create_model(input_shape, num_layers, num_heads, mlp_dim, seq_len, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers, num_heads, mlp_dim, seq_len)(inputs)
    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)  # L2 Regularization
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)  # Adjusted Dropout Rate
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Define parameters
    input_shape = (62, 64, 20)  # Shape of each input sample
    num_layers = 3  # Reduced number of layers
    num_heads = 4  # Reduced number of attention heads
    mlp_dim = 256   # Dimension of the MLP
    seq_len = 64    # Number of time points
    num_classes = 4 # Assuming 4 classes

    # Create the model
    model = create_model(input_shape, num_layers, num_heads, mlp_dim, seq_len, num_classes)

    # Compile the model with the built-in cross-entropy loss function
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),  # Use built-in loss function
                  metrics=['accuracy'])

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model with Early Stopping and ReduceLROnPlateau
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    history = model.fit(X_train, y_train,  # Use integer labels directly
                        validation_data=(X_test, y_test), 
                        epochs=50, 
                        batch_size=64, 
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)

    # Print classification report
    print(classification_report(y_test, y_pred))

    # Calculate and print accuracy 
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')

    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')  # Last epoch's training accuracy
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')  # Last epoch's validation accuracy

    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()

    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')  
    plt.legend(loc='upper left')
    plt.show()

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Spectral Attention Block
class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, mlp_dim, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Positional Encoding
        position = tf.range(seq_len, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, input_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / input_dim))
        positional_encoding = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
        if input_dim % 2 != 0:
            positional_encoding = positional_encoding[:, :input_dim]
        self.positional_encoding = tf.Variable(positional_encoding, trainable=False, name='positional_encoding')

        # MHA and MLP Layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]
        self.mlp_layers = [
            tf.keras.Sequential([
                layers.Dense(mlp_dim, activation='relu', kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)),
                layers.Dense(input_dim, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
            ]) for _ in range(num_layers)
        ]
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_mlp = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        x += self.positional_encoding
        all_attention_weights = []

        batch_size = tf.shape(x)[0]
        num_channels = tf.shape(x)[1]
        seq_len = tf.shape(x)[2]
        input_dim = tf.shape(x)[3]

        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            x_transposed = tf.transpose(x, perm=[0, 2, 1]) 
            attn_output, attn_weights = self.mha_layers[l](x_transposed, x_transposed, return_attention_scores=True)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            all_attention_weights.append(attn_weights)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])
            mlp_output = self.mlp_layers[l](x)
            x = self.layer_norms_mlp[l](mlp_output + x)

        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        return output, all_attention_weights

# FeedForward Layer
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim):
        super(FeedForward, self).__init__()
        self.fc1 = Dense(hidden_dim, activation='relu')
        self.fc2 = Dense(embed_size)
        
    def call(self, x):
        return self.fc2(self.fc1(x))

# Spatial Attention Block
class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.positional_encoding = self.add_weight(
            shape=(num_channels, sequence_length, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'mlp': FeedForward(embed_size, hidden_dim),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        pos_encoding = tf.cast(self.positional_encoding, dtype=x.dtype)
        x = x + pos_encoding
        
        for layer in self.layers:
            x_res = x
            x = layer['mha'](query=x, value=x, key=x)
            x = layer['ln1'](x + x_res)
            x_res = x
            x = layer['mlp'](x)
            x = layer['ln2'](x + x_res)
        
        return x

# Create the complete model with both attention blocks
def create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                 num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, num_classes):
    inputs = tf.keras.Input(shape=input_shape)
    
    # Apply Spectral Attention Block
    x, _ = SpectralAttentionBlock(input_shape[2], num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral)(inputs)
    
    # Apply Spatial Attention Block
    x = SpatialAttentionBlock(input_shape[2], num_heads_spatial, hidden_dim_spatial, num_layers_spatial, num_channels_spatial, seq_len_spectral)(x)
    
    # Classification Head
    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs, outputs)

# Main execution
if __name__ == "__main__":
    # Define parameters
    input_shape = (62, 64, 20)
    num_layers_spectral = 1
    num_heads_spectral = 2
    mlp_dim_spectral = 128
    seq_len_spectral = 64
    num_layers_spatial = 1
    num_heads_spatial = 2
    hidden_dim_spatial = 128
    num_channels_spatial = 62
    num_classes = 4

    # Create the model
    model = create_model(input_shape, num_layers_spectral, num_heads_spectral, mlp_dim_spectral, seq_len_spectral,
                         num_layers_spatial, num_heads_spatial, hidden_dim_spatial, num_channels_spatial, num_classes)

    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Load and preprocess your data here
    # X_train, X_test, y_train, y_test = ...
    
     # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


    # Train the model
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping, reduce_lr])

    # Evaluate the model performance
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    
    # Print classification report
    print(classification_report(y_test, y_pred))
    
    # Calculate and print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy on Test Data: {accuracy:.2f}')
    
    # Print training and validation accuracy from history
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    print(f'Training Accuracy: {train_accuracy[-1]:.2f}')
    print(f'Validation Accuracy: {val_accuracy[-1]:.2f}')
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(num_classes), yticklabels=np.arange(num_classes))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Plot training & validation accuracy
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
    
    # Plot training & validation loss
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()


In [None]:
import tensorflow as tf
from tensorflow.keras import layers

class SpectralAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, mlp_dim, seq_len):
        super(SpectralAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.input_dim = input_dim

        # Positional Encoding
        position = tf.range(seq_len, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, input_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / input_dim))
        positional_encoding = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
        if input_dim % 2 != 0:  # Handle odd input_dim
            positional_encoding = positional_encoding[:, :input_dim]
        self.positional_encoding = tf.Variable(positional_encoding, trainable=False, name='positional_encoding')

        # Multi-Head Attention and MLP layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]
        self.mlp_layers = [
            tf.keras.Sequential([
                layers.Dense(mlp_dim, activation='relu', kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)),
                layers.Dense(input_dim, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
            ]) for _ in range(num_layers)
        ]
        
        # Layer Normalization
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_mlp = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        # Add positional encoding
        x += self.positional_encoding
        
        # Reshape input
        batch_size, num_channels, seq_len, input_dim = tf.shape(x)
        x = tf.reshape(x, [-1, seq_len, input_dim])

        for l in range(self.num_layers):
            # Multi-Head Attention
            x_transposed = tf.transpose(x, perm=[0, 2, 1])
            attn_output = self.mha_layers[l](x_transposed, x_transposed)
            x_transposed = self.layer_norms_mha[l](attn_output + x_transposed)
            x = tf.transpose(x_transposed, perm=[0, 2, 1])

            # Multi-Layer Perceptron
            mlp_output = self.mlp_layers[l](x)
            x = self.layer_norms_mlp[l](mlp_output + x)
 
        output = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])
        
        return output

# Example usage
input_dim = 20   # Dimension of frequency domain features
num_layers = 10   # Number of layers
num_heads = 14   # Number of attention heads
mlp_dim = 256  # Dimension of MLP
seq_len = 64     # Number of frames or time points

# Create an instance of the spectral attention block
spectral_attention_block = SpectralAttentionBlock(input_dim, num_layers, num_heads, mlp_dim, seq_len)
 
# Forward pass through the block
output = spectral_attention_block(X)

# Print shapes
print("Output shape:", output.shape)  # Expected: (24, 62, 64, 20)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers

class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, input_dim, num_layers, num_heads, mlp_dim, num_channels):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        self.num_channels = num_channels
        self.input_dim = input_dim

        # Positional Encoding for spatial information
        position = tf.range(num_channels, dtype=tf.float32)[:, tf.newaxis]  # Shape (num_channels, 1)
        div_term = tf.exp(tf.range(0, input_dim, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / input_dim))
        positional_encoding = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
        if input_dim % 2 != 0:  # Handle odd input_dim
            positional_encoding = positional_encoding[:, :input_dim]

        # Reshape positional encoding to match batch and time dimensions
        self.positional_encoding = tf.Variable(positional_encoding[tf.newaxis, :, tf.newaxis, :], trainable=False, name='positional_encoding')  # Shape (1, num_channels, 1, input_dim)

        # Multi-Head Attention and MLP layers
        self.mha_layers = [
            layers.MultiHeadAttention(
                num_heads=num_heads, 
                key_dim=input_dim,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)
            ) for _ in range(num_layers)
        ]
        self.mlp_layers = [
            tf.keras.Sequential([
                layers.Dense(mlp_dim, activation='relu', kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42)),
                layers.Dense(input_dim, kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
            ]) for _ in range(num_layers)
        ]
        
        # Layer Normalization
        self.layer_norms_mha = [layers.LayerNormalization() for _ in range(num_layers)]
        self.layer_norms_mlp = [layers.LayerNormalization() for _ in range(num_layers)]

    def call(self, x):
        # Add positional encoding
        x += self.positional_encoding  # Shape: (batch_size, num_channels, 64, input_dim)

        # Reshape input
        batch_size, num_channels, seq_len, input_dim = tf.shape(x)
        x = tf.reshape(x, [batch_size * seq_len, num_channels, input_dim])  # Flatten time dimension

        for l in range(self.num_layers):
            # Multi-Head Attention
            attn_output, _ = self.mha_layers[l](x, x, return_attention_scores=True)  # Self-attention on channel dimension
            x = self.layer_norms_mha[l](attn_output + x)

            # MLP
            mlp_output = self.mlp_layers[l](x)
            x = self.layer_norms_mlp[l](mlp_output + x)

        # Reshape back to original dimensions
        x = tf.reshape(x, [batch_size, num_channels, seq_len, input_dim])  # Shape: (batch_size, seq_len, num_channels, input_dim)

        return x

# Example usage
input_dim = 20   # Dimension of frequency domain features
num_layers = 4   # Number of layers
num_heads = 8    # Number of attention heads
mlp_dim = 128    # Dimension of MLP
num_channels = 62  # Number of channels

# Create an instance of the spatial attention block
spatial_attention_block = SpatialAttentionBlock(input_dim, num_layers, num_heads, mlp_dim, num_channels)

# Example input tensor
batch_size = 24
seq_len = 64  # Number of frames or time points
 
# Forward pass through the block
output = spatial_attention_block(output)

# Print output shape
print("Output shape:", output.shape)  # Expected: (24, 64, 62, 20)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model
import numpy as np

class FeedForward(tf.keras.layers.Layer):
    def __init__(self, embed_size, hidden_dim):
        super(FeedForward, self).__init__()
        self.fc1 = Dense(hidden_dim, activation='relu')
        self.fc2 = Dense(embed_size)
        
    def call(self, x):
        return self.fc2(self.fc1(x))

class SpatialAttentionBlock(tf.keras.layers.Layer):
    def __init__(self, embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length):
        super(SpatialAttentionBlock, self).__init__()
        self.num_layers = num_layers
        
        # Initialize positional encoding with the shape that matches the spatial dimensions
        self.positional_encoding = self.add_weight(
            shape=(num_channels, sequence_length, embed_size),
            initializer='random_normal',
            trainable=True,
            name='positional_encoding'
        )
        
        self.layers = []
        for _ in range(num_layers):
            self.layers.append({
                'mha': MultiHeadAttention(num_heads=heads, key_dim=embed_size),
                'mlp': FeedForward(embed_size, hidden_dim),
                'ln1': LayerNormalization(),
                'ln2': LayerNormalization()
            })

    def call(self, x):
        # Ensure positional encoding has the same shape as input x
        pos_encoding = tf.cast(self.positional_encoding, dtype=x.dtype)
        x = x + pos_encoding
        
        for layer in self.layers:
            x_res = x
            x = layer['mha'](query=x, value=x, key=x)
            x = layer['ln1'](x + x_res)
            
            x_res = x
            x = layer['mlp'](x)
            x = layer['ln2'](x + x_res)
        
        return x

# Example data and model setup
batch_size = 8
num_channels = 62
sequence_length = 64
feature_dim = 20
embed_size = feature_dim
heads = 4
hidden_dim = 128
num_layers = 2

 
# Model instantiation
input_layer = tf.keras.Input(shape=(num_channels, sequence_length, feature_dim))
x_transformed = SpatialAttentionBlock(embed_size, heads, hidden_dim, num_layers, num_channels, sequence_length)(input_layer)

# Define the model to output only the transformed data
model = Model(inputs=input_layer, outputs=x_transformed)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')  # Loss is not used here, just a placeholder

# Predict the transformed output
transformed_output = model.predict(X)
print(f"Transformed output shape: {transformed_output.shape}")
print(transformed_output)


In [None]:
import torch
import torch.nn as nn

class SpatialAttentionBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, mlp_hidden_dim, num_layers, num_frames, positional_encoding):
        super(SpatialAttentionBlock, self).__init__()
        
        self.num_layers = num_layers
        self.num_frames = num_frames
        self.positional_encoding = positional_encoding
        
        self.mha_layers = nn.ModuleList([
            nn.MultiheadAttention(embed_dim, num_heads) for _ in range(num_layers)
        ])
        
        self.mlp_layers = nn.ModuleList([
            nn.Sequential(
                nn.Linear(embed_dim, mlp_hidden_dim),
                nn.ReLU(),
                nn.Linear(mlp_hidden_dim, embed_dim)
            ) for _ in range(num_layers)
        ])
        
        self.ln_layers = nn.ModuleList([
            nn.LayerNorm(embed_dim) for _ in range(num_layers * 2)  # LN for MHA and MLP
        ])

    def forward(self, x):
        batch_size, num_channels, num_frames, embed_dim = x.size()
        
        # Add positional encoding
        x = x + self.positional_encoding

        # MHA and MLP layers
        for l in range(self.num_layers):
            # Multi-Head Attention
            x_res = x
            x, _ = self.mha_layers[l](x, x, x)
            x = self.ln_layers[2 * l](x + x_res)
            
            # MLP
            x_res = x
            x = self.mlp_layers[l](x)
            x = self.ln_layers[2 * l + 1](x + x_res)
        
        return x

# Example usage
embed_dim = 64  # Dimension of the embeddings
num_heads = 8   # Number of attention heads
mlp_hidden_dim = 128  # Hidden dimension for MLP
num_layers = 2  # Number of layers in the attention block
num_frames = 2  # Number of frames (for demonstration)
num_channels = 62  # Number of EEG channels

# Positional encoding (dummy values for demonstration)
positional_encoding = torch.randn(num_frames, num_channels, embed_dim)

# Instantiate and use the spatial attention block
model = SpatialAttentionBlock(embed_dim, num_heads, mlp_hidden_dim, num_layers, num_frames, positional_encoding)
#x = torch.randn(1, num_channels, num_frames, embed_dim)  # Example input tensor
output = model(x)

print(output.shape)
 