import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
from keras.models import Model
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Add, concatenate, Input
from keras.optimizers import Adam
from keras import regularizers
from bayes_opt import BayesianOptimization
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Read Excel table data (KDD intrusion-detection dataset; last column = class label).
data = pd.read_excel(r"C:\Users\29177\Desktop\KDD (2).xlsx")

# Automatically detect character (object-dtype) columns among the FEATURE
# columns only.  The last column is the class label: it must be excluded here,
# otherwise it would be one-hot encoded into feature columns and the "label"
# extracted below would actually be an arbitrary feature column.
categorical_features = [
    column for column in data.columns[:-1] if data[column].dtype == object
]

# One-hot encode the categorical feature columns.
if categorical_features:
    # NOTE: `sparse=False` was deprecated in scikit-learn 1.2 and removed in
    # 1.4; `sparse_output=False` is the supported spelling.
    encoder = OneHotEncoder(sparse_output=False)
    encoded_features = pd.DataFrame(
        encoder.fit_transform(data[categorical_features]),
        columns=encoder.get_feature_names_out(categorical_features),
        index=data.index,  # keep row alignment for the concat below
    )

    # Replace the character columns in the original data; encoded columns go
    # first, mirroring the original layout (label stays last).
    data = data.drop(columns=categorical_features)
    data = pd.concat([encoded_features, data], axis=1)

# Extract features (all but the last column) and labels (last column).
features = data.iloc[:, :-1].values
labels = data.iloc[:, -1].values

# Scale every feature column to [0, 1] (matches the sigmoid autoencoder output).
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

# Encode the string labels as consecutive integers 0..n_classes-1.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)


# Converts the shape of the data again into a three-dimensional array
features = features.reshape((features.shape[0], features.shape[1], 1))
# Define the autoencoder model
input_data = Input(shape=(features.shape[1],))
encoded = Dense(128, activation='relu')(input_data)
encoded = Dense(64, activation='relu')(encoded)
encoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(128, activation='relu')(decoded)
decoded = Dense(features.shape[1], activation='sigmoid')(decoded)
autoencoder = Model(input_data, decoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
autoencoder.fit(features, features, epochs=50, batch_size=256, shuffle=True, validation_split=0.2)
encoder = Model(input_data, encoded)
encoded_features = encoder.predict(features)

def cnn_evaluate(filters1, kernel_size1, pool_size1, filters2, kernel_size2, pool_size2, dense_units, learning_rate):
    """Return the mean 5-fold stratified CV accuracy of a small 1-D CNN.

    All hyper-parameters arrive as floats from the Bayesian optimizer and are
    truncated to ints where the layer requires an integer.  Uses the
    module-level ``encoded_features`` and ``labels``.
    """
    # Conv1D expects (samples, steps, channels); add the channel axis if the
    # encoded features are still a 2-D matrix.
    X = encoded_features if encoded_features.ndim == 3 else encoded_features[..., None]
    num_classes = len(np.unique(labels))

    def build_model():
        # A fresh, randomly initialised model for every fold.  NOTE: the
        # original built ONE model before the loop and kept fitting it, so
        # weights trained on fold k leaked into fold k+1 and inflated the
        # cross-validation score.
        inputs = Input(shape=(X.shape[1], 1))
        conv1 = Conv1D(filters=int(filters1), kernel_size=int(kernel_size1), activation='relu',
                       kernel_regularizer=regularizers.l2(0.01))(inputs)
        pool1 = MaxPooling1D(pool_size=int(pool_size1))(conv1)
        conv2 = Conv1D(filters=int(filters2), kernel_size=int(kernel_size2), activation='relu',
                       kernel_regularizer=regularizers.l2(0.01))(pool1)
        pool2 = MaxPooling1D(pool_size=int(pool_size2))(conv2)
        flatten = Flatten()(pool2)
        dense1 = Dense(int(dense_units), activation='relu', kernel_regularizer=regularizers.l2(0.01))(flatten)
        outputs = Dense(num_classes, activation='softmax')(dense1)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer=Adam(learning_rate=learning_rate),
                      loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    # 5-fold stratified cross-validation (stratified on the integer labels).
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = []
    for train_index, test_index in kfold.split(X, labels):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = labels[train_index], labels[test_index]

        # One-hot encode the integer labels for categorical_crossentropy.
        y_train = np.eye(num_classes)[y_train]
        y_test = np.eye(num_classes)[y_test]

        model = build_model()
        model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
        y_pred = model.predict(X_test)
        accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_test, axis=1))
        scores.append(accuracy)

    # Mean accuracy across folds is the value the optimizer maximises.
    return np.mean(scores)
# Hyper-parameter search space handed to the Bayesian optimizer.  Filter
# counts, kernel/pool extents and dense width are sampled as floats and
# truncated to ints inside cnn_evaluate.
pbounds = dict(
    filters1=(16, 64),
    kernel_size1=(3, 5),
    pool_size1=(2, 4),
    filters2=(16, 64),
    kernel_size2=(3, 5),
    pool_size2=(2, 4),
    dense_units=(32, 128),
    learning_rate=(0.0001, 0.001),
)

# Maximise the cross-validated accuracy returned by cnn_evaluate.
optimizer = BayesianOptimization(f=cnn_evaluate, pbounds=pbounds, verbose=2)

# 5 random probes to seed the surrogate model, then 10 guided iterations.
optimizer.maximize(init_points=5, n_iter=10)

# Report the best hyper-parameter set found and its score.
print(optimizer.max)
