import labels as labels
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from scipy.special import expit

from ICA_mtr1 import I_ICA_Plus


# Global Matplotlib look applied to every figure produced below.
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'font.size': 12,
    'axes.unicode_minus': False,
})


# Load the spreadsheet and add a log1p-transformed copy of column G.
excel_path = r"C:\Users\wkc\Desktop\ICA\data1.xlsx"
df = pd.read_excel(excel_path)
df['G_log'] = np.log1p(df['G'])

# Four input columns are used to predict two output columns.
feature_columns = ['X1', 'X2', 'X3', 'X4']
target_columns = ['Y5', 'Y6']
X = df[feature_columns].values
Y = df[target_columns].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42
)

# Standardise inputs and targets using statistics learned from the training
# rows only, so nothing about the test rows leaks into the scalers.
scaler_X = StandardScaler().fit(X_train)
scaler_Y = StandardScaler().fit(Y_train)

X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
Y_train_scaled = scaler_Y.transform(Y_train)


def evaluate(y_true, y_pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        y_true: Ground-truth targets, array-like of shape (n_samples,) or
            (n_samples, n_outputs).
        y_pred: Predicted targets with the same shape as ``y_true``.

    Returns:
        A tuple ``(rmse, r2, mae, error)`` where ``rmse``, ``r2`` and ``mae``
        are scalars (uniformly averaged over outputs) and ``error`` is the
        element-wise signed residual ``y_pred - y_true``.
    """
    # The ``squared=False`` keyword of ``mean_squared_error`` was deprecated in
    # scikit-learn 1.4 and removed in 1.6.  Taking the square root of the
    # per-output MSE and averaging afterwards reproduces exactly what
    # ``squared=False`` returned, and works on every scikit-learn version.
    per_output_mse = metrics.mean_squared_error(
        y_true, y_pred, multioutput='raw_values'
    )
    rmse = np.mean(np.sqrt(per_output_mse))
    r2 = metrics.r2_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    # asarray lets plain lists be subtracted as well; arrays pass through as-is.
    error = np.asarray(y_pred) - np.asarray(y_true)
    return rmse, r2, mae, error


class ImprovedELM:
    """Random-hidden-layer regressor with a ridge-regression read-out.

    Each trial draws a random input-to-hidden weight matrix and bias vector,
    passes the inputs through a logistic sigmoid, and fits a ``Ridge`` model
    (no intercept) from the hidden activations to the targets.  The trial with
    the smallest residual norm on the training data is kept.

    Args:
        n_hidden: Number of hidden units.
        n_trials: Number of random hidden layers to try; must be at least 1.
        alpha: Ridge regularisation strength for the read-out layer.
        random_state: ``None`` (default) draws from NumPy's global random
            state, exactly as before this parameter existed.  An integer seeds
            a private generator so that repeated fits give identical weights.
    """

    def __init__(self, n_hidden=300, n_trials=5, alpha=1e-2, random_state=None):
        self.n_hidden = n_hidden
        self.n_trials = n_trials
        self.alpha = alpha
        self.random_state = random_state

    def fit(self, X, Y):
        """Fit the model and keep the best of ``n_trials`` random hidden layers.

        Args:
            X: Training inputs, array of shape (n_samples, n_features).
            Y: Training targets, array of shape (n_samples,) or
                (n_samples, n_outputs).

        Returns:
            The fitted estimator (``self``).

        Raises:
            ValueError: If ``n_trials`` is smaller than 1, because no model
                would be fitted and ``predict`` could never work.
        """
        if self.n_trials < 1:
            raise ValueError(
                f"n_trials must be at least 1, got {self.n_trials!r}"
            )

        # Fall back to the module-level generator when no seed is given so the
        # default behaviour (including any prior np.random.seed call) is kept.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        best_loss = np.inf
        for _ in range(self.n_trials):
            W_try = rng.randn(X.shape[1], self.n_hidden)
            b_try = rng.randn(self.n_hidden)
            H_try = expit(X @ W_try + b_try)
            # The hidden biases already provide an offset, so the read-out is
            # fitted without its own intercept.
            model_try = Ridge(alpha=self.alpha, fit_intercept=False)
            model_try.fit(H_try, Y)
            pred = model_try.predict(H_try)
            # Selection criterion: residual norm on the training data.
            loss = np.linalg.norm(Y - pred)
            if loss < best_loss:
                best_loss = loss
                self.W, self.b = W_try, b_try
                self.model = model_try
        return self

    def predict(self, X):
        """Predict targets for ``X`` using the hidden layer kept by ``fit``.

        Args:
            X: Inputs, array of shape (n_samples, n_features).

        Returns:
            Predictions from the fitted ridge read-out.
        """
        H = expit(X @ self.W + self.b)
        return self.model.predict(H)


# Maps a model name to its test-set predictions (original units) and the
# tuple returned by evaluate().
results = {}


def _fit_and_predict(estimator):
    """Fit on the scaled training split and predict the test split.

    Returns the predictions twice: in scaled space and mapped back to the
    original target units through ``scaler_Y``.
    """
    estimator.fit(X_train_scaled, Y_train_scaled)
    pred_scaled = estimator.predict(X_test_scaled)
    return pred_scaled, scaler_Y.inverse_transform(pred_scaled)


# ICA
ica_model = I_ICA_Plus(
    L_max=52, tol=0.001, σ=0.005, reg_init=1e-2, reg_decay=0.98, verbose=True
)
Y_ica_pred_scaled, Y_ica_pred = _fit_and_predict(ica_model)
results['ARI-ICA'] = dict(pred=Y_ica_pred, metrics=evaluate(Y_test, Y_ica_pred))

# BP
bp_model = MLPRegressor(
    hidden_layer_sizes=(64, 64),
    max_iter=5000,
    random_state=42,
)
Y_bp_pred_scaled, Y_bp_pred = _fit_and_predict(bp_model)
results['BP'] = dict(pred=Y_bp_pred, metrics=evaluate(Y_test, Y_bp_pred))

# Improved ELM
elm_model = ImprovedELM()
Y_elm_pred_scaled, Y_elm_pred = _fit_and_predict(elm_model)
results['ELM'] = dict(pred=Y_elm_pred, metrics=evaluate(Y_test, Y_elm_pred))

# SVR
# SVR handles a single target, so it is wrapped to fit one regressor per output.
from sklearn.multioutput import MultiOutputRegressor
svr_model = MultiOutputRegressor(SVR(kernel='rbf', C=8, epsilon=0.2))
Y_svr_pred_scaled, Y_svr_pred = _fit_and_predict(svr_model)
results['SVR'] = dict(pred=Y_svr_pred, metrics=evaluate(Y_test, Y_svr_pred))


true_color = '#005B96'
pred_color = '#D32F2F'
grid_color = '#87CEFA'

# One panel per target column: the title at position k labels column k of Y.
panel_titles = ('Coal Slurry Volume Concentration', 'Particle True Density')

# Save one two-panel figure per model: true values versus that model's predictions.
for model in results:
    n_samples = len(Y_test)
    x_axis = np.arange(1, n_samples + 1)

    # Tick every fifth sample and always include the final sample index.
    xticks = list(range(1, n_samples + 1, 5))
    last_index = x_axis[-1]
    if last_index not in xticks:
        xticks.append(last_index)

    predictions = results[model]['pred']

    plt.figure(figsize=(10, 4))

    for column, panel_title in enumerate(panel_titles):
        plt.subplot(1, 2, column + 1)
        plt.plot(x_axis, Y_test[:, column], linestyle='--', color=true_color,
                 label='True', linewidth=1.5, alpha=0.8)
        plt.plot(x_axis, predictions[:, column], linestyle='-', color=pred_color,
                 label=f'{model}-Pred', linewidth=1.5, alpha=0.8)
        plt.title(panel_title, fontsize=14, fontweight='bold')
        plt.xlabel('Sample Index', fontsize=13, fontweight='bold')
        plt.ylabel('Value', fontsize=13, fontweight='bold')
        plt.xticks(xticks)
        plt.legend(frameon=False, fontsize=10, loc='upper right')
        plt.grid(True, linestyle='--', linewidth=0.6, alpha=0.3, color=grid_color)

    plt.tight_layout()
    plt.savefig(f'{model}_prediction_paperstyle.svg', format='svg', dpi=1200, bbox_inches='tight')
    plt.close()



# Grouped bar chart comparing RMSE, MAE and R^2 across all models.
labels = list(results.keys())
# evaluate() returns (rmse, r2, mae, error), hence the indices 0, 2 and 1.
rmse_vals = [results[name]['metrics'][0] for name in labels]
mae_vals = [results[name]['metrics'][2] for name in labels]
r2_vals = [results[name]['metrics'][1] for name in labels]

fig, ax = plt.subplots(figsize=(10, 5))
x = np.arange(len(labels))
width = 0.25

colors = ['#3399FF', '#FF6666', '#66CC66']

# One bar series per metric, placed left of, on, and right of each model tick.
bar_specs = (
    (x - width, rmse_vals, 'RMSE', colors[0]),
    (x, mae_vals, 'MAE', colors[1]),
    (x + width, r2_vals, 'R$^2$', colors[2]),
)
rects1, rects2, rects3 = [
    ax.bar(positions, values, width, label=series, color=shade)
    for positions, values, series, shade in bar_specs
]

for bar_group in (rects1, rects2, rects3):
    heights = [bar.get_height() for bar in bar_group]
    xpos = [bar.get_x() + bar.get_width() / 2 for bar in bar_group]

    # Print each bar's value just above its top edge.
    for center, value in zip(xpos, heights):
        ax.annotate(f'{value:.3f}',
                    xy=(center, value),
                    xytext=(0, 3), textcoords="offset points",
                    ha='center', va='bottom', fontsize=10)

    # Connect the bar tops of one metric with a dashed line in the bar colour.
    ax.plot(xpos, heights, marker='o', linestyle='--',
            color=bar_group[0].get_facecolor(), alpha=0.8)

ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_title('Comparison of Model Performance Metrics', fontsize=14, fontweight='bold')
ax.set_ylabel('Metric Value', fontsize=13, fontweight='bold')
ax.legend()
ax.grid(True, linestyle='--', alpha=0.3)
plt.tight_layout()
plt.savefig('model_metrics_comparison_paperstyle.svg', format='svg', dpi=1200, bbox_inches='tight')
plt.close()




# Kernel density estimate of the signed prediction errors, one curve per model.
# Element 3 of the evaluate() tuple is the residual array; both outputs are
# pooled into a single 1-D sample per model.
errors_list = [results[name]['metrics'][3].flatten() for name in labels]

# Shared x-range: the central 99% of all pooled errors, padded by 1 on each side.
all_errors_flat = np.hstack(errors_list)
lower_pct, upper_pct = np.percentile(all_errors_flat, [0.5, 99.5])
global_min = lower_pct - 1
global_max = upper_pct + 1
x_range = np.linspace(global_min, global_max, 1000)
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

plt.figure(figsize=(10, 5))
for i, label in enumerate(labels):
    errors = errors_list[i]
    kde = gaussian_kde(errors, bw_method=2.0)
    density = kde(x_range)
    plt.plot(x_range, density, label=label, color=colors[i], linewidth=2)

plt.title('Prediction Error Distribution of Models', fontsize=14, fontweight='bold')
plt.xlabel('Prediction Error', fontsize=13, fontweight='bold')
plt.ylabel('Probability Density', fontsize=13, fontweight='bold')
plt.ylim(0, 2)
plt.legend(fontsize=10)
plt.grid(True, linestyle='--', alpha=0.3)
plt.tight_layout()
plt.savefig('error_pdf_comparison_paperstyle.svg', format='svg', dpi=1200, bbox_inches='tight')
plt.close()




from numpy.random import default_rng

def paired_bootstrap_metrics(y_true, y_pred, B=2000, seed=42):
    """Bootstrap RMSE, MAE and R^2 by resampling (truth, prediction) pairs.

    Row indices are drawn with replacement so that every replicate keeps each
    true value together with its own prediction.

    Args:
        y_true: Ground-truth array; rows are samples.
        y_pred: Prediction array aligned row-by-row with ``y_true``.
        B: Number of bootstrap replicates.
        seed: Seed for the NumPy random generator.

    Returns:
        Dict with keys ``'RMSE'``, ``'MAE'`` and ``'R$^2$'``; each value is a
        tuple ``(mean, 2.5th percentile, 97.5th percentile)`` of that metric
        over the replicates.
    """
    rng = default_rng(seed)
    n = y_true.shape[0]

    # One row per replicate; columns are RMSE, MAE, R^2.
    draws = np.empty((B, 3))
    for b in range(B):
        idx = rng.integers(0, n, size=n)
        rmse, r2, mae, _ = evaluate(y_true[idx], y_pred[idx])
        draws[b] = (rmse, mae, r2)

    def summarize(samples):
        # Mean plus the bounds of the central 95% percentile interval.
        lower, upper = np.percentile(samples, 2.5), np.percentile(samples, 97.5)
        return float(samples.mean()), float(lower), float(upper)

    return {
        'RMSE': summarize(draws[:, 0]),
        'MAE': summarize(draws[:, 1]),
        'R$^2$': summarize(draws[:, 2]),
    }


# Bootstrap every model's test-set metrics and plot mean with 95% interval.
labels = list(results.keys())
stats_ci = {}
for name in labels:
    stats_ci[name] = paired_bootstrap_metrics(
        Y_test, results[name]['pred'], B=2000, seed=2025
    )


metrics_order = ['RMSE', 'MAE', 'R$^2$']
fig, axes = plt.subplots(1, 3, figsize=(13, 4))

for ax, metric_name in zip(axes, metrics_order):
    # Rows are models, columns are (mean, lower bound, upper bound).
    summary = np.array(
        [stats_ci[name][metric_name] for name in labels], dtype=float
    ).reshape(-1, 3)
    means, lows, highs = summary[:, 0], summary[:, 1], summary[:, 2]

    # errorbar expects distances from the mean, not absolute interval bounds.
    err_low = means - lows
    err_high = highs - means

    x = np.arange(len(labels))
    ax.errorbar(x, means, yerr=[err_low, err_high],
                fmt='o', capsize=4, linewidth=1.5)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=0, ha='center')
    ax.tick_params(axis='x', pad=6)
    ax.set_title(metric_name, fontweight='bold')
    y_label = 'Score' if metric_name == 'R$^2$' else 'Value'
    ax.set_ylabel(y_label)
    ax.grid(True, linestyle='--', alpha=0.3)

fig.suptitle('Model performance with 95% confidence intervals ',
             fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('model_metrics_CI_only.svg', format='svg', dpi=1200, bbox_inches='tight')
plt.close()

