from sklearn.svm import SVR
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, median_absolute_error
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt
import pickle
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression
import random
from sklearn.preprocessing import StandardScaler

# Define the fitness function to measure the quality of the SVM regression model
def fitness(individual):
    """Score one candidate ``[gamma, C]`` pair for the genetic algorithm.

    An ``svm.SVR`` is fitted on the module-level ``X_train``/``y_train`` and
    evaluated on the module-level ``X_test``/``y_test``.

    Args:
        individual: Sequence whose element 0 is gamma and element 1 is C.

    Returns:
        The negated mean squared error, so that a larger value means a
        better model.
    """
    gamma, C = individual[0], individual[1]
    model = svm.SVR(C=C, gamma=gamma)
    predictions = model.fit(X_train, y_train).predict(X_test)
    # Negate the error: the GA maximizes fitness, while MSE should be minimized.
    return -mean_squared_error(y_test, predictions)

# Initialize population for the genetic algorithm
def init_population():
    """Create the initial random population for the genetic algorithm.

    Returns:
        A list of ``POP_SIZE`` individuals, each a two-element list
        ``[gamma, C]`` with gamma drawn uniformly from [0.1, 10] and C drawn
        uniformly from [0.1, 1000].
    """
    return [
        [random.uniform(0.1, 10), random.uniform(0.1, 1000)]
        for _ in range(POP_SIZE)
    ]

# Crossover operation in genetic algorithm
def crossover(parent, pop):
    """Uniform crossover between ``parent`` and a random member of ``pop``.

    Each gene of the parent is replaced, with probability 0.5, by the gene at
    the same position of a randomly chosen mate. The inputs are not modified;
    the function works on array copies.

    Args:
        parent: Sequence of numeric genes (here ``[gamma, C]``).
        pop: Non-empty sequence of individuals with the same length as
            ``parent``.

    Returns:
        The child as a plain Python list.

    Raises:
        ValueError: If ``pop`` is empty.
    """
    parent = np.array(parent)
    pop = np.array(pop)
    if len(pop) == 0:
        raise ValueError("pop must contain at least one individual")
    # randint's upper bound is exclusive, so len(pop) (not len(pop) - 1) is
    # required for every individual, including the last, to be a possible mate.
    mate_idx = np.random.randint(0, len(pop))
    cross_points = np.random.randint(0, 2, len(parent)).astype(bool)
    parent[cross_points] = pop[mate_idx][cross_points]
    return parent.tolist()

# Mutation operation in genetic algorithm
def mutate(child, mutation_rate=None, bounds=((0.1, 10), (0.1, 1000))):
    """Randomly re-draw genes of ``child`` in place and return it.

    Each gene is replaced, with probability ``mutation_rate``, by a value
    drawn uniformly from that gene's own range. The default ranges match the
    ones used by ``init_population`` (gamma in [0.1, 10], C in [0.1, 1000]),
    so a mutated C is no longer confined to gamma's range and neither
    parameter can be mutated to 0.

    Args:
        child: Mutable sequence ``[gamma, C]``; modified in place.
        mutation_rate: Per-gene mutation probability. When ``None`` the
            module-level ``MUTATION_RATE`` is used.
        bounds: One ``(low, high)`` pair per gene.

    Returns:
        The same ``child`` object, possibly with mutated genes.
    """
    rate = MUTATION_RATE if mutation_rate is None else mutation_rate
    for point, (low, high) in enumerate(bounds):
        if np.random.rand() < rate:
            child[point] = random.uniform(low, high)
    return child

# Main process of the genetic algorithm
def _selection_probabilities(fitness_values):
    """Convert raw fitness scores into roulette-wheel selection probabilities.

    Fitness here is a negated MSE, i.e. always <= 0. Dividing by the sum would
    give weights proportional to the error and so favour the worst
    individuals. Shifting by the minimum makes all weights non-negative and
    proportional to how much better an individual is than the worst one.

    Returns ``None`` (uniform selection) when the scores are all equal or not
    finite, because no valid probability vector can be built from them.
    """
    shifted = fitness_values - fitness_values.min()
    total = shifted.sum()
    if not np.isfinite(total) or total <= 0:
        return None
    return shifted / total


def genetic_algorithm():
    """Run the genetic search for SVR hyperparameters.

    Uses the module-level ``N_GENERATIONS`` together with ``init_population``,
    ``fitness``, ``crossover`` and ``mutate``. In every generation the
    population is scored, parents are drawn by fitness-proportionate selection
    with replacement, and each parent produces one child through crossover
    followed by mutation.

    Returns:
        The best ``[gamma, C]`` individual evaluated at any point of the run,
        including the final population.
    """
    population = init_population()
    best_individual = None
    best_fitness = -np.inf
    for generation in range(N_GENERATIONS):
        print("Generation:", generation)
        # A NumPy array is required: a plain list cannot be divided or shifted.
        fitness_values = np.array(
            [fitness(individual) for individual in population], dtype=float
        )
        top = int(np.argmax(fitness_values))
        if best_individual is None or fitness_values[top] > best_fitness:
            # Remember the best solution so far; without elitism it could be
            # lost when the population is replaced by its children.
            best_fitness = fitness_values[top]
            best_individual = list(population[top])
        parent_idxs = np.random.choice(
            len(population),
            size=len(population),
            replace=True,
            p=_selection_probabilities(fitness_values),
        )
        population = [
            mutate(crossover(list(population[idx]), population))
            for idx in parent_idxs
        ]
    # The children of the last generation have not been scored yet.
    final_fitness = [fitness(individual) for individual in population]
    top = int(np.argmax(final_fitness))
    if best_individual is None or final_fitness[top] > best_fitness:
        best_individual = population[top]
    return best_individual

# Apply median filter to the signal
def median_filter(signal, kernel_size):
    """Apply a sliding-window median filter to a signal.

    The signal is extended on both sides by ``kernel_size // 2`` samples using
    reflection, so the output has the same length as the input.

    Args:
        signal: Array-like sequence of samples.
        kernel_size: Window length; must be a positive odd integer so the
            window is centred on each sample.

    Returns:
        An array shaped and typed like ``signal`` holding the window medians.

    Raises:
        ValueError: If ``kernel_size`` is not positive or not odd.
    """
    # Explicit exceptions instead of ``assert``: asserts are removed under
    # ``python -O``, and ``-1 % 2 == 1`` would let negative sizes through.
    if kernel_size < 1:
        raise ValueError("Kernel size must be positive!")
    if kernel_size % 2 != 1:
        raise ValueError("Kernel size must be odd!")
    half_width = kernel_size // 2
    padded_signal = np.pad(signal, (half_width, half_width), mode='reflect')
    filtered_signal = np.zeros_like(signal)
    for i in range(len(signal)):
        # padded_signal[i:i + kernel_size] is the window centred on signal[i].
        filtered_signal[i] = np.median(padded_signal[i:i + kernel_size])
    return filtered_signal

# Data preprocessing
def yuchuli(path='wh176_240150_2023-04-25.xlsx', sheet_name=2, kernel_size=3):
    """Load the dataset from an Excel sheet and median-filter every column.

    Columns 0-4 of the sheet are used as input features and column 5 as the
    target. Each feature column and the target are smoothed independently
    with ``median_filter``.

    Args:
        path: Excel workbook to read. Defaults to the file the script was
            originally written for.
        sheet_name: Sheet passed to ``pandas.read_excel`` (default ``2``).
        kernel_size: Odd window length for the median filter.

    Returns:
        Tuple ``(X, y)`` of the filtered feature matrix and target vector.
    """
    dataframe = pd.read_excel(path, sheet_name=sheet_name)
    input_data = dataframe.iloc[:, 0:5].values
    output_data = dataframe.iloc[:, 5].values

    def smooth(column):
        return median_filter(column, kernel_size=kernel_size)

    # axis=0 hands each column to the filter as a 1-D array.
    X = np.apply_along_axis(smooth, axis=0, arr=input_data)
    y = smooth(output_data)
    return X, y

# Train the final SVR on standardized features and report test-set metrics
def z_xunlian(X_train, X_test, y_train, y_test, best_gamma, best_C):
    """Fit an RBF-kernel SVR with the given hyperparameters and report metrics.

    The features are standardized with a ``StandardScaler`` fitted on the
    training features only; the same transform is then applied to the test
    features. Mean squared error, median absolute error and R2 on the test
    set are printed.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.
        best_gamma: RBF kernel coefficient passed to ``SVR``.
        best_C: Regularization parameter passed to ``SVR``.

    Returns:
        Tuple ``(y_test, y_pred)`` of the true and predicted test targets.
    """
    # Fit the scaler on the training split only so that no test statistics
    # leak into the transform.
    feature_scaler = StandardScaler()
    train_features = feature_scaler.fit_transform(X_train)
    test_features = feature_scaler.transform(X_test)

    regressor = SVR(
        kernel='rbf',
        epsilon=0.001,
        C=best_C,
        gamma=best_gamma,
        shrinking=True,
        max_iter=-1,
        verbose=2,
    )
    regressor.fit(train_features, y_train)
    y_pred = regressor.predict(test_features)

    # Evaluate on the held-out split.
    mse = mean_squared_error(y_test, y_pred)
    med = median_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("Mean Squared Error:", mse)
    print("Median Absolute Error:", med)
    print("R2 Score:", r2)
    return y_test, y_pred

# Plotting the curve
def quxiantu(y_test, y_pred):
    """Plot actual versus predicted values for test samples 100-149.

    Args:
        y_test: Sequence of true target values.
        y_pred: Sequence of predicted target values.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # (values, legend label, line colour, line style) for each curve.
    curves = (
        (y_test, 'Actual Value', 'blue', '-'),
        (y_pred, 'Predicted Value', 'red', '--'),
    )
    for values, label, color, linestyle in curves:
        plt.plot(values[100:150], label=label, color=color, linestyle=linestyle)

    # Axis labels and title.
    plt.xlabel('Sample Size')
    plt.ylabel('Qualified Rate of Granularity')
    plt.title('Prediction Model of Qualified Rate of Granularity')
    # Select the SimHei font for sans-serif text.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    plt.legend()
    plt.show()

if __name__ == "__main__":
    # Genetic algorithm parameters. They are module-level names because the
    # GA helper functions read them as globals.
    POP_SIZE = 500         # Population size
    CROSS_RATE = 0.8       # Crossover probability
    MUTATION_RATE = 0.05   # Mutation probability
    N_GENERATIONS = 1000   # Number of generations

    # Load and smooth the data, then hold out 4% of it for evaluation.
    X, y = yuchuli()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.04, random_state=46)

    # Standardize before the search: the final model in z_xunlian is trained
    # on standardized features, and gamma/C tuned on raw features do not
    # transfer to a different feature scale. The scaler is fitted on the
    # training split only. z_xunlian standardizes again, which leaves already
    # standardized data effectively unchanged.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Run genetic algorithm to find optimal parameters
    best_params = genetic_algorithm()
    best_gamma, best_C = best_params[0], best_params[1]
    print('best_gamma:', best_gamma)
    print('best_C:', best_C)

    # Train the final model with the selected parameters and report metrics.
    y_test, y_pred = z_xunlian(X_train, X_test, y_train, y_test, best_gamma, best_C)

    # Plot actual versus predicted values.
    quxiantu(y_test, y_pred)