import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn_extra.cluster import KMedoids
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
# Plot configuration: select the SimHei font so Chinese text can be displayed,
# and disable the Unicode minus so negative signs are displayed correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

import random
# Seeds Python's built-in generator only.
# NOTE(review): the estimators in this file are given their own random_state
# values, so this seed does not appear to influence them - confirm it is needed.
random.seed(8)

def sliding_window(train, sw_width=30, n_out=1):
    """Cut a sequence into overlapping (input, target) pairs with stride 1.

    Window ``k`` uses ``train[k:k + sw_width]`` as the input and the
    ``n_out`` items that immediately follow it as the target.  Windows whose
    target would run past the end of ``train`` are not produced.

    Args:
        train: Sliceable sequence (anything supporting ``len`` and slicing).
        sw_width: Number of consecutive items in each input window.
        n_out: Number of consecutive items in each target.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays built from the collected input
        slices and target slices respectively.  Both are empty arrays when
        ``train`` is too short to hold a single full window plus target.
    """
    total = len(train)
    span = sw_width + n_out

    # The last usable start index is total - span; the count is additionally
    # capped at total because start indices never run past the data itself.
    n_windows = min(total, max(0, total - span + 1))

    inputs = [train[start:start + sw_width] for start in range(n_windows)]
    targets = [train[start + sw_width:start + span] for start in range(n_windows)]

    return np.array(inputs), np.array(targets)


def _row_medoids(rows):
    """Return one single-cluster KMedoids centre per row of ``rows``.

    The values of each row are treated as separate one-dimensional samples
    and clustered with ``n_clusters=1``; the fitted cluster centre is that
    row's representative value.

    Args:
        rows: 2-D array-like exposing ``.shape`` and row slicing.

    Returns:
        A list with one ``cluster_centers_`` array per row, in row order.
    """
    centers = []
    for row in range(rows.shape[0]):
        # One row -> column vector of samples, the layout KMedoids.fit expects.
        samples = np.array(rows[row:row + 1]).reshape(-1, 1)
        fitted = KMedoids(n_clusters=1, random_state=0).fit(samples)  # n_clusters: number of clusters
        centers.append(np.array(fitted.cluster_centers_))
    return centers


def ronhe(X1, X2):
    """Reduce every row of ``X1`` and of ``X2`` to a single medoid value.

    Args:
        X1: 2-D array-like; each row is collapsed to its 1-medoid.
        X2: 2-D array-like, processed the same way and independently of X1.

    Returns:
        A tuple ``(centroids1, centroids2)`` of arrays shaped
        ``(X1.shape[0], 1)`` and ``(X2.shape[0], 1)`` holding the per-row
        medoids.

    Side effects:
        Prints the number of rows of ``X1``, the full list of its medoids and
        (when there is at least one row) the first medoid.
    """
    centroids1 = _row_medoids(X1)
    centroids2 = _row_medoids(X2)

    print(len(centroids1))
    print("For centroids", centroids1)
    # Guarded so that an empty X1 yields empty results instead of an IndexError.
    if centroids1:
        print(centroids1[0])

    # Each collected centre is a 1x1 array; flatten the stack to one column.
    centroids1 = np.array(centroids1).reshape((len(centroids1), 1))
    centroids2 = np.array(centroids2).reshape((len(centroids2), 1))
    return centroids1, centroids2



def recursive_forecast(model, window, steps):
    """Roll a one-step model forward ``steps`` times starting from ``window``.

    After every prediction the oldest value is dropped from the window and
    the new prediction is appended, so later steps are conditioned on earlier
    forecasts instead of observed data.

    Args:
        model: Fitted estimator whose ``predict`` accepts a ``(1, width)``
            array and returns a single value.
        window: The ``width`` most recent values, oldest first; any shape
            that flattens to ``width`` values.  It is copied, never modified.
        steps: Number of consecutive values to forecast.

    Returns:
        A list holding the ``steps`` raw ``model.predict`` outputs in
        chronological order.
    """
    # np.array copies, so the caller's window is left untouched and the
    # window keeps the caller's dtype for every later step.
    current = np.array(window).reshape(1, -1)
    forecasts = []
    for _ in range(steps):
        step = model.predict(current)
        forecasts.append(step)
        # Shift everything one slot to the left and store the new forecast
        # in the freed last slot.
        current = np.roll(current, -1, axis=1)
        current[0, -1] = np.ravel(step)[0]
    return forecasts


def stitch_forecasts(predict, out):
    """Merge per-window multi-step forecasts into one chronological series.

    Row 0 contributes all of its ``out`` steps in order; every later row
    contributes only its final step.  The last ``out`` entries are then
    dropped.

    Args:
        predict: Array-like of shape ``(n_windows, out)``; row ``i`` holds
            the forecasts produced from window ``i``, earliest step first.
        out: Number of forecast steps per window.

    Returns:
        1-D array of length ``n_windows - 1``.
    """
    predict = np.asarray(predict)
    series = np.concatenate((predict[0, :], predict[1:, -1]))
    return series[:len(series) - out]


if __name__ == '__main__':
    sw_width = 30  # length of the input window fed to the model
    out = 15       # number of recursive forecast steps per window

    data = pd.read_excel('温湿度数据卸货（1500）V2.xlsx').values

    # Columns 1-4 and 7-10 are each reduced to one medoid per row; the two
    # medoid series are added to form the training signal.
    X1 = data[:1500, 1:5]
    X2 = data[:1500, 7:11]
    data1, data2 = ronhe(X1, X2)
    train = data1 + data2

    # Column 12 is the series to forecast.  test1 keeps the unscaled targets;
    # they start at row 30 because the first window consumes rows 0-29.
    test = data[:1003, 12:13]
    test1 = test[30:-1]

    # The scaler is fitted on the training signal only and reused for the
    # evaluation series.
    sc = MinMaxScaler(feature_range=(0, 1))
    train = sc.fit_transform(train).astype(np.float32)
    test_scaled = sc.transform(test).astype(np.float32)

    # Horizontal axis for the plot (column 0), same number of rows as test1.
    x = data[:1004 - 32, 0:1]

    test_x, _ = sliding_window(test_scaled, sw_width=sw_width)
    train_x, train_y = sliding_window(train, sw_width=sw_width)
    train_x = train_x.reshape((train_x.shape[0], train_x.shape[1] * train_x.shape[2]))
    train_y = train_y.reshape((train_y.shape[0], train_y.shape[1] * train_y.shape[2]))

    model = RandomForestRegressor(random_state=42,
                                  max_depth=7,
                                  max_features=2,
                                  min_samples_leaf=3,
                                  min_samples_split=5,
                                  n_estimators=600)

    model.fit(train_x, train_y)

    # For every evaluation window, forecast `out` steps recursively.
    predict = []
    for window in test_x:
        predict.extend(recursive_forecast(model, window, out))

    predict = np.array(predict)
    print("2222222222222222222")
    print(predict.shape)
    predict = predict.reshape(-1, out)

    # Entry j of the stitched series is the forecast for row 30 + j of the
    # evaluation column: window 0 supplies steps 1..out, each later window
    # supplies its out-th step.  The leading steps must stay in chronological
    # order for this alignment with test1 to hold.
    y = stitch_forecasts(predict, out)
    predict = y

    print("333333333333333333333333")
    print(predict.shape)

    # Scaled-space error: compare scaled forecasts with scaled targets.
    msetest = mean_squared_error(test_scaled[30:-1], predict)
    print(test1.shape)
    print(predict)
    print(msetest)

    # Back to original units for the reported metrics and the plot.
    y = y.reshape(y.shape[0], 1)
    y = sc.inverse_transform(y)

    r2 = r2_score(test1, y)
    mae = mean_absolute_error(test1, y)
    rmse = mean_squared_error(test1, y) ** 0.5

    print(r2, mae, rmse)

    plt.figure(figsize=(8, 6))  # figure size
    plt.ylim(-30, 40)

    plt.plot(x, test1, color='red', label='Real')
    plt.plot(x, y, color='green', label='Random Forest')
    plt.title(' Random Forest Regressor ', fontdict={'weight':'normal','size': 18})
    plt.xlabel('Time(Min)', fontdict={'weight':'normal','size': 15})
    plt.ylabel('Temperature(°Celsius)', fontdict={'weight':'normal','size': 15})

    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)

    plt.rcParams.update({'font.size': 15})
    plt.legend(loc='lower left')
    plt.show()