"""Random-forest forecast on K-medoid-fused sensor readings.

The script reads a spreadsheet, reduces two groups of columns to one value
per row with a 1-cluster K-medoids fit, sums the two reduced series to form
the training series, trains a random forest on sliding windows of it and
then produces recursive multi-step forecasts for a separate target column.
"""
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn_extra.cluster import KMedoids

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render CJK text
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

random.seed(8)

SW_WIDTH = 30  # number of past samples fed to the model
HORIZON = 15  # number of recursive prediction steps per window


def sliding_window(train, sw_width=30, n_out=1):
    """Cut a sequence into (input window, following output) sample pairs.

    Windows advance one element at a time. A window is emitted only when
    both its ``sw_width`` inputs and its ``n_out`` outputs fit inside
    ``train``.

    Args:
        train: Sliceable sequence with a ``len`` (e.g. a NumPy array).
        sw_width: Number of consecutive elements in each input window.
        n_out: Number of elements directly after the window used as target.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the input windows and the
        matching target slices, in order of increasing start index.
    """
    X, y = [], []
    for start in range(len(train)):
        end_index = start + sw_width
        out_end_index = end_index + n_out
        # Slices are half-open, so a target ending exactly at len(train)
        # is still valid; anything beyond that would be truncated.
        if out_end_index > len(train):
            break
        X.append(train[start:end_index])
        y.append(train[end_index:out_end_index])
    return np.array(X), np.array(y)


def _row_medoids(rows):
    """Return, for every row, the centre of a 1-cluster K-medoids fit."""
    medoids = []
    for i in range(rows.shape[0]):
        # Treat the values of a single row as separate 1-D samples.
        values = np.array(rows[i:i + 1]).reshape(-1, 1)
        fitted = KMedoids(n_clusters=1, random_state=0).fit(values)
        medoids.append(np.array(fitted.cluster_centers_))
    return medoids


def ronhe(X1, X2):
    """Reduce each row of two tables to a single medoid value.

    Args:
        X1: 2-D table (must expose ``shape`` and row slicing).
        X2: 2-D table handled exactly like ``X1``.

    Returns:
        Tuple ``(centroids1, centroids2)`` of arrays shaped ``(n_rows, 1)``
        holding the per-row medoid of ``X1`` and ``X2`` respectively.
    """
    centroids1 = _row_medoids(X1)
    centroids2 = _row_medoids(X2)

    print(len(centroids1))
    print("For centroids", centroids1)
    if centroids1:  # avoid IndexError when X1 has no rows
        print(centroids1[0])

    centroids1 = np.array(centroids1).reshape((len(centroids1), 1))
    centroids2 = np.array(centroids2).reshape((len(centroids2), 1))
    return centroids1, centroids2


def _recursive_forecast(model, windows, horizon):
    """Predict ``horizon`` steps per window, feeding predictions back in.

    Returns an array with one row per prediction, ordered window by window
    and step by step.
    """
    predictions = []
    for window in windows:
        current = np.array(window).reshape(1, -1)  # private copy
        for _ in range(horizon):
            step = model.predict(current)
            predictions.append(step)
            # Drop the oldest value and append the new prediction.
            current = np.roll(current, -1, axis=1)
            current[0, -1] = np.ravel(step)[0]
    return np.array(predictions)


def _stitch_forecasts(forecasts):
    """Merge per-window forecasts into one chronological series.

    The first window contributes all of its steps in order; every later
    window contributes only its final step. The last ``horizon`` values are
    dropped, as in the original script.
    """
    horizon = forecasts.shape[1]
    series = np.concatenate([forecasts[0], forecasts[1:, -1]])
    return series[:-horizon]


def main():
    """Run the full load / fuse / train / forecast / plot pipeline."""
    data = pd.read_excel('温湿度数据卸货(1500)V2.xlsx').values

    X1 = data[:1500, 1:5]
    X2 = data[:1500, 7:11]
    data1, data2 = ronhe(X1, X2)
    train = data1 + data2  # element-wise sum of the two medoid series

    test = data[:1003, 12:13]
    # Raw targets aligned with the stitched forecast (first predictable
    # index is SW_WIDTH; the final sample is left out).
    test1 = test[SW_WIDTH:-1]
    x = data[:len(test1), 0:1]

    sc = MinMaxScaler(feature_range=(0, 1))
    train = sc.fit_transform(train).astype(np.float32)
    test = sc.transform(test).astype(np.float32)
    test1_scaled = test[SW_WIDTH:-1]

    test_x, _ = sliding_window(test, sw_width=SW_WIDTH)
    train_x, train_y = sliding_window(train, sw_width=SW_WIDTH)
    train_x = train_x.reshape(
        (train_x.shape[0], train_x.shape[1] * train_x.shape[2]))
    train_y = train_y.reshape(
        (train_y.shape[0], train_y.shape[1] * train_y.shape[2]))

    model = RandomForestRegressor(
        random_state=42,
        max_depth=7,
        max_features=2,
        min_samples_leaf=3,
        min_samples_split=5,
        n_estimators=600,
    )
    # A 1-D target avoids sklearn's column-vector conversion warning.
    model.fit(train_x, train_y.ravel())

    predict = _recursive_forecast(
        model, test_x.reshape(test_x.shape[0], SW_WIDTH), HORIZON)
    print("2222222222222222222")
    print(predict.shape)

    predict = _stitch_forecasts(predict.reshape(-1, HORIZON))
    print("333333333333333333333333")
    print(predict.shape)

    # Compare like with like: both operands are in the scaled space.
    msetest = mean_squared_error(test1_scaled, predict)
    print(test1.shape)
    print(predict)
    print(msetest)

    y = sc.inverse_transform(predict.reshape(predict.shape[0], 1))
    mes = r2_score(test1, y)
    mas = mean_absolute_error(test1, y)
    rmes = mean_squared_error(test1, y) ** 0.5
    print(mes, mas, rmes)

    plt.figure(figsize=(8, 6))
    plt.ylim(-30, 40)
    plt.plot(x, test1, color='red', label='Real')
    plt.plot(x, y, color='green', label='Random Forest')
    plt.title(' Random Forest Regressor ',
              fontdict={'weight': 'normal', 'size': 18})
    plt.xlabel('Time(Min)', fontdict={'weight': 'normal', 'size': 15})
    plt.ylabel('Temperature(°Celsius)',
               fontdict={'weight': 'normal', 'size': 15})
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.rcParams.update({'font.size': 15})
    plt.legend(loc='lower left')
    plt.show()


if __name__ == '__main__':
    main()