# -*- coding: utf-8 -*-
"""Exploratory plots for the 2020 hourly bike-rental data.

Reads ``seoul hourly/2020_hourly.csv`` and writes a correlation heat map
plus hourly, daily, monthly and per-weekday plots of the ``count`` column
as PNG files in the current working directory.
"""
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import plot_importance, plot_tree

sns.set()

# Silence library warnings unless the user asked for them with -W.
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")


if __name__ == '__main__':
    pd.options.display.max_columns = None

    df_ori = pd.read_csv('seoul hourly/2020_hourly.csv', sep=',',
                         encoding='unicode_escape')

    # ---- Correlation heat map of the time features vs. rentals ----
    # rename() returns a new frame, so the selection is never mutated
    # in place.
    df_ori2 = df_ori[['hour', 'day_week', 'month', 'count']].rename(
        columns={'day_week': 'day', 'count': 'rented_bikes'})

    fig3 = plt.figure(figsize=(6, 6))
    dt_plot = df_ori2
    g = sns.heatmap(dt_plot.corr(), cmap="BrBG", annot=False, center=0)
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.tight_layout()
    # Save before show(): interactive backends may tear the figure down
    # once its window is closed.
    fig3.savefig('heat map.png', format='png', dpi=300)
    plt.show()

    # ---- Hourly series ----
    df_ori['date_time'] = pd.to_datetime(df_ori['date_time'])

    y = df_ori['count']
    # %H/%M/%S are hour/minute/second; the previous '%hh:%mm:%ss' used
    # %h, %m (month) and %s, which are not time-of-day fields.
    x = df_ori['date_time'].dt.strftime('%Y-%m-%d %H:%M:%S')
    interval = 500  # show one tick label every `interval` samples

    fig1 = plt.figure(figsize=(14, 6))
    # No legend here: the single line carries no label.
    plt.plot(x, y)
    plt.xlabel('Date and time', fontsize=15)
    plt.ylabel('Number of rented bikes', fontsize=15)
    plt.gcf().autofmt_xdate()
    plt.xticks(np.arange(0, len(x), interval))
    fig1.savefig('rented bikes hourly.png', format='png', dpi=300)

    # Only numeric columns can be averaged; dropping the others up front
    # keeps resample().mean() working on pandas versions that no longer
    # discard non-numeric columns silently.
    df_numeric = df_ori.set_index('date_time').select_dtypes(
        include=['number', 'bool'])

    # ---- Daily mean ----
    df_day = df_numeric.resample('D').mean().reset_index()

    y = df_day['count']
    x = df_day['date_time'].dt.strftime('%Y-%m-%d')
    interval = 14

    fig1 = plt.figure(figsize=(14, 5))
    plt.plot(x, y)
    plt.xlabel('Date', fontsize=15)
    plt.ylabel('Average number of rented bikes', fontsize=15)
    plt.gcf().autofmt_xdate()
    plt.xticks(np.arange(0, len(x), interval))
    fig1.savefig('rented bikes daily.png', format='png', dpi=300)

    # ---- Monthly mean ----
    # NOTE(review): pandas >= 2.2 spells the month-end alias 'ME';
    # 'M' is kept for compatibility with older releases.
    df_month = df_numeric.resample('M').mean().reset_index()

    y = df_month['count']
    x = df_month['date_time'].dt.strftime('%Y-%m')
    interval = 1

    fig1 = plt.figure(figsize=(14, 4))
    plt.bar(x, y)
    plt.xlabel('Month', fontsize=15)
    plt.ylabel('Average number of rented bikes', fontsize=15)
    plt.gcf().autofmt_xdate()
    plt.xticks(np.arange(0, len(x), interval))
    fig1.savefig('rented bikes monthly.png', format='png', dpi=300)

    # ---- Mean hourly profile for each day of the week ----
    df_hour = (df_ori.groupby(['day_week', 'hour'])
               .mean(numeric_only=True)
               .reset_index())

    # NOTE(review): the positional slices assume every day_week value has
    # all 24 hours present and that the smallest day_week value is
    # Monday -- confirm against the data.
    df_monday = df_hour[0:24]
    df_tuesday = df_hour[24:48]
    df_wednesday = df_hour[48:72]
    df_thursday = df_hour[72:96]
    df_friday = df_hour[96:120]
    df_saturday = df_hour[120:144]
    df_sunday = df_hour[144:168]

    weekday_profiles = [
        ('Monday', df_monday),
        ('Tuesday', df_tuesday),
        ('Wednesday', df_wednesday),
        ('Thursday', df_thursday),
        ('Friday', df_friday),
        ('Saturday', df_saturday),
        ('Sunday', df_sunday),
    ]

    fig1 = plt.figure(figsize=(14, 4))
    x = df_monday['hour']
    for day_label, day_frame in weekday_profiles:
        plt.plot(x, day_frame['count'].values, label=day_label)
    plt.legend(loc='upper right', prop={'size': 15})
    plt.xlabel('Hours of the day', fontsize=15)
    plt.ylabel('Average number of rented bikes', fontsize=15)
    plt.xticks(np.arange(0, len(x), 1))
    fig1.savefig('rented bikes per week.png', format='png', dpi=300)

    # ---- First 50 samples of the raw series ----
    y_count = df_ori['count']
    y_hour = df_ori['hour']
    y_day = df_ori['day_week']
    y_month = df_ori['month']

    fig1 = plt.figure(figsize=(18, 6))
    plt.plot(y_count[0:50], label='rented bikes', marker='o', color='blue')
    plt.legend(loc='upper right', prop={'size': 20})
    plt.xlabel('Time t', fontsize=20)
    plt.ylabel('Number of rented bikes', fontsize=20)
    plt.xticks(np.arange(0, 50, 2), fontsize=20, rotation=45)
    plt.yticks(fontsize=20)