{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"5moDJqbfvoOR"},"outputs":[],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"OjZrV_IGvsPa"},"outputs":[],"source":["from zipfile import ZipFile\n","file_name = \"/content/drive/MyDrive/Datasets/Arabic_Hand_Written_Dataset.zip\"\n","# Bind the archive to a name other than `zip` so the builtin zip() is not shadowed.\n","with ZipFile(file_name, 'r') as archive:\n","  archive.extractall()\n","  print('finish')  # just to show that the file is unzipped"]},{"cell_type":"markdown","source":["## Importing Important Libraries"],"metadata":{"id":"mIg-CyVLQLHH"}},{"cell_type":"code","source":[""],"metadata":{"id":"Co2qbNbDSx4H"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"6exEavMTvziU"},"outputs":[],"source":["# All imports for the notebook are collected in this cell.\n","# Cell magics (%%...) are only valid on the first line of a cell, so none appear between the imports.\n","import tensorflow as tf\n","import numpy as np\n","import pandas as pd\n","import cv2\n","from sklearn.metrics import confusion_matrix\n","%matplotlib inline\n","from sklearn.metrics import accuracy_score\n","import matplotlib.image  as mpimg\n","import matplotlib.pyplot as plt\n","from matplotlib import pyplot as plt\n","from tensorflow.keras.utils import to_categorical\n","from sklearn.utils import shuffle\n","from tensorflow.keras.models import Sequential\n","from tensorflow.keras.callbacks import ModelCheckpoint\n","from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization, Dropout, Dense, Flatten\n","from sklearn.metrics import roc_auc_score\n","import seaborn as sns\n","from sklearn.metrics import classification_report\n","from sklearn import metrics\n","from 
sklearn.metrics import roc_curve\n","import matplotlib.image  as mpimg\n","import matplotlib.pyplot as plt"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"_5VPEQ1qv0Re"},"outputs":[],"source":["# NOTE(review): the file names advertise the full row counts (13440 / 3360) and the files are\n","# expected to have no header line, so header=None keeps the first sample as data instead of\n","# letting pandas consume it as column names. Confirm against the extracted CSVs.\n","train_data_x = pd.read_csv(\"/content/Arabic_Hand_Written_Dataset/csvTrainImages 13440x1024.csv\", header=None)\n","train_data_y = pd.read_csv(\"/content/Arabic_Hand_Written_Dataset/csvTrainLabel 13440x1.csv\", header=None)\n","test_data_x = pd.read_csv(\"/content/Arabic_Hand_Written_Dataset/csvTestImages 3360x1024.csv\", header=None)\n","test_data_y = pd.read_csv(\"/content/Arabic_Hand_Written_Dataset/csvTestLabel 3360x1.csv\", header=None)"]},{"cell_type":"code","source":["#### Pixels"],"metadata":{"id":"-gyoyVlzQk7J"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Ojuubd0hv3U4"},"outputs":[],"source":["# Report the size of each frame and the number of distinct labels it contains.\n","print('We have  %d training images each contains %d pixels.' %(train_data_x.shape[0], train_data_x.shape[1]))\n","print('We have  %d training labels each contains %d classes.' %(train_data_y.shape[0], len(train_data_y.value_counts())))\n","print('We have  %d testing images each contains %d pixels.' %(test_data_x.shape[0], test_data_x.shape[1]))\n","print('We have  %d testing labels each contains %d classes.' 
%(test_data_y.shape[0], len(test_data_y.value_counts())))"]},{"cell_type":"code","source":["####Dimensions /example"],"metadata":{"id":"TZBc1rUmQtcp"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"rBMGI0UIv6DE"},"outputs":[],"source":["# Show the first rows*columns raw test images in a grid.\n","fig = plt.figure(figsize=(8, 8))\n","columns = 4\n","rows = 5\n","for i in range(1, columns*rows + 1):\n","  # Subplot positions are 1-based while DataFrame rows are 0-based, so row i-1 goes in slot i.\n","  img = test_data_x.iloc[i - 1].to_numpy().reshape((32,32))\n","  fig.add_subplot(rows, columns, i)\n","  plt.imshow(img, cmap='gray')\n","plt.show() "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"0ob2lpHlv-rg"},"outputs":[],"source":["def preprocess_data(train_data_x):\n","  \"\"\"Turn a DataFrame of flattened 32x32 images into a normalised image tensor.\n","\n","  Each row is cast to uint8, reshaped to 32x32, transposed, given a trailing\n","  channel axis and divided by 255. Used for both the training and the test frame.\n","\n","  Returns a float32 array of shape (n_images, 32, 32, 1).\n","  \"\"\"\n","  images = train_data_x.to_numpy().reshape((train_data_x.shape[0], 32, 32)).astype('uint8')\n","  # A 90-degree clockwise rotation followed by a horizontal flip is exactly a transpose,\n","  # so the whole batch is handled in one vectorised step instead of a per-image loop.\n","  images = images.transpose(0, 2, 1)\n","  images = images.reshape([-1, 32, 32, 1])\n","  return images.astype('float32')/255"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"IEZREYQ8wDcC"},"outputs":[],"source":["train_x = preprocess_data(train_data_x)\n","test_x = preprocess_data(test_data_x)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"y3DRAwS4wIqS"},"outputs":[],"source":["train_y = 
to_categorical(train_data_y.values.astype('int32') - 1, num_classes=28)  # labels are 1..28 -> one-hot over 0..27\n","test_y = to_categorical(test_data_y.values.astype('int32') - 1, num_classes=28)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"IaOzwM75wK4V"},"outputs":[],"source":["# Shuffle images and labels together so each pair stays aligned.\n","train_x, train_y = shuffle(train_x, train_y)\n","test_x, test_y = shuffle(test_x, test_y)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"2sEwzEExwT5T"},"outputs":[],"source":["def create_model(activation='relu', optimizer='adam', kernel_initializer='he_normal'):\n","    \"\"\"Build and compile the CNN that maps 32x32x1 images to 28 classes.\n","\n","    Three Conv2D -> MaxPooling2D -> Dropout -> BatchNormalization stages (32, 64 and\n","    128 filters) feed an L2-regularised 32-unit Dense layer and a 28-way softmax.\n","\n","    activation and kernel_initializer are applied to every hidden layer; optimizer is\n","    passed to compile(). Returns the compiled Sequential model.\n","    \"\"\"\n","    model = Sequential()\n","\n","    model.add(Conv2D(32, (3,3), padding='same', input_shape=(32, 32, 1), activation= activation, kernel_initializer=kernel_initializer))\n","    model.add(MaxPooling2D(2,2))\n","    model.add(Dropout(0.2))\n","    model.add(BatchNormalization())\n","\n","    model.add(Conv2D(64, (3,3), padding='same', activation= activation, kernel_initializer=kernel_initializer))\n","    model.add(MaxPooling2D(2,2))\n","    model.add(Dropout(0.2))\n","    model.add(BatchNormalization())\n","\n","    model.add(Conv2D(128, (3,3), padding='same', activation= activation, kernel_initializer=kernel_initializer))\n","    model.add(MaxPooling2D(2,2))\n","    model.add(Dropout(0.2))\n","    model.add(BatchNormalization())\n","\n","    model.add(Flatten())\n","    model.add(Dense(32, activation= activation, kernel_initializer=kernel_initializer, kernel_regularizer='l2'))\n","    model.add(Dropout(0.2))\n","    model.add(BatchNormalization())\n","\n","    model.add(Dense(28, activation='softmax'))\n","\n","    model.compile(optimizer=optimizer,\n","                    loss='categorical_crossentropy',\n","                    metrics=['accuracy'])\n","    return model"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"K6fmRpEewXaA"},"outputs":[],"source":["model = 
create_model()\n","model.summary()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"H8S2KNAbwa4E"},"outputs":[],"source":["\n","model = create_model(optimizer='adam',\n","                     kernel_initializer='uniform',\n","                     activation='relu')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"3eEvyedywfpS"},"outputs":[],"source":["# Keep only the best weights seen so far (save_best_only=True) in weights.hdf5.\n","checkpointer = ModelCheckpoint(filepath='weights.hdf5', verbose=1, save_best_only=True)\n","history = model.fit(train_x,\n","                    train_y,\n","                    validation_split= 0.3,       # hold out 30% of the training data for validation\n","                    epochs=50,                   # 50 passes over the training data\n","                    batch_size=50,               # 50 samples per gradient step\n","                    callbacks=[checkpointer])"]},{"cell_type":"markdown","metadata":{"id":"xwyTWeqlQW7i"},"source":["Training Accuracy"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Ip0UBfmMLvsT"},"outputs":[],"source":["# Evaluate with the checkpointed weights rather than the last-epoch weights.\n","model.load_weights('weights.hdf5')\n","model.evaluate(train_x, train_y)"]},{"cell_type":"markdown","metadata":{"id":"4baNqosxQe1F"},"source":["Testing Performance"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xFA5fINVwtxk"},"outputs":[],"source":["model.load_weights('weights.hdf5')\n","model.evaluate(test_x, test_y)"]},{"cell_type":"markdown","metadata":{"id":"InWuDRPK-s3D"},"source":["**Testing model on unseen data**"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"0DdhQk2zioEZ"},"outputs":[],"source":["test_x, test_y = shuffle(test_x, 
test_y, random_state=30)\n","# Work on a 10-image sample under separate names so the full test set stays\n","# available to the metric cells further down.\n","sample_x = test_x[:10]\n","sample_y = test_y[:10]\n","print('Arabic alphabet:')\n","print(\"ي و ه ن م ل ك ق ف غ ع ظ ط ض ص ش س ز ر ذ د خ ح ج ث ت ب ا\\n0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27\")\n","print()\n","pred = model.predict(sample_x)\n","outputDf = pd.DataFrame(pred)\n","predictedIndex = list(outputDf.idxmax(axis=1))\n","print(\"Predicted index: \", predictedIndex)\n","print()\n","realIndex = list(pd.DataFrame(sample_y).idxmax(axis=1))\n","print(\"Real index: \", realIndex)\n","for i in range(len(sample_x)):\n","    real = realIndex[i]\n","    predicted = predictedIndex[i]\n","    plt.title(f'Actual: {real}    predicted: {predicted}')\n","    img = sample_x[i]\n","    # OpenCV is imported as cv2 in this notebook.\n","    img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)\n","    plt.imshow(img,cmap='gray')\n","    plt.show() "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-e1NOvVxzEnk"},"outputs":[],"source":["# Collapse probabilities / one-hot rows to class indices for the sklearn metrics below.\n","pred_y = model.predict(test_x)\n","pred_y=np.argmax(pred_y, axis=1)\n","test_y=np.argmax(test_y, axis=1)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"pcm-Kh6xi3_I"},"outputs":[],"source":[""]},{"cell_type":"code","execution_count":null,"metadata":{"id":"qclVaVnm06Re"},"outputs":[],"source":["accuracy_score(test_y, pred_y)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"7rpITRI6086n"},"outputs":[],"source":["# LOSS VS ACCURACY\n","\n","acc=history.history['accuracy']\n","val_acc=history.history['val_accuracy']\n","loss=history.history['loss']\n","val_loss=history.history['val_loss']\n","epochs=range(len(acc)) \n","plt.plot(history.history['accuracy'])\n","plt.plot(history.history['val_accuracy'])\n","plt.title('Training and validation accuracy')\n","plt.legend(['training accuracy', 'validation accuracy'], loc='lower 
right')\n","plt.ylabel('accuracy')\n","plt.xlabel('epoch')\n","plt.figure()\n","\n","\n","plt.plot(history.history['loss'])\n","plt.plot(history.history['val_loss'])\n","plt.title('Training and validation loss')\n","# Legend locations must match matplotlib's names exactly (no leading space).\n","plt.legend(['training loss', 'validation loss'], loc='upper right')\n","plt.ylabel('loss')\n","plt.xlabel('epoch')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"0NcMeys5Y8QN"},"outputs":[],"source":[""]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iXK7apC2WCKv"},"outputs":[],"source":[""]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QYl0ANwx1LFD"},"outputs":[],"source":["cm = confusion_matrix(test_y, pred_y)\n","cm"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"J_oNgK6x1SGT"},"outputs":[],"source":["sns.heatmap(cm,annot=True)\n","#plt.subplots(1,1,figsize=(14,7))\n","plt.savefig('h.png')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"glV1F63p1V9t"},"outputs":[],"source":["fig,ax=plt.subplots(1,1,figsize=(10,10))\n","sns.heatmap(confusion_matrix(test_y, pred_y),annot=True)\n","\n","\n","plt.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"mea2dgOi1arg"},"outputs":[],"source":["print(\"Classification Report: \\n\", classification_report(test_y, pred_y))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"wX5ULoxBhC7I"},"outputs":[],"source":[""]},{"cell_type":"code","execution_count":null,"metadata":{"id":"2aVpIABfgqP9"},"outputs":[],"source":[""]},{"cell_type":"code","execution_count":null,"metadata":{"id":"q3JhgAAq1t-S"},"outputs":[],"source":["y_pred_prob = model.predict(test_x)[:, 1]\n","\n","fpr, tpr, thresholds = metrics.roc_curve(test_y, y_pred_prob, pos_label = 1)\n","plt.plot(fpr, tpr)\n","plt.xlim([0.0, 1.0])\n","plt.ylim([0.0, 1.0])\n","plt.title('ROC curve classifier')\n","plt.xlabel('False Positive Rate (FPR)')\n","plt.ylabel('True Positive Rate 
(TPR)')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"FBZm92XG7vlO"},"outputs":[],"source":["fpr = {}\n","tpr = {}\n","thresh ={}\n","\n","n_class = 28\n","\n","# One-vs-rest ROC: class i must be scored with its own probability column,\n","# not with the class-1 column for every class.\n","class_prob = model.predict(test_x)\n","for i in range(n_class):\n","    fpr[i], tpr[i], thresh[i] = roc_curve(test_y, class_prob[:, i], pos_label = i)\n","plt.plot(fpr[0], tpr[0], linestyle='--',color='orange', label='أ')\n","plt.plot(fpr[1], tpr[1], linestyle='--',color='green', label='ب')\n","plt.plot(fpr[2], tpr[2], linestyle='--',color='blue', label='ت')\n","plt.plot(fpr[3], tpr[3], linestyle='--',color='yellow', label='ث')\n","plt.plot(fpr[4], tpr[4], linestyle='--',color='purple', label='ج')\n","plt.plot(fpr[5], tpr[5], linestyle='--',color='lime', label='ح')\n","plt.plot(fpr[6], tpr[6], linestyle='--',color='chocolate', label='خ')\n","plt.plot(fpr[7], tpr[7], linestyle='--',color='black', label='د')\n","plt.plot(fpr[8], tpr[8], linestyle='--',color='grey', label='ذ')\n","plt.plot(fpr[9], tpr[9], linestyle='--',color='red', label='ر')\n","plt.plot(fpr[10], tpr[10], linestyle='--',color='navy', label='ز')\n","plt.plot(fpr[11], tpr[11], linestyle='--',color='gold', label='س')\n","plt.plot(fpr[12], tpr[12], linestyle='--',color='orchid', label='ش')\n","plt.plot(fpr[13], tpr[13], linestyle='--',color='olive', label='ص')\n","plt.plot(fpr[14], tpr[14], linestyle='--',color='pink', label='ض')\n","plt.plot(fpr[15], tpr[15], linestyle='--',color='silver', label='ط')\n","plt.plot(fpr[16], tpr[16], linestyle='--',color='tan', label='ظ')\n","plt.plot(fpr[17], tpr[17], linestyle='--',color='aqua', label='ع')\n","plt.plot(fpr[18], tpr[18], linestyle='--',color='indigo', label='غ')\n","plt.plot(fpr[19], tpr[19], linestyle='--',color='ivory', label='ف')\n","plt.plot(fpr[20], tpr[20], linestyle='--',color='maroon', label='ق')\n","plt.plot(fpr[21], tpr[21], linestyle='--',color='brown', label='ك')\n","plt.plot(fpr[22], tpr[22], linestyle='--',color='plum', label='ل')\n","plt.plot(fpr[23], tpr[23], linestyle='--',color='yellow', 
label='م')\n","plt.plot(fpr[24], tpr[24], linestyle='--',color='teal', label='ن')\n","plt.plot(fpr[25], tpr[25], linestyle='--',color='azure', label='ه')\n","plt.plot(fpr[26], tpr[26], linestyle='--',color='beige', label='و')\n","plt.plot(fpr[27], tpr[27], linestyle='--',color='red', label='ى')\n","plt.title('Multiclass ROC curve')\n","plt.xlabel('False Positive Rate')\n","plt.ylabel('True Positive rate')\n","plt.legend(loc='lower right')\n","plt.savefig('Multiclass ROC');"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ROkd_pRpEvNT"},"outputs":[],"source":["# Keep the class probabilities under their own name so the class-index predictions in pred_y are not overwritten.\n","pred_prob = model.predict(test_x)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"RYNIHEOHS4-0"},"outputs":[],"source":["roc_auc_score(test_y, pred_prob, multi_class='ovr')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1hwPSegqdjnf"},"outputs":[],"source":["\n","\n","acc=history.history['accuracy']\n","val_acc=history.history['val_accuracy']\n","loss=history.history['loss']\n","val_loss=history.history['val_loss']\n","\n","epochs=range(len(acc)) \n","\n","\n","plt.plot(history.history['accuracy'])\n","plt.plot(history.history['val_accuracy'])\n","plt.title('Training and validation accuracy')\n","plt.legend(['training accuracy', 'validation accuracy'], loc='lower right')\n","plt.ylabel('accuracy')\n","plt.xlabel('epoch')\n","plt.figure()\n","\n","\n","plt.plot(history.history['loss'])\n","plt.plot(history.history['val_loss'])\n","# The second curve is the validation split, not the test set.\n","plt.title('Training and validation loss')\n","# Legend locations must match matplotlib's names exactly (no leading space).\n","plt.legend(['training loss', 'validation loss'], loc='upper right')\n","plt.ylabel('loss')\n","plt.xlabel('epoch')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"biVSciAYHaD8"},"outputs":[],"source":[""]}],"metadata":{"accelerator":"TPU","colab":{"collapsed_sections":[],"name":"Copy of 
AHCD_Final.ipynb","provenance":[{"file_id":"1u5vC3Klx0BK01xxCehFPTaXSR9ixuF8v","timestamp":1648449328745},{"file_id":"1nHbfaFg7OKxN4Q0qmsbR4nS0bO7Iu5id","timestamp":1641475794014}],"authorship_tag":"ABX9TyNCuZ1D6DLiOUAH6MYmEELS"},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}