# 1. Importing necessary libraries

In [1]:
import os
import pandas as pd
import numpy as np
import re
import tensorflow as tf
tf.config.run_functions_eagerly(True)
# Enable eager execution for tf.data functions
tf.data.experimental.enable_debug_mode()

# Now you can use tf.data functions with eager execution
# ... your code using tf.data ...

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.applications import EfficientNetB1
from tensorflow.keras.layers import (
    Input,
    Embedding,
    LSTM,Bidirectional,
    Concatenate,
    GlobalAveragePooling2D,
    Dense,
    Dropout)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

# 2. Loading training and test files

In [2]:
path11="C://Users//FurqanSaddozai//scrapping_tweets_16_july_2022//Uni-multi-urdu-HS//"
file_path_1 = path11+"MMHS11K_train.xlsx"
file_path_2 = path11+"MMHS11K_test.xlsx"


# Load specific columns into a DataFrame
columns_to_load = ["Tweet_Id","Text", "Image_Text","Label"]
df_train = pd.read_excel(file_path_1, usecols=columns_to_load)
df_test = pd.read_excel(file_path_2, usecols=columns_to_load)

# 3. Exploring dataset related information

In [3]:
# Display the DataFrame
print('Training data')
print(df_train[:2])
print('................................')
print('Test data')
print(df_test[:2])
print('..........................................')
print('Number of samples in training data=')
print(len(df_train))
#print(train_data.head(3))
print('Number of samples in test data=')
print(len(df_test))
#print(test_data.head(3))

Training data
              Tweet_Id                                               Text  \
0  1596192893743796224  مرد کی بہت سی پریشانیاں خوبصورت عورت کو دیکھ ک...   
1  1579509182524723200  یورپی ممالک کو گیس اور بجلی کی قلت کے باعث موس...   

  Image_Text    Label  
0        NIL  No_Hate  
1        NIL  No_Hate  
................................
Test data
              Tweet_Id                                               Text  \
0  1558019697425219585  @SdqJaan اس مادر چوت کی مسخ شدہ لاش ملنی چائیے...   
1  1578714258766192643  @Jooookeeeerrrr یہ کس گانڈو کی اولاد ہے؟جو 60 ...   

                                          Image_Text Label  
0                                                NIL  Hate  
1  8ا 8 8\n:۰\n\n2 9 10:51\n358 ۵۱ ۷۸۷۰۱ لھگ\nٹیر...  Hate  
..........................................
Number of samples in training data=
8800
Number of samples in test data=
2200


# 4. Text Preprocessing both tweet_text and image_text

# 4.1 Removing all digits, non-Urdu characters or words from Text

In [4]:
#4.1.1 Removing all digits, non-Urdu characters or words from Tweet text
df_train['Text']=df_train['Text'].apply((lambda x: re.sub(r'[^\u0600-\u06FF\s]+',' ',x)))
df_test['Text']=df_test['Text'].apply((lambda x: re.sub(r'[^\u0600-\u06FF\s]+',' ',x)))

#4.1.2 Removing all digits, non-Urdu characters or words from Image text
df_train['Image_Text']=df_train['Image_Text'].apply((lambda x: re.sub(r'[^\u0600-\u06FF\s]+',' ',x)))
df_test['Image_Text']=df_test['Image_Text'].apply((lambda x: re.sub(r'[^\u0600-\u06FF\s]+',' ',x)))

# 4.2 Removing all Urdu digits  from  text

In [5]:
# 4.2.1 Removing all Urdu digits  from Tweet text
df_train['Text']=df_train['Text'].apply((lambda x: re.sub('[۰-۹]',' ',x)))
df_test['Text']=df_test['Text'].apply((lambda x: re.sub('[۰-۹]',' ',x)))
# 4.2.2 Removing all Urdu digits  from Image text
df_train['Image_Text']=df_train['Image_Text'].apply((lambda x: re.sub('[۰-۹]',' ',x)))
df_test['Image_Text']=df_test['Image_Text'].apply((lambda x: re.sub('[۰-۹]',' ',x)))

# 4.3 Removing all punctuations marks from  text


In [6]:
#4.3.1 Removing all punctuations marks from tweets text
df_train['Text']=df_train['Text'].apply((lambda x: re.sub('[^\w\s]',' ',x)))
df_test['Text']=df_test['Text'].apply((lambda x: re.sub('[^\w\s]',' ',x)))
#4.3.2 Removing all punctuations marks from Image text
df_train['Image_Text']=df_train['Image_Text'].apply((lambda x: re.sub('[^\w\s]',' ',x)))
df_test['Image_Text']=df_test['Image_Text'].apply((lambda x: re.sub('[^\w\s]',' ',x)))

# 4.4 Removing all Arabic numerial or digits from text


In [7]:
#4.4.1 Removing all Arabic numerial or digits from tweet text
df_train['Text']=df_train['Text'].apply((lambda x: re.sub(r'[٠١٢٣٤٥٦٧٨٩]',' ',x)))
df_test['Text']=df_test['Text'].apply((lambda x: re.sub(r'[٠١٢٣٤٥٦٧٨٩]',' ',x)))
# 4.5.2 Removing all Arabic numerial or digits from image text
df_train['Image_Text']=df_train['Image_Text'].apply((lambda x: re.sub(r'[٠١٢٣٤٥٦٧٨٩]',' ',x)))
df_test['Image_Text']=df_test['Image_Text'].apply((lambda x: re.sub(r'[٠١٢٣٤٥٦٧٨٩]',' ',x)))

# 4.5 Replace '_' with white space

In [8]:
#4.5.1 Replace '_' with white space from tweet text
df_train['Text']=df_train['Text'].apply((lambda x: re.sub('_',' ',x)))
df_test['Text']=df_test['Text'].apply((lambda x: re.sub('_',' ',x)))
#4.5.2 Replace '_' with white space from image text
df_train['Image_Text']=df_train['Image_Text'].apply((lambda x: re.sub('_',' ',x)))
df_test['Image_Text']=df_test['Image_Text'].apply((lambda x: re.sub('_',' ',x)))

# 4.6 Tokenization using NLTK

In [9]:
#4.6.1 Tokenization using NLTK applied on tweet text                                                          
import nltk
def identify_tokens(row):
    text= row 
    #print(poem)# output is entire poem text
    tokens = nltk.word_tokenize(text)
    #print('tokens of poem are')
    #print(tokens)
    #aa=input("need more?")
    return tokens
df_train['Text'] = df_train['Text'].apply(identify_tokens)# apply method has axis column showing:
#print('\nTokenized first training tweet is=');print(df_train['Text'][0])
#print('\nNumber of Tokens in first training Tweet=');print(len(df_train['Text'][0]))

df_test['Text'] = df_test['Text'].apply(identify_tokens)# apply method has axis column showing:
#print('\nTokenized first test tweet is=');print(df_test['Text'][0])
#print('\nNumber of Tokens in first test tweet are=');print(len(df_test['Text'][0]))
                                                          
#4.6.2 Tokenization using NLTK applied on tweet text 
def identify_tokens(row):
    text= row
    #print(poem)# output is entire poem text
    tokens = nltk.word_tokenize(text)
    #print('tokens of poem are')
    #print(tokens)
    #aa=input("need more?")
    return tokens
df_train['Image_Text'] = df_train['Image_Text'].apply(identify_tokens)# apply method has axis column showing:
#print('\nTokenized first training tweet is=');print(df_train['Image_Text'][0])
#print('\nNumber of Tokens in first training Tweet=');print(len(df_train['Text'][0]))

df_test['Image_Text'] = df_test['Image_Text'].apply(identify_tokens)# apply method has axis column showing:
#print('\nTokenized first test tweet is=');print(df_test['Image_Text'][0])
#print('\nNumber of Tokens in first test tweet are=');print(len(df_test['Text'][0]))

# 4.7 Removing stopwords and characters having length<=1 from text

In [10]:
#4.7.1 importing manually compiled Urdu stopwords list having 414 entries
urdu_stop_word_file='C:\\Users\\FurqanSaddozai\\scrapping_tweets_16_july_2022\\Uni-multi-urdu-HS\\DFF_2023\\Urdu_stopwords.txt'
import codecs
f=codecs.open(urdu_stop_word_file,'r','utf-8-sig')
text=f.read()
#print(text)
stops=text.split()
#print("value of aa is equal to========")
#print(stops)
f.close()

#4.7.2 printing stopwords information
#print('\n length of stopwords list='+str(len(stops)))
#print('\n Entire stopwords list is='); print(stops)

#4.7.3 removing stopwords and single characters from Tweet text
def remove_stops(row):
    my_list = row
    meaningful_words1 = [w for w in my_list if not w in stops]
    meaningful_words2 = [w for w in meaningful_words1 if len(w)>1]
    return (meaningful_words2)

df_train['Text'] = df_train['Text'].apply(remove_stops)# apply method has axis column showing:
#print('\nTokenized first training tweet is=');print(df_train['Text'][0])
#print('\nNumber of Tokens in first training Tweet=');print(len(df_train['Text'][0]))

df_test['Text'] = df_test['Text'].apply(remove_stops)# apply method has axis column showing:
#print('\nTokenized first test tweet is=');print(df_test['Text'][0])
#print('\nNumber of Tokens in first test tweet are=');print(len(df_test['Text'][0]))

#4.7.4 removing stopwords and single characters from image text
df_train['Image_Text'] = df_train['Image_Text'].apply(remove_stops)# apply method has axis column showing:
#print('\nTokenized first training tweet is=');print(df_train['Text'][0])
#print('\nNumber of Tokens in first training Tweet=');print(len(df_train['Text'][0]))

df_test['Image_Text'] = df_test['Image_Text'].apply(remove_stops)# apply method has axis column showing:
#print('\nTokenized first test tweet is=');print(df_test['Text'][0])
#print('\nNumber of Tokens in first test tweet are=');print(len(df_test['Text'][0]))

# 4.8 rejoining tokenized words into strings/text

In [11]:
#4.8.1 rejoining tokenized words into strings/text from tweet text
def rejoin_words(row):
    my_list = row
    joined_words = ( " ".join(my_list))
    return joined_words

df_train['Text'] = df_train['Text'].apply(rejoin_words)# apply method has axis column showing:
#print('\nTokenized first training tweet is=');print(df_train['Text'][0])
#print('\nNumber of Tokens in first training Tweet=');print(len(df_train['Text'][0]))

df_test['Text'] = df_test['Text'].apply(rejoin_words)# apply method has axis column showing:
#print('\nTokenized first test tweet is=');print(df_test['Text'][0])
#print('\nNumber of Tokens in first test tweet are=');print(len(df_test['Text'][0]))

#4.8.2 rejoining tokenized words into strings/text from image text
df_train['Image_Text'] = df_train['Image_Text'].apply(rejoin_words)# apply method has axis column showing:
#print('\nTokenized first training tweet is=');print(df_train['Text'][0])
#print('\nNumber of Tokens in first training Tweet=');print(len(df_train['Text'][0]))

df_test['Image_Text'] = df_test['Image_Text'].apply(rejoin_words)# apply method has axis column showing:
#print('\nTokenized first test tweet is=');print(df_test['Text'][0])
#print('\nNumber of Tokens in first test tweet are=');print(len(df_test['Text'][0]))

# 4.9. Replacing all empty cells(or cell where size of string is one) values with 'NIL' value for Image_Text column 

In [12]:
# Replace empty values with 'NIL'
df_train['Image_Text'] = df_train['Image_Text'].replace('', 'NIL')
df_test['Image_Text']=df_test['Image_Text'].replace('', 'NIL')
#print(df)

# 5. Vectorizing, sequence Generation and Padding Sequences

In [13]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

#5.1 Tokenize and pad sequences for Tweet text
max_features = 5300
max_len=50  # here 54 is the number of words in longest training tweet text
tokenizer_text = Tokenizer(num_words=max_features, split=' ')
tokenizer_text.fit_on_texts(df_train['Text'])
df_train['Padded_Text'] = tokenizer_text.texts_to_sequences(df_train['Text'])
df_train['Padded_Text'] = pad_sequences(df_train['Padded_Text'], maxlen=max_len).tolist()

#tokenizer.fit_on_texts(X_test.values)
df_test['Padded_Text'] = tokenizer_text.texts_to_sequences(df_test['Text'])
df_test['Padded_Text'] = pad_sequences(df_test['Padded_Text'], maxlen=max_len).tolist()


#5.2 Tokenize and pad sequences for  Image text
#first remove all NIL/nil values from cell and place a '' value at corresponding cell
# Exclude 'NIL' or 'nil' values for tokenization and padding
df_train['Image_Text']= df_train['Image_Text'].apply(lambda x: '' if x.lower() == 'nil' else x)
df_test['Image_Text']= df_test['Image_Text'].apply(lambda x: '' if x.lower() == 'nil' else x)

max_features_image_text = 5300
max_len_image_text=45
tokenizer_image_text = Tokenizer(num_words=max_features_image_text, split=' ')
tokenizer_image_text.fit_on_texts(df_train['Image_Text'])
df_train['Padded_Image_Text'] = tokenizer_image_text.texts_to_sequences(df_train['Image_Text'])
df_train['Padded_Image_Text'] = pad_sequences(df_train['Padded_Image_Text'], maxlen=max_len_image_text).tolist()

df_test['Padded_Image_Text'] = tokenizer_image_text.texts_to_sequences(df_test['Image_Text'])
df_test['Padded_Image_Text'] = pad_sequences(df_test['Padded_Image_Text'], maxlen=max_len_image_text).tolist()

In [14]:
print(df_test['Padded_Image_Text'][0])
print(df_train['Padded_Image_Text'][0])

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [15]:
#print("Tokenizer Word Index for Text:", tokenizer_text.word_index)
#print("Tokenizer Word Index for Image_Text:", tokenizer_image_text.word_index)

In [16]:
print("length Word Index for Text:", str(len(tokenizer_text.word_index)))
print("Length Word Index for Image_Text:", str(len(tokenizer_image_text.word_index)))

length Word Index for Text: 21506
Length Word Index for Image_Text: 11837


In [17]:
#df_train

In [18]:
#Function to fetch exxtension of an image
import os
#import tensorflow as tf

def find_image_extension(image_directory, tweet_id):
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp']  # Add more extensions if needed
    
    for extension in image_extensions:
        image_path = os.path.join(image_directory, f"{tweet_id}{extension}")
        if os.path.exists(image_path):
            return extension
    return None  # Return None if no matching extension is found

In [19]:
# Function to check if a list contains only zeros
def contains_only_zeros(lst):
    return all(element == 0 for element in lst)

In [20]:
# Defined our data augmentation transformations training
data_augmentation = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255.0,  # Rescale pixel values to [0, 1]
    rotation_range=40,    # Rotate images by up to 20 degrees
    width_shift_range=0.2, # Shift images horizontally by up to 20% of the width
    height_shift_range=0.2, # Shift images vertically by up to 20% of the height
    shear_range = 0.2, 
    zoom_range = 0.2,
    horizontal_flip=True,  # Flip images horizontally
    vertical_flip=False    # Don't flip images vertically
)

In [21]:
# Defined our data augmentation transformations test
data_augmentation_test = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255.0,  # Rescale pixel values to [0, 1]
)

In [22]:
#for training data
image_data = []  # To store image features for images
image_texts_present = []  # To keep track of whether image text is present or not

for _, row in df_train.iterrows():
    # Check if the padded sequence is entirely zero
    result=contains_only_zeros(row['Padded_Image_Text'])
    if (result== True):
        image_texts_present.append(0)
        # For images without associated text, preprocess and store the image
        image_directory=os.path.join('MMHS11K_RGB_train', row['Label'])
        extension=find_image_extension(image_directory,row['Tweet_Id'])
        image_path = os.path.join(image_directory, str(row['Tweet_Id']) + extension)
        if os.path.exists(image_path):
            image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
            image = tf.keras.preprocessing.image.img_to_array(image)
            image = data_augmentation.random_transform(image)  # Apply augmentation
            image = tf.keras.applications.efficientnet.preprocess_input(image)
            image_data.append(image)
        else: 
            print('Image not found. Please check code')
    else:
        image_texts_present.append(1)
        # Process the image and extract features using EfficientNetB1
        image_directory=os.path.join('MMHS11K_RGB_train', row['Label'])
        #image_directory=str(image_directory)
        extension=find_image_extension(image_directory,row['Tweet_Id'])
        image_path = os.path.join(image_directory, str(row['Tweet_Id']) + extension)
        if os.path.exists(image_path):
            image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
            image = tf.keras.preprocessing.image.img_to_array(image)
            image = data_augmentation.random_transform(image)  # Apply augmentation
            image = tf.keras.applications.efficientnet.preprocess_input(image)
            image_data.append(image)
        else: 
            print('Image not found. Please check code')

image_data=np.array(image_data)#for all images in training set
image_texts_present = np.array(image_texts_present)

In [23]:
#for test data
image_data_test = [] # To store image features from test images
image_texts_present_test = []  # To keep track of whether image_text is present or not

for _, row in df_test.iterrows():
    # Check if the padded sequence is entirely zero
    result=contains_only_zeros(row['Padded_Image_Text'])
    if (result== True):
        image_texts_present_test.append(0)
        # For images without associated text, preprocess and store the image
        image_directory=os.path.join('MMHS11K_RGB_test', row['Label'])
        extension=find_image_extension(image_directory,row['Tweet_Id'])
        image_path = os.path.join(image_directory, str(row['Tweet_Id']) + extension)
        if os.path.exists(image_path):
            image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
            image = tf.keras.preprocessing.image.img_to_array(image)
            image = data_augmentation_test.random_transform(image)  # Apply augmentation to test dataset
            image = tf.keras.applications.efficientnet.preprocess_input(image)
            image_data_test.append(image)
        else: 
            print('Image not found. Please check code')
    else:
        image_texts_present_test.append(1)
        # Process the image and extract features using EfficientNetB1
        image_directory=os.path.join('MMHS11K_RGB_test', row['Label'])
        extension=find_image_extension(image_directory,row['Tweet_Id'])
        image_path = os.path.join(image_directory, str(row['Tweet_Id']) + extension)
        if os.path.exists(image_path):
            image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
            image = tf.keras.preprocessing.image.img_to_array(image)
            image = data_augmentation.random_transform(image)  # Apply augmentation
            image = tf.keras.applications.efficientnet.preprocess_input(image)
            image_data_test.append(image)
        else: 
            print('Image not found. Please check code')
image_data_test = np.array(image_data_test) #for all images of test set
image_texts_present_test = np.array(image_texts_present_test)

In [24]:
#extracting pretrained embeddings for Tweet_Text
from gensim.models import word2vec
model = word2vec.KeyedVectors.load_word2vec_format('urduvec_140M_100K_300d.bin', binary=True)
#only selected first 100 embeddings from 300 embeddings
import numpy as np

embedding_dim = 300  # Adjust based on your pretrained embedding dimension
selected_embedding_dim = 80  # Number of dimensions to select

num_words = len(tokenizer_text.word_index) + 1
embedding_matrix_text = np.zeros((num_words, selected_embedding_dim))  # Use selected_embedding_dim

for word, index in tokenizer_text.word_index.items():
    if word in model:
        full_embedding = model[word]  # Assuming model[word] gives you a 300-dimensional embedding
        selected_embedding = full_embedding[:selected_embedding_dim]  # Extract first 100 dimensions
        embedding_matrix_text[index] = selected_embedding

In [25]:
#extracting pretrained embeddings for Image_Text
embedding_dim = 300  # Adjust based on your pretrained embedding dimension
selected_embedding_dim = 80  # Number of dimensions to select

num_words = len(tokenizer_image_text.word_index) + 1
embedding_matrix_Img_text = np.zeros((num_words, selected_embedding_dim))  # Use selected_embedding_dim

for word, index in tokenizer_image_text.word_index.items():
    if word in model:
        full_embedding = model[word]  # Assuming model[word] gives you a 300-dimensional embedding
        selected_embedding = full_embedding[:selected_embedding_dim]  # Extract first100 dimensions
        embedding_matrix_Img_text[index] = selected_embedding

In [26]:
#Now, when building the model, you can use a conditional LSTM to process image text only if image text is available:

# Build the multimodal model
#text_vocab_size = len(tokenizer_text.word_index) + 1
#print('size of text oriented dictionary=')
#print(text_vocab_size)
#image_input_shape = (224, 224, 3)
#use_image_text = any(image_texts_present)  # Set this based on your data

#1. We define a function build_model to create the multimodal neural network. 
#It takes arguments for text vocabulary size, maximum text length, image input shape, and whether image text is present.
#2. The model architecture consists of three main parts: processing of the main text, processing of the image, 
#and conditional processing of image text (if available).

def build_model(text_vocab_size, max_text_length, image_input_shape,embedding_matrix_text, embedding_matrix_Img_text):   
    # Text input and embedding
    #The text data is embedded using an embedding layer followed by an LSTM layer.

    text_input = Input(shape=(max_text_length,))
    text_embedding = Embedding(input_dim=text_vocab_size, output_dim=80,
                               weights=[embedding_matrix_text], trainable=False)(text_input)    
    text_bilstm = Bidirectional(LSTM(150))(text_embedding)
    text_bilstm=Dense(512, activation='relu')(text_bilstm)


    
    # Image input and feature extraction
    #4. The image data is passed through EfficientNetB1, and global average pooling is applied to get image features.
    image_input = Input(shape=image_input_shape)
    image_model = EfficientNetB1(include_top=False, weights='imagenet', input_shape=image_input_shape)(image_input)
    image_features = tf.keras.layers.GlobalAveragePooling2D()(image_model)
    image_features = tf.keras.layers.Dense(512, activation='relu')(image_features)  # Reduce to 512 features

    
    #  LSTM for image_text
    #5. If image text is available, it's also embedded using an embedding layer and processed with an LSTM layer.
    image_text_input = Input(shape=(45,))
    image_text_vocab_size=len(tokenizer_image_text.word_index)+1
    image_text_embedding = Embedding(input_dim=image_text_vocab_size, output_dim=80,
                                         weights=[embedding_matrix_Img_text], trainable=False)(image_text_input)
    image_text_bilstm = Bidirectional(LSTM(150))(image_text_embedding)
    image_text_bilstm=Dense(512, activation='relu')(image_text_bilstm)
    combined_features = Concatenate()([text_bilstm, image_features, image_text_bilstm])
        
    #combined_features = Dropout(0.4)(combined_features)
    combined_features = Dense(1024, activation='relu')(combined_features)
    combined_features = Dense(512, activation='relu')(combined_features)
    combined_features = Dropout(0.4)(combined_features)
    combined_features = Dense(2, activation='sigmoid')(combined_features)
#7. All extracted features are concatenated, followed by dropout and a final dense layer for prediction.
    return tf.keras.Model(inputs=[text_input, image_input, image_text_input], outputs=combined_features)

In [27]:
# Example usage
text_vocab_size = len(tokenizer_text.word_index)+1
max_text_length = 50
image_input_shape = (224, 224, 3)
max_image_text_length = 45

# Create or load your embedding matrices based on your code
embedding_matrix_text = embedding_matrix_text  # Your text embedding matrix
embedding_matrix_Img_text = embedding_matrix_Img_text  # Your image text embedding matrix

model = build_model(text_vocab_size, max_text_length, image_input_shape, embedding_matrix_text, embedding_matrix_Img_text)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 50)]                 0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, 45)]                 0         []                            
                                                                                                  
 embedding (Embedding)       (None, 50, 80)               1720560   ['input_1[0][0]']             
                                                                                              

In [28]:
# Compile the model
#7. The model is compiled using the specified optimizer and loss function.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [29]:
df_test['Padded_Text']

0       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
3       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
                              ...                        
2195    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2196    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2197    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2198    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2199    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
Name: Padded_Text, Length: 2200, dtype: object

In [30]:
len(df_train['Padded_Image_Text'])

8800

In [31]:
from tensorflow.keras.utils import to_categorical
# Convert labels to numerical values for training data
labels=df_train['Label'].map({'Hate': 1, 'No_Hate': 0})
labels = to_categorical(labels)

labels1=df_test['Label'].map({'Hate': 1, 'No_Hate': 0})
labels1 = to_categorical(labels1)

In [32]:
train_text_list=df_train['Padded_Text'].to_list()
train_text=np.stack(train_text_list)#Done
train_image=image_data
image_text_padded_list=df_train['Padded_Image_Text'].to_list()
image_text_padded=np.stack(image_text_padded_list)#Done
train_labels=labels #Done
#-------------------------
#val_text=df_test['Padded_Text']
#val_image=
#image_text_padded=df_test['Padded_Image_Text']
#val_labels=labels1
test_text_list= df_test['Padded_Text'].to_list() #539 test samples with image_text
test_text=np.stack(test_text_list)
test_image=image_data_test
test_image_text_padded_list=df_test['Padded_Image_Text'].to_list()
test_image_text_padded= np.stack(test_image_text_padded_list)
test_labels=labels1

In [33]:
print(test_text.shape)
print(test_image.shape)
print(test_image_text_padded.shape)
print(test_labels.shape)

(2200, 50)
(2200, 224, 224, 3)
(2200, 45)
(2200, 2)


In [34]:
#Now, let's train the model:
#1. We train the model using the training data. 
#If image text is available, we provide all three inputs (text, image, and image text). 
#Otherwise, we provide only text and image inputs.
#2. The training process is set to run for 30 epochs with a batch size of 32.
#3. 
from tensorflow.keras.callbacks import EarlyStopping

# Define the EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Train the model
model.fit(
        [train_text, train_image, image_text_padded],
        train_labels,
        epochs=20,
        batch_size=22,
        validation_data=([test_text, test_image, test_image_text_padded], test_labels),callbacks=[early_stopping])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20


<keras.src.callbacks.History at 0x0x22559392c15>

In [36]:
loss, accuracy=model.evaluate([test_text, test_image, test_image_text_padded], test_labels)



In [37]:
print(loss)
print(accuracy)

0.3909512314588912
0.8014413244792832


In [38]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compute evaluation metrics
predictions = model.predict([test_text, test_image, test_image_text_padded])
predicted_labels = np.argmax(predictions, axis=1)

# Calculate additional metrics
precision = precision_score(np.argmax(test_labels, axis=1), predicted_labels)
recall = recall_score(np.argmax(test_labels, axis=1), predicted_labels)
f1 = f1_score(np.argmax(test_labels, axis=1), predicted_labels)

# Print the evaluation metrics
print(f'Loss: {loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

Loss: 0.3909
Accuracy: 0.80144
Precision: 0.79021
Recall: 0.78014
F1-score: 0.7841
