import pandas as pd import matplotlib.pyplot as plt # Load the dataset dataset = pd.read_csv(r"E:\Muzamil Work\PHD Computer Science\My Submissions\QA MRC\DataSets\WikiQACorpus\WikiQA-train.tsv", sep='\t') # Create a list to store the word count for each document word_count = [] # Loop through the rows of the dataset and count the number of words in each document for i in range(len(dataset)): words = dataset.iloc[i]['Sentence'] word_count.append(len(words.split())) # Plot the graph plt.hist(word_count, bins=50, color="Red") plt.xlabel("Passage Word Count") plt.ylabel("Frequency") plt.show()