import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Load a pre-trained tokenizer and model fine-tuned for extractive QA
# (a SQuAD2-fine-tuned checkpoint; GPT-2-style dialogue models such as
# DialogRPT have no question-answering head and would return random spans)
tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")

# Load the SQuAD v2 machine reading comprehension (MRC) dataset
dataset = load_dataset("squad_v2")

# Select a sample from the dataset
sample = dataset['validation'][0]
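
# Each sample is a dict with 'id', 'title', 'context', 'question', and
# 'answers' fields; peeking at the question is purely illustrative:
print(sample['question'])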

# Tokenize the question together with the context
# (truncation guards against contexts longer than the model's max length)
inputs = tokenizer(sample['question'], sample['context'], truncation=True, return_tensors="pt")

# Use the model to predict the answer span (no gradients needed for inference)
with torch.no_grad():
    outputs = model(**inputs)
start_index = torch.argmax(outputs.start_logits)
end_index = torch.argmax(outputs.end_logits) + 1
answer = tokenizer.decode(inputs['input_ids'][0][start_index:end_index])

# Print the answer
print(answer)
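
# Sanity check (a sketch beyond the steps above): SQuAD v2 also contains
# unanswerable questions, for which SQuAD2-tuned models place the best span
# on the first special token (index 0), yielding an empty answer.
if start_index == 0:
    print("Model predicts the question is unanswerable")

# Compare against the gold annotations (an empty 'text' list = unanswerable)
print("Gold answers:", sample['answers']['text'])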
