{ "cells": [ { "cell_type": "markdown", "id": "887daafb", "metadata": { "id": "887daafb", "papermill": { "duration": 0.024824, "end_time": "2021-10-19T03:46:33.389149", "exception": false, "start_time": "2021-10-19T03:46:33.364325", "status": "completed" }, "pycharm": { "name": "#%% md\n" }, "tags": [] }, "source": [ "# Evaluation of a QA System" ] }, { "cell_type": "code", "execution_count": null, "id": "d3c812a6", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T03:46:33.522395Z", "iopub.status.busy": "2021-10-19T03:46:33.521403Z", "iopub.status.idle": "2021-10-19T03:47:59.664215Z", "shell.execute_reply": "2021-10-19T03:47:59.663623Z", "shell.execute_reply.started": "2021-10-04T22:30:22.50613Z" }, "id": "d3c812a6", "papermill": { "duration": 86.251735, "end_time": "2021-10-19T03:47:59.664377", "exception": false, "start_time": "2021-10-19T03:46:33.412642", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "# Install the latest release of Haystack in your own environment \n", "# ! pip install farm-haystack\n", "\n", "# Install the latest master of Haystack\n", "!pip install grpcio-tools==1.32.0\n", "!pip install grpcio==1.32.0\n", "!pip install git+https://github.com/deepset-ai/haystack.git\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "82355ca2", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T03:47:59.930202Z", "iopub.status.busy": "2021-10-19T03:47:59.925438Z", "iopub.status.idle": "2021-10-19T03:49:10.441489Z", "shell.execute_reply": "2021-10-19T03:49:10.440950Z" }, "id": "82355ca2", "papermill": { "duration": 70.651102, "end_time": "2021-10-19T03:49:10.441623", "exception": false, "start_time": "2021-10-19T03:47:59.790521", "status": "completed" }, "pycharm": { "is_executing": true, "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", "! 
tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", "! chown -R daemon:daemon elasticsearch-7.9.2\n", "\n", "import os\n", "from subprocess import Popen, PIPE, STDOUT\n", "es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],\n", " stdout=PIPE, stderr=STDOUT,\n", " preexec_fn=lambda: os.setuid(1) # as daemon\n", " )\n", "# wait until ES has started\n", "! sleep 30" ] }, { "cell_type": "code", "execution_count": null, "id": "6eb8338f", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T03:49:10.697651Z", "iopub.status.busy": "2021-10-19T03:49:10.696852Z", "iopub.status.idle": "2021-10-19T03:49:24.033202Z", "shell.execute_reply": "2021-10-19T03:49:24.032671Z" }, "id": "6eb8338f", "papermill": { "duration": 13.465642, "end_time": "2021-10-19T03:49:24.033327", "exception": false, "start_time": "2021-10-19T03:49:10.567685", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "from haystack.modeling.utils import initialize_device_settings\n", "\n", "device, n_gpu = initialize_device_settings(use_cuda=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "9555f38a", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T03:49:24.299220Z", "iopub.status.busy": "2021-10-19T03:49:24.298275Z", "iopub.status.idle": "2021-10-19T03:49:24.890060Z", "shell.execute_reply": "2021-10-19T03:49:24.890883Z" }, "id": "9555f38a", "papermill": { "duration": 0.729858, "end_time": "2021-10-19T03:49:24.891136", "exception": false, "start_time": "2021-10-19T03:49:24.161278", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "# Connect to Elasticsearch\n", "from haystack.document_store.elasticsearch import ElasticsearchDocumentStore\n", "\n", "# Connect to Elasticsearch\n", "document_store = ElasticsearchDocumentStore(host=\"localhost\", username=\"\", password=\"\",\n", " create_index=True, embedding_field=\"emb\",\n", " embedding_dim=768, 
excluded_meta_data=[\"emb\"], timeout=300000)" ] }, { "cell_type": "code", "execution_count": null, "id": "bafb7bb6", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T03:49:25.184667Z", "iopub.status.busy": "2021-10-19T03:49:25.183815Z", "iopub.status.idle": "2021-10-19T03:49:25.186281Z", "shell.execute_reply": "2021-10-19T03:49:25.185887Z" }, "papermill": { "duration": 0.1664, "end_time": "2021-10-19T03:49:25.186393", "exception": false, "start_time": "2021-10-19T03:49:25.019993", "status": "completed" }, "tags": [], "id": "bafb7bb6" }, "outputs": [], "source": [ "from haystack.preprocessor.utils import convert_files_to_dicts" ] }, { "cell_type": "code", "execution_count": null, "id": "9a7fd445", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T03:49:25.445217Z", "iopub.status.busy": "2021-10-19T03:49:25.444605Z", "iopub.status.idle": "2021-10-19T04:47:32.926205Z", "shell.execute_reply": "2021-10-19T04:47:32.926675Z" }, "papermill": { "duration": 3487.612983, "end_time": "2021-10-19T04:47:32.926866", "exception": false, "start_time": "2021-10-19T03:49:25.313883", "status": "completed" }, "tags": [], "id": "9a7fd445" }, "outputs": [], "source": [ "from haystack.preprocessor.utils import convert_files_to_dicts\n", "dicts = convert_files_to_dicts('../input/arabicwikinew/arabic-wiki',split_paragraphs=True)\n", "document_store.write_documents(dicts,batch_size = 10000,duplicate_documents ='skip')" ] }, { "cell_type": "code", "execution_count": null, "id": "74e6d4b7", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:47:33.529931Z", "iopub.status.busy": "2021-10-19T04:47:33.529117Z", "iopub.status.idle": "2021-10-19T04:47:33.553256Z", "shell.execute_reply": "2021-10-19T04:47:33.554188Z" }, "papermill": { "duration": 0.327286, "end_time": "2021-10-19T04:47:33.554394", "exception": false, "start_time": "2021-10-19T04:47:33.227108", "status": "completed" }, "tags": [], "id": "74e6d4b7" }, "outputs": [], "source": [ 
"document_store.get_document_count()" ] }, { "cell_type": "code", "execution_count": null, "id": "f975b0e3", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:47:34.302266Z", "iopub.status.busy": "2021-10-19T04:47:34.301311Z", "iopub.status.idle": "2021-10-19T04:47:34.381122Z", "shell.execute_reply": "2021-10-19T04:47:34.381782Z" }, "papermill": { "duration": 0.384009, "end_time": "2021-10-19T04:47:34.381989", "exception": false, "start_time": "2021-10-19T04:47:33.997980", "status": "completed" }, "tags": [], "id": "f975b0e3" }, "outputs": [], "source": [ "# by BM25 retriever\n", "document_store.query('أين تقع فلسطين ؟',top_k = 2)" ] }, { "cell_type": "code", "execution_count": null, "id": "b7612503", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:47:34.984213Z", "iopub.status.busy": "2021-10-19T04:47:34.983457Z", "iopub.status.idle": "2021-10-19T04:47:35.095537Z", "shell.execute_reply": "2021-10-19T04:47:35.095945Z" }, "papermill": { "duration": 0.414663, "end_time": "2021-10-19T04:47:35.096142", "exception": false, "start_time": "2021-10-19T04:47:34.681479", "status": "completed" }, "tags": [], "id": "b7612503" }, "outputs": [], "source": [ "import pandas as pd\n", "pd.read_json('../input/tydiaqatestset/tydiqa-goldp-dev-arabic.json')['data'][0]" ] }, { "cell_type": "code", "execution_count": null, "id": "48eb112d", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:47:35.696182Z", "iopub.status.busy": "2021-10-19T04:47:35.695325Z", "iopub.status.idle": "2021-10-19T04:48:06.910265Z", "shell.execute_reply": "2021-10-19T04:48:06.900197Z" }, "papermill": { "duration": 31.515911, "end_time": "2021-10-19T04:48:06.910390", "exception": false, "start_time": "2021-10-19T04:47:35.394479", "status": "completed" }, "tags": [], "id": "48eb112d" }, "outputs": [], "source": [ "from haystack.reader.farm import FARMReader\n", "\n", "reader = FARMReader(\"wissamantoun/araelectra-base-artydiqa\", top_k=3, return_no_answer=False)" 
] }, { "cell_type": "code", "execution_count": null, "id": "99a27bb5", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:48:07.526914Z", "iopub.status.busy": "2021-10-19T04:48:07.526019Z", "iopub.status.idle": "2021-10-19T04:48:07.596773Z", "shell.execute_reply": "2021-10-19T04:48:07.596296Z" }, "papermill": { "duration": 0.385369, "end_time": "2021-10-19T04:48:07.596897", "exception": false, "start_time": "2021-10-19T04:48:07.211528", "status": "completed" }, "tags": [], "id": "99a27bb5" }, "outputs": [], "source": [ "from haystack.eval import EvalAnswers, EvalDocuments\n", "\n", "# Here we initialize the nodes that perform evaluation\n", "eval_retriever = EvalDocuments(debug=True,top_k=10)\n", "eval_reader = EvalAnswers(sas_model=\"sentence-transformers/paraphrase-multilingual-mpnet-base-v2\",open_domain=True,debug=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "c85d3cd6", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:48:08.202273Z", "iopub.status.busy": "2021-10-19T04:48:08.201460Z", "iopub.status.idle": "2021-10-19T04:48:08.203809Z", "shell.execute_reply": "2021-10-19T04:48:08.204278Z" }, "papermill": { "duration": 0.305315, "end_time": "2021-10-19T04:48:08.204431", "exception": false, "start_time": "2021-10-19T04:48:07.899116", "status": "completed" }, "tags": [], "id": "c85d3cd6" }, "outputs": [], "source": [ "doc_index = \"eval_docs\"\n", "label_index = \"eval_labels\"" ] }, { "cell_type": "code", "execution_count": null, "id": "b3f8415e", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:48:08.819947Z", "iopub.status.busy": "2021-10-19T04:48:08.819250Z", "iopub.status.idle": "2021-10-19T04:48:16.427960Z", "shell.execute_reply": "2021-10-19T04:48:16.429062Z" }, "id": "b3f8415e", "papermill": { "duration": 7.925414, "end_time": "2021-10-19T04:48:16.429280", "exception": false, "start_time": "2021-10-19T04:48:08.503866", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] 
}, "outputs": [], "source": [ "from haystack.preprocessor import PreProcessor\n", "\n", "# Add evaluation data to Elasticsearch Document Store\n", "# We first delete the custom tutorial indices to not have duplicate elements\n", "# and also split our documents into shorter passages using the PreProcessor\n", "preprocessor = PreProcessor(\n", " split_length=500,\n", " split_overlap=0,\n", " split_respect_sentence_boundary=False,\n", " clean_empty_lines=False,\n", " clean_whitespace=False\n", ")\n", "# document_store.delete_all_documents(index=doc_index)\n", "# document_store.delete_all_documents(index=label_index)\n", "document_store.add_eval_data(\n", " filename=\"../input/tydiaqatestset/tydiqa-goldp-dev-arabic.json\",\n", " doc_index=doc_index,\n", " label_index=label_index,\n", " preprocessor=preprocessor\n", ")\n", "\n", "# Let's prepare the labels that we need for the retriever and the reader\n", "labels = document_store.get_all_labels_aggregated(index=label_index)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "4c738f02", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:48:17.187833Z", "iopub.status.busy": "2021-10-19T04:48:17.187253Z", "iopub.status.idle": "2021-10-19T04:48:17.189832Z", "shell.execute_reply": "2021-10-19T04:48:17.190336Z" }, "papermill": { "duration": 0.310839, "end_time": "2021-10-19T04:48:17.190485", "exception": false, "start_time": "2021-10-19T04:48:16.879646", "status": "completed" }, "tags": [], "id": "4c738f02" }, "outputs": [], "source": [ "len(labels)" ] }, { "cell_type": "code", "execution_count": null, "id": "73d27621", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:48:17.798092Z", "iopub.status.busy": "2021-10-19T04:48:17.797295Z", "iopub.status.idle": "2021-10-19T04:48:17.811758Z", "shell.execute_reply": "2021-10-19T04:48:17.812452Z" }, "papermill": { "duration": 0.319192, "end_time": "2021-10-19T04:48:17.812655", "exception": false, "start_time": "2021-10-19T04:48:17.493463", 
"status": "completed" }, "tags": [], "id": "73d27621" }, "outputs": [], "source": [ "print(document_store.get_document_count(index = 'eval_docs'),document_store.get_document_count(index = 'document'))" ] }, { "cell_type": "markdown", "id": "88f0526f", "metadata": { "id": "88f0526f", "papermill": { "duration": 0.303114, "end_time": "2021-10-19T04:48:18.469314", "exception": false, "start_time": "2021-10-19T04:48:18.166200", "status": "completed" }, "pycharm": { "name": "#%% md\n" }, "tags": [] }, "source": [ "## Initialize components of QA-System" ] }, { "cell_type": "code", "execution_count": null, "id": "3d4a5eea", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T04:48:19.090024Z", "iopub.status.busy": "2021-10-19T04:48:19.089463Z", "iopub.status.idle": "2021-10-19T06:40:09.143216Z", "shell.execute_reply": "2021-10-19T06:40:09.142590Z" }, "id": "3d4a5eea", "papermill": { "duration": 6710.364003, "end_time": "2021-10-19T06:40:09.143989", "exception": false, "start_time": "2021-10-19T04:48:18.779986", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "# Initialize Retriever\n", "from haystack.retriever.dense import DensePassageRetriever\n", "retriever = DensePassageRetriever(document_store=document_store,\n", " query_embedding_model=\"../input/dprarcdtydiqa/saved_models_DPR_tydiqa+arcd/query_encoder\",\n", " passage_embedding_model=\"../input/dprarcdtydiqa/saved_models_DPR_tydiqa+arcd/passage_encoder\",\n", " use_gpu=True,\n", " embed_title=True,\n", " batch_size=4)\n", "document_store.update_embeddings(retriever=retriever,update_existing_embeddings=False,batch_size=10000)" ] }, { "cell_type": "code", "execution_count": null, "id": "6267809b", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:09.813401Z", "iopub.status.busy": "2021-10-19T06:40:09.804142Z", "iopub.status.idle": "2021-10-19T06:40:09.829664Z", "shell.execute_reply": "2021-10-19T06:40:09.831177Z" }, "papermill": { 
"duration": 0.36377, "end_time": "2021-10-19T06:40:09.831332", "exception": false, "start_time": "2021-10-19T06:40:09.467562", "status": "completed" }, "tags": [], "id": "6267809b" }, "outputs": [], "source": [ "print(document_store.get_document_count(index = 'eval_docs'),document_store.get_document_count(index = 'document'))" ] }, { "cell_type": "markdown", "id": "c5fea774", "metadata": { "papermill": { "duration": 0.354439, "end_time": "2021-10-19T06:40:10.586454", "exception": false, "start_time": "2021-10-19T06:40:10.232015", "status": "completed" }, "tags": [], "id": "c5fea774" }, "source": [ "# Get examples of what the retriever retrieves" ] }, { "cell_type": "code", "execution_count": null, "id": "14bf8a9f", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:11.590499Z", "iopub.status.busy": "2021-10-19T06:40:11.589920Z", "iopub.status.idle": "2021-10-19T06:40:17.799869Z", "shell.execute_reply": "2021-10-19T06:40:17.800608Z" }, "papermill": { "duration": 6.617333, "end_time": "2021-10-19T06:40:17.800776", "exception": false, "start_time": "2021-10-19T06:40:11.183443", "status": "completed" }, "tags": [], "id": "14bf8a9f" }, "outputs": [], "source": [ "retriever.save('retriever')" ] }, { "cell_type": "code", "execution_count": null, "id": "9f7edf0f", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:18.512045Z", "iopub.status.busy": "2021-10-19T06:40:18.511341Z", "iopub.status.idle": "2021-10-19T06:40:25.228537Z", "shell.execute_reply": "2021-10-19T06:40:25.202593Z" }, "papermill": { "duration": 7.072381, "end_time": "2021-10-19T06:40:25.228719", "exception": false, "start_time": "2021-10-19T06:40:18.156338", "status": "completed" }, "tags": [], "id": "9f7edf0f" }, "outputs": [], "source": [ "retriever.run_query('أين تقع فلسطين',top_k = 2)" ] }, { "cell_type": "markdown", "id": "55bc7cb6", "metadata": { "id": "55bc7cb6", "papermill": { "duration": 0.333041, "end_time": "2021-10-19T06:40:26.111002", "exception": false, 
"start_time": "2021-10-19T06:40:25.777961", "status": "completed" }, "pycharm": { "name": "#%% md\n" }, "tags": [] }, "source": [ "## Evaluation of Retriever\n", "Here we evaluate only the retriever, based on whether the gold_label document is retrieved." ] }, { "cell_type": "code", "execution_count": null, "id": "c3eacff6", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:26.760554Z", "iopub.status.busy": "2021-10-19T06:40:26.759731Z", "iopub.status.idle": "2021-10-19T06:40:40.391899Z", "shell.execute_reply": "2021-10-19T06:40:40.390990Z" }, "papermill": { "duration": 13.958698, "end_time": "2021-10-19T06:40:40.392027", "exception": false, "start_time": "2021-10-19T06:40:26.433329", "status": "completed" }, "tags": [], "id": "c3eacff6" }, "outputs": [], "source": [ "document_store.update_embeddings(retriever, index=doc_index)" ] }, { "cell_type": "code", "execution_count": null, "id": "d61caf51", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:41.042476Z", "iopub.status.busy": "2021-10-19T06:40:41.041481Z", "iopub.status.idle": "2021-10-19T06:40:41.044020Z", "shell.execute_reply": "2021-10-19T06:40:41.043510Z" }, "id": "d61caf51", "papermill": { "duration": 0.329461, "end_time": "2021-10-19T06:40:41.044130", "exception": false, "start_time": "2021-10-19T06:40:40.714669", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "# ## Evaluate Retriever on its own\n", "# retriever_eval_results = retriever.eval(top_k=20,open_domain=True,return_preds=True)\n", "# ## Retriever Recall is the proportion of questions for which the correct document containing the answer is\n", "# ## among the correct documents\n", "# print(\"Retriever Recall:\", retriever_eval_results['metrics']['recall'])\n", "# ## Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank\n", "# print(\"Retriever Mean Avg Precision:\", retriever_eval_results['metrics'][\"map\"])\n" ] }, { 
"cell_type": "code", "execution_count": null, "id": "861f2c79", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:41.689193Z", "iopub.status.busy": "2021-10-19T06:40:41.688320Z", "iopub.status.idle": "2021-10-19T06:40:41.690543Z", "shell.execute_reply": "2021-10-19T06:40:41.690911Z", "shell.execute_reply.started": "2021-10-04T21:43:15.460296Z" }, "papermill": { "duration": 0.328895, "end_time": "2021-10-19T06:40:41.691045", "exception": false, "start_time": "2021-10-19T06:40:41.362150", "status": "completed" }, "tags": [], "id": "861f2c79" }, "outputs": [], "source": [
"def get_list_of_passages(doc, top_k):\n",
"    \"\"\"Return the text content of at most `top_k` retrieved documents.\n",
"\n",
"    Args:\n",
"        doc: Ranked list of retrieved documents; each item must offer\n",
"            `to_dict()` returning a mapping with a 'content' key.\n",
"        top_k: Maximum number of passages to return.\n",
"\n",
"    Returns:\n",
"        List of passage strings in ranking order. It holds fewer than\n",
"        `top_k` entries when fewer documents were retrieved.\n",
"    \"\"\"\n",
"    # Slice instead of indexing range(top_k): the retriever may return fewer\n",
"    # than top_k documents, and doc[i] would then raise IndexError.\n",
"    return [d.to_dict()['content'] for d in doc[:max(top_k, 0)]]"
] }, { "cell_type": "code", "execution_count": null, "id": "21748a7c", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:42.342087Z", "iopub.status.busy": "2021-10-19T06:40:42.341252Z", "iopub.status.idle": "2021-10-19T06:40:42.343800Z", "shell.execute_reply": "2021-10-19T06:40:42.343355Z", "shell.execute_reply.started": "2021-10-04T21:43:15.472121Z" }, "papermill": { "duration": 0.332044, "end_time": "2021-10-19T06:40:42.343915", "exception": false, "start_time": "2021-10-19T06:40:42.011871", "status": "completed" }, "tags": [], "id": "21748a7c" }, "outputs": [], "source": [
"import json\n",
"\n",
"\n",
"def accuracy_retriever(retriever, dataset, top_k=20, verbose=True):\n",
"    \"\"\"Count how many gold answers occur verbatim in the retrieved passages.\n",
"\n",
"    Args:\n",
"        retriever: Object exposing `retrieve(query, top_k=...)` that returns a\n",
"            ranked list of documents.\n",
"        dataset: Path to a SQuAD-style JSON file\n",
"            (data / paragraphs / qas / answers).\n",
"        top_k: Number of passages to retrieve and inspect per question.\n",
"        verbose: If True, print the running counts after every answer.\n",
"\n",
"    Returns:\n",
"        Tuple `(found_answers, total_answers)`.\n",
"    \"\"\"\n",
"    # Explicit encoding: the file holds Arabic text and must not depend on\n",
"    # the platform's default locale encoding.\n",
"    with open(dataset, encoding=\"utf-8\") as f:\n",
"        articles = json.load(f)['data']\n",
"    found_answers = 0\n",
"    total_answers = 0\n",
"    for article in articles:\n",
"        for paragraph in article['paragraphs']:\n",
"            for qa in paragraph['qas']:\n",
"                if not qa['answers']:\n",
"                    # Nothing to score, so skip the retrieval call entirely.\n",
"                    continue\n",
"                # The retrieval uses only the question text, so run it once\n",
"                # per question rather than once per gold answer.\n",
"                docs = retriever.retrieve(qa['question'], top_k=top_k)\n",
"                passages = get_list_of_passages(docs, top_k=top_k)\n",
"                for answer in qa['answers']:\n",
"                    if any(answer['text'] in passage for passage in passages):\n",
"                        found_answers += 1\n",
"                    total_answers += 1\n",
"                    if verbose:\n",
"                        print(\"Found answers so far: \" + str(found_answers))\n",
"                        print(\"Total answers so far: \" + str(total_answers))\n",
"    print(\"####################################################\")\n",
"    print(\"DONE\")\n",
"    print(\"####################################################\")\n",
"    print(\"Found answers: \" + str(found_answers))\n",
"    if total_answers:\n",
"        print(\"Accuracy is: \" + str(found_answers / total_answers))\n",
"    else:\n",
"        # Avoid ZeroDivisionError on a dataset without any answers.\n",
"        print(\"Accuracy is: undefined (dataset contains no answers)\")\n",
"    return found_answers, total_answers"
] }, { "cell_type": "code", "execution_count": null, "id": "44d58d59", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:40:43.028549Z", "iopub.status.busy": "2021-10-19T06:40:43.028017Z", "iopub.status.idle": "2021-10-19T06:58:41.907659Z", "shell.execute_reply": "2021-10-19T06:58:41.908768Z", "shell.execute_reply.started": "2021-10-04T20:51:22.808879Z" }, "papermill": { "duration": 1079.232978, "end_time": "2021-10-19T06:58:41.909132", "exception": false, "start_time": "2021-10-19T06:40:42.676154", "status": "completed" }, "tags": [], "id": "44d58d59" }, "outputs": [], "source": [ "accuracy_retriever(retriever,'../input/tydiaqatestset/tydiqa-goldp-dev-arabic.json')" ] }, { "cell_type": "code", "execution_count": null, "id": "a5f95d89", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:58:43.230421Z", "iopub.status.busy": "2021-10-19T06:58:43.229438Z", "iopub.status.idle": "2021-10-19T06:58:43.231340Z", "shell.execute_reply": "2021-10-19T06:58:43.231873Z", "shell.execute_reply.started": "2021-10-04T20:52:24.045139Z" }, "papermill": { "duration": 0.567851, "end_time": "2021-10-19T06:58:43.232015", "exception": false, "start_time": "2021-10-19T06:58:42.664164", "status": "completed" }, "tags": [], "id": "a5f95d89" }, "outputs": [], "source": [ "# retriever_eval_results['predictions'][0]" ] }, { "cell_type": "code", "execution_count": null, "id": "2df40a1c", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:58:44.360750Z", "iopub.status.busy": "2021-10-19T06:58:44.360080Z", "iopub.status.idle": 
"2021-10-19T06:58:54.358640Z", "shell.execute_reply": "2021-10-19T06:58:54.359067Z", "shell.execute_reply.started": "2021-10-04T21:43:15.488168Z" }, "papermill": { "duration": 10.562521, "end_time": "2021-10-19T06:58:54.359214", "exception": false, "start_time": "2021-10-19T06:58:43.796693", "status": "completed" }, "tags": [], "id": "2df40a1c" }, "outputs": [], "source": [ "# reader_eval_results = reader.eval(document_store=document_store, device=device, label_index=label_index, doc_index=doc_index)\n", "# Evaluation of Reader can also be done directly on a SQuAD-formatted file without passing the data to Elasticsearch\n", "reader_eval_results = reader.eval_on_file(\"../input/tydiaqatestset/\",\"tydiqa-goldp-dev-arabic.json\", device=device)\n", "\n", "## Reader Top-N-Accuracy is the proportion of predicted answers that match with their corresponding correct answer\n", "print(\"Reader Top-N-Accuracy:\", reader_eval_results[\"top_n_accuracy\"])\n", "## Reader Exact Match is the proportion of questions where the predicted answer is exactly the same as the correct answer\n", "print(\"Reader Exact Match:\", reader_eval_results[\"EM\"])\n", "## Reader F1-Score is the average overlap between the predicted answers and the correct answers\n", "print(\"Reader F1-Score:\", reader_eval_results[\"f1\"])" ] }, { "cell_type": "code", "execution_count": null, "id": "9ecfc283", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:58:55.576896Z", "iopub.status.busy": "2021-10-19T06:58:55.576187Z", "iopub.status.idle": "2021-10-19T06:58:55.579743Z", "shell.execute_reply": "2021-10-19T06:58:55.579276Z" }, "papermill": { "duration": 0.597923, "end_time": "2021-10-19T06:58:55.579861", "exception": false, "start_time": "2021-10-19T06:58:54.981938", "status": "completed" }, "tags": [], "id": "9ecfc283" }, "outputs": [], "source": [ "from haystack import Pipeline\n", "\n", "# Here is the pipeline definition\n", "p = Pipeline()\n", "p.add_node(component=retriever, 
name=\"ESRetriever\", inputs=[\"Query\"])\n", "p.add_node(component=eval_retriever, name=\"EvalRetriever\", inputs=[\"ESRetriever\"])\n", "p.add_node(component=reader, name=\"QAReader\", inputs=[\"EvalRetriever\"])\n", "p.add_node(component=eval_reader, name=\"EvalReader\", inputs=[\"QAReader\"])\n", "results = []" ] }, { "cell_type": "code", "execution_count": null, "id": "59342810", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:58:56.731371Z", "iopub.status.busy": "2021-10-19T06:58:56.730477Z", "iopub.status.idle": "2021-10-19T06:58:56.733025Z", "shell.execute_reply": "2021-10-19T06:58:56.732621Z" }, "papermill": { "duration": 0.590731, "end_time": "2021-10-19T06:58:56.733139", "exception": false, "start_time": "2021-10-19T06:58:56.142408", "status": "completed" }, "tags": [], "id": "59342810" }, "outputs": [], "source": [ "query_pipeline = Pipeline()\n", "query_pipeline.add_node(component=retriever, name=\"ESRetriever\", inputs=[\"Query\"])\n", "query_pipeline.add_node(component=reader, name=\"QAReader\",inputs=[\"ESRetriever\"])" ] }, { "cell_type": "code", "execution_count": null, "id": "080cde5e", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:58:58.193519Z", "iopub.status.busy": "2021-10-19T06:58:58.192687Z", "iopub.status.idle": "2021-10-19T06:58:58.196696Z", "shell.execute_reply": "2021-10-19T06:58:58.197254Z" }, "papermill": { "duration": 0.885689, "end_time": "2021-10-19T06:58:58.197453", "exception": false, "start_time": "2021-10-19T06:58:57.311764", "status": "completed" }, "tags": [], "id": "080cde5e" }, "outputs": [], "source": [ "#query = 'ما هي عاصمة فلسطين ؟'\n", "#res = query_pipeline.run(query=query)\n", "#for ans in res['answers']:\n", " # print(ans['answer'])" ] }, { "cell_type": "code", "execution_count": null, "id": "60d73126", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:58:59.538787Z", "iopub.status.busy": "2021-10-19T06:58:59.537058Z", "iopub.status.idle": 
"2021-10-19T06:58:59.539393Z", "shell.execute_reply": "2021-10-19T06:58:59.539860Z", "shell.execute_reply.started": "2021-10-04T21:24:32.774006Z" }, "papermill": { "duration": 0.582619, "end_time": "2021-10-19T06:58:59.540022", "exception": false, "start_time": "2021-10-19T06:58:58.957403", "status": "completed" }, "tags": [], "id": "60d73126" }, "outputs": [], "source": [ "# from haystack.pipeline import ExtractiveQAPipeline\n", "# from haystack.utils import print_answers\n", "# pipe = ExtractiveQAPipeline(reader, retriever)\n", "\n", "# # Voilà! Ask a question!\n", "# question = \"أين تقع فلسطين ؟\"\n", "# prediction = pipe.run(query=question)\n", "# print_answers(prediction)" ] }, { "cell_type": "code", "execution_count": null, "id": "d3226a67", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:59:00.680482Z", "iopub.status.busy": "2021-10-19T06:59:00.679649Z", "iopub.status.idle": "2021-10-19T06:59:00.682231Z", "shell.execute_reply": "2021-10-19T06:59:00.681834Z", "shell.execute_reply.started": "2021-10-04T22:02:43.495085Z" }, "papermill": { "duration": 0.574999, "end_time": "2021-10-19T06:59:00.682344", "exception": false, "start_time": "2021-10-19T06:59:00.107345", "status": "completed" }, "tags": [], "id": "d3226a67" }, "outputs": [], "source": [ "labels = labels[0:900]" ] }, { "cell_type": "code", "execution_count": null, "id": "9335f235", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T06:59:01.811712Z", "iopub.status.busy": "2021-10-19T06:59:01.810880Z", "iopub.status.idle": "2021-10-19T10:01:57.509604Z", "shell.execute_reply": "2021-10-19T10:01:57.510207Z", "shell.execute_reply.started": "2021-10-04T22:02:46.498449Z" }, "papermill": { "duration": 10976.267081, "end_time": "2021-10-19T10:01:57.510439", "exception": false, "start_time": "2021-10-19T06:59:01.243358", "status": "completed" }, "tags": [], "id": "9335f235" }, "outputs": [], "source": [
"# Run the evaluation pipeline `p` once per aggregated label and keep every result.\n",
"# Node-specific params are matched by node name, so the keys must be the names\n",
"# given in p.add_node(...): \"ESRetriever\" and \"QAReader\". \"index\" stays a\n",
"# top-level key, as before.\n",
"# NOTE(review): eval_retriever was created with top_k=10 while 20 documents are\n",
"# requested here - confirm which cutoff the retriever recall should use.\n",
"for count, l in enumerate(labels, start=1):\n",
"    res = p.run(\n",
"        query=l.query,\n",
"        labels=l,\n",
"        params={\"index\": doc_index, \"ESRetriever\": {\"top_k\": 20}, \"QAReader\": {\"top_k\": 3}}\n",
"    )\n",
"    results.append(res)\n",
"    print(count)"
] }, { "cell_type": "code", "execution_count": null, "id": "2bc8cda2", "metadata": { "execution": { "iopub.execute_input": "2021-10-19T10:02:15.249438Z", "iopub.status.busy": "2021-10-19T10:02:15.248547Z", "iopub.status.idle": "2021-10-19T10:02:15.254756Z", "shell.execute_reply": "2021-10-19T10:02:15.254261Z", "shell.execute_reply.started": "2021-10-04T20:59:20.627743Z" }, "papermill": { "duration": 8.652755, "end_time": "2021-10-19T10:02:15.254882", "exception": false, "start_time": "2021-10-19T10:02:06.602127", "status": "completed" }, "tags": [], "id": "2bc8cda2" }, "outputs": [], "source": [ "# n_queries = len(labels)\n", "eval_retriever.print()\n", "print()\n", "retriever.print_time()\n", "print()\n", "eval_reader.print(mode=\"reader\")\n", "print()\n", "reader.print_time()\n", "print()\n", "eval_reader.print(mode=\"pipeline\")" ] }, { "cell_type": "code", "execution_count": null, "id": "83ffaad2", "metadata": { "papermill": { "duration": 8.72414, "end_time": "2021-10-19T10:02:32.168339", "exception": false, "start_time": "2021-10-19T10:02:23.444199", "status": "completed" }, "tags": [], "id": "83ffaad2" }, "outputs": [], "source": [ "" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" }, "papermill": { "default_parameters": {}, "duration": 22578.435554, "end_time": "2021-10-19T10:02:44.990077", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2021-10-19T03:46:26.554523", "version": "2.3.3" }, 
"colab": { "name": "evaluate-dpr-retriever-reader-b00318.ipynb", "provenance": [] } }, "nbformat": 4, "nbformat_minor": 5 }