-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: Load additional fields from SQUAD-format file to meta field for …
…labels #5978 (#6301) * Load additional fields from SQUAD-format file to meta field for labels * added a test function * rewritten test using pytest * added release notes * improve release note * clean up test --------- Co-authored-by: Stefano Fiorucci <[email protected]>
- Loading branch information
Showing
3 changed files
with
75 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 4 additions & 0 deletions
4
releasenotes/notes/metadata_enhancement_for_SQuAD_files-2247c72f07760465.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
--- | ||
enhancements: | ||
- | | ||
Make it possible to load additional fields from the SQUAD format file into the meta field of the Labels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import json | ||
import os | ||
import pytest | ||
from haystack.document_stores import eval_data_from_json | ||
|
||
|
||
@pytest.fixture | ||
def temp_squad_file(tmp_path): | ||
temp_filename = tmp_path / "temp_squad_file.json" | ||
with open(temp_filename, "w", encoding="utf-8") as temp_file: | ||
json.dump( | ||
{ | ||
"metadata": { | ||
"dataset_version": "1.0", | ||
"description": "This dataset contains questions and answers related to...", | ||
"other_metadata_field": "value", | ||
}, | ||
"data": [ | ||
{ | ||
"title": "Article Title", | ||
"paragraphs": [ | ||
{ | ||
"context": "This is the context of the article.", | ||
"qas": [ | ||
{ | ||
"question": "What is the SQuAD dataset?", | ||
"id": 0, | ||
"answers": [{"text": "This is the context", "answer_start": 0}], | ||
"annotator": "annotator0", | ||
"date": "2023-11-07", | ||
}, | ||
{ | ||
"question": "Another question?", | ||
"id": 1, | ||
"answers": [{"text": "This is the context of the article", "answer_start": 0}], | ||
"annotator": "annotator1", | ||
"date": "2023-12-09", | ||
}, | ||
], | ||
} | ||
], | ||
"author": "Your Name", | ||
"creation_date": "2023-11-14", | ||
} | ||
], | ||
}, | ||
temp_file, | ||
indent=2, | ||
) | ||
return temp_filename | ||
|
||
|
||
def test_eval_data_from_json(temp_squad_file): | ||
# Call the function with the temporary file | ||
docs, labels = eval_data_from_json(temp_squad_file) | ||
|
||
assert len(docs) == 1 | ||
assert len(labels) == 2 | ||
|
||
assert docs[0].content == "This is the context of the article." | ||
assert labels[0].query == "What is the SQuAD dataset?" | ||
assert labels[0].meta == {"annotator": "annotator0", "date": "2023-11-07"} | ||
|
||
assert labels[1].query == "Another question?" | ||
assert labels[1].meta == {"annotator": "annotator1", "date": "2023-12-09"} |