From 27d306ff5e5ca09d2af6874659f47b4cb86e678b Mon Sep 17 00:00:00 2001
From: John Bauer
Date: Sun, 12 Nov 2023 21:10:59 -0800
Subject: [PATCH] Add some documentation on the ontonotes/ww conversion

---
 stanza/utils/datasets/ner/ontonotes_multitag.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/stanza/utils/datasets/ner/ontonotes_multitag.py b/stanza/utils/datasets/ner/ontonotes_multitag.py
index 578a17f568..47d694b1ca 100644
--- a/stanza/utils/datasets/ner/ontonotes_multitag.py
+++ b/stanza/utils/datasets/ner/ontonotes_multitag.py
@@ -1,3 +1,14 @@
+"""
+Combines OntoNotes and WW into a single dataset with OntoNotes used for dev & test
+
+The resulting dataset has two layers saved in the multi_ner column.
+
+WW is kept as 9 classes, with the tag put in either the first or
+second layer depending on the flags.
+
+OntoNotes is converted to one column for 18 and one column for 9 classes.
+"""
+
 import argparse
 import json
 import os