From b33f9126976470b74125efb18c818b5171f46610 Mon Sep 17 00:00:00 2001 From: John Bauer Date: Thu, 4 Jan 2024 20:54:55 -0800 Subject: [PATCH] Move the span checking outside the mention building loop --- stanza/pipeline/coref_processor.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/stanza/pipeline/coref_processor.py b/stanza/pipeline/coref_processor.py index e7cebf39cc..3b89cd60ca 100644 --- a/stanza/pipeline/coref_processor.py +++ b/stanza/pipeline/coref_processor.py @@ -104,6 +104,14 @@ def process(self, document): continue span_cluster = sorted(span_cluster) + for span in span_cluster: + # check there are no sentence crossings before + # manipulating the spans, since we will expect it to + # be this way for multiple usages of the spans + sent_id = sent_ids[span[0]] + if sent_ids[span[1]] != sent_id: + raise ValueError("The coref model predicted a span that crossed two sentences! Please send this example to us on our github") + # treat the longest span as the representative # break ties using the first one max_len = 0 @@ -116,8 +124,6 @@ def process(self, document): mentions = [] for span in span_cluster: sent_id = sent_ids[span[0]] - if sent_ids[span[1]] != sent_id: - raise ValueError("The coref model predicted a span that crossed two sentences! Please send this example to us on our github") start_word = word_pos[span[0]] end_word = word_pos[span[1]] mentions.append(CorefMention(sent_id, start_word, end_word))