From b33f9126976470b74125efb18c818b5171f46610 Mon Sep 17 00:00:00 2001
From: John Bauer <horatio@gmail.com>
Date: Thu, 4 Jan 2024 20:54:55 -0800
Subject: [PATCH] Move the span checking outside the mention building loop

---
 stanza/pipeline/coref_processor.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/stanza/pipeline/coref_processor.py b/stanza/pipeline/coref_processor.py
index e7cebf39cc..3b89cd60ca 100644
--- a/stanza/pipeline/coref_processor.py
+++ b/stanza/pipeline/coref_processor.py
@@ -104,6 +104,14 @@ def process(self, document):
                 continue
             span_cluster = sorted(span_cluster)
 
+            for span in span_cluster:
+                # Verify up front that no span crosses a sentence
+                # boundary: the spans are used in several places
+                # below, each assuming a span stays in one sentence.
+                sent_id = sent_ids[span[0]]
+                if sent_ids[span[1]] != sent_id:
+                    raise ValueError("The coref model predicted a span that crossed two sentences!  Please send this example to us on our github")
+
             # treat the longest span as the representative
             # break ties using the first one
             max_len = 0
@@ -116,8 +124,6 @@ def process(self, document):
             mentions = []
             for span in span_cluster:
                 sent_id = sent_ids[span[0]]
-                if sent_ids[span[1]] != sent_id:
-                    raise ValueError("The coref model predicted a span that crossed two sentences!  Please send this example to us on our github")
                 start_word = word_pos[span[0]]
                 end_word = word_pos[span[1]]
                 mentions.append(CorefMention(sent_id, start_word, end_word))