Guard Token.space_after() against a falsy `misc` (e.g. None or an empty
string) on the token or on its last word before calling .split("|"), and
strip newlines from inter-token spacing when ssurgeon rebuilds sentence text.

diff --git a/stanza/models/common/doc.py b/stanza/models/common/doc.py
index 19165fcf64..3d6d88163a 100644
--- a/stanza/models/common/doc.py
+++ b/stanza/models/common/doc.py
@@ -1024,14 +1024,18 @@ def is_mwt(self):
         return len(self.words) > 1
 
     def space_after(self):
-        if "SpaceAfter=No" in self.misc.split("|") or "SpaceAfter=No" in self.words[-1].misc.split("|"):
+        if self.misc and "SpaceAfter=No" in self.misc.split("|"):
             return ""
-        for piece in self.misc.split("|"):
-            if piece.startswith("SpacesAfter="):
-                return piece.split("=", maxsplit=1)[1]
-        for piece in self.words[-1].misc.split("|"):
-            if piece.startswith("SpacesAfter="):
-                return piece.split("=", maxsplit=1)[1]
+        if self.words[-1].misc and "SpaceAfter=No" in self.words[-1].misc.split("|"):
+            return ""
+        if self.misc:
+            for piece in self.misc.split("|"):
+                if piece.startswith("SpacesAfter="):
+                    return piece.split("=", maxsplit=1)[1]
+        if self.words[-1].misc:
+            for piece in self.words[-1].misc.split("|"):
+                if piece.startswith("SpacesAfter="):
+                    return piece.split("=", maxsplit=1)[1]
         return " "
 
 class Word(StanzaObject):
diff --git a/stanza/server/ssurgeon.py b/stanza/server/ssurgeon.py
index 6fcbed19c5..a6bae16760 100644
--- a/stanza/server/ssurgeon.py
+++ b/stanza/server/ssurgeon.py
@@ -185,7 +185,7 @@ def convert_response_to_doc(doc, semgrex_response):
             sentence = Sentence(mwt_tokens, doc)
 
             token_text = [token.text if token_idx == len(sentence.tokens) - 1
-                          else token.text + token.space_after()
+                          else token.text + token.space_after().replace("\n", "")
                           for token_idx, token in enumerate(sentence.tokens)]
             sentence_text = "".join(token_text)
 