Skip to content

Commit

Permalink
Be a bit safer
Browse files Browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Oct 28, 2023
1 parent 8b96020 commit d075b99
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
18 changes: 11 additions & 7 deletions stanza/models/common/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,14 +1024,18 @@ def is_mwt(self):
return len(self.words) > 1

def space_after(self):
if "SpaceAfter=No" in self.misc.split("|") or "SpaceAfter=No" in self.words[-1].misc.split("|"):
if self.misc and "SpaceAfter=No" in self.misc.split("|"):
return ""
for piece in self.misc.split("|"):
if piece.startswith("SpacesAfter="):
return piece.split("=", maxsplit=1)[1]
for piece in self.words[-1].misc.split("|"):
if piece.startswith("SpacesAfter="):
return piece.split("=", maxsplit=1)[1]
if self.words[-1].misc and "SpaceAfter=No" in self.words[-1].misc.split("|"):
return ""
if self.misc:
for piece in self.misc.split("|"):
if piece.startswith("SpacesAfter="):
return piece.split("=", maxsplit=1)[1]
if self.words[-1].misc:
for piece in self.words[-1].misc.split("|"):
if piece.startswith("SpacesAfter="):
return piece.split("=", maxsplit=1)[1]
return " "

class Word(StanzaObject):
Expand Down
2 changes: 1 addition & 1 deletion stanza/server/ssurgeon.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def convert_response_to_doc(doc, semgrex_response):
sentence = Sentence(mwt_tokens, doc)

token_text = [token.text if token_idx == len(sentence.tokens) - 1
else token.text + token.space_after()
else token.text + token.space_after().replace("\n", "")
for token_idx, token in enumerate(sentence.tokens)]
sentence_text = "".join(token_text)

Expand Down

0 comments on commit d075b99

Please sign in to comment.