Skip to content

Commit

Permalink
terminators: remove ellipses terminator, add halfwidth ideographic fullstop
Browse files Browse the repository at this point in the history

Fixes issue #11
  • Loading branch information
santhoshtr committed Nov 14, 2023
1 parent a214b5c commit 9ff7526
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
1 change: 0 additions & 1 deletion sentencex/languages/hy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,4 @@ class Armenian(Language):

hy_terminators = GLOBAL_SENTENCE_TERMINATORS + ["։", "՜", ":"]
hy_terminators.remove(".")
hy_terminators.remove("...")
sentence_break_regex = re.compile(r"[%s]+" % "".join(hy_terminators))
3 changes: 2 additions & 1 deletion sentencex/terminators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# unicode code points generated with Unicode::Tussle perl script:
# unichars -aBbs '[\p{Sentence_Break=STerm}\p{Sentence_Break=ATerm}]' | awk '$2="\""$2"\", #"'
# Refer: https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/SentenceBreakProperty.txt
# ruff: noqa: E501
GLOBAL_SENTENCE_TERMINATORS = (
[
Expand Down Expand Up @@ -159,7 +160,7 @@
]
+ [
# Additional manual entries.
"...", # U+2026 HORIZONTAL ELLIPSIS
"。", # U+3002 IDEOGRAPHIC FULL STOP
"｡", # U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP
]
)

0 comments on commit 9ff7526

Please sign in to comment.