Skip to content

Commit

Permalink
minor bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
KoljaB committed Dec 1, 2023
1 parent 5820cb6 commit 4dc7231
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="stream2sentence",
version="0.2.0",
version="0.2.1",
author="Kolja Beigel",
author_email="[email protected]",
description="Real-time processing and delivery of sentences from a continuous stream of characters or text chunks.",
Expand Down
11 changes: 7 additions & 4 deletions stream2sentence/stream2sentence.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ def initialize_nltk():
global nltk_initialized
if nltk_initialized:
return
print ("Starting tokenizer nltk")

logging.info("Initializing NLTK")

import nltk
try:
_ = nltk.data.find('tokenizers/punkt')
Expand All @@ -32,12 +34,13 @@ def initialize_stanza(language: str = "en"):
"""
Initializes Stanza by downloading required data for sentence tokenization.
"""
global stanza_initialized
global nlp, stanza_initialized
if stanza_initialized:
return
print (f"Starting tokenizer stanza with language {language}")

logging.info("Initializing Stanza")

import stanza
global nlp
stanza.download(language)
nlp = stanza.Pipeline(language)
stanza_initialized = True
Expand Down

0 comments on commit 4dc7231

Please sign in to comment.