diff --git a/setup.py b/setup.py index f34fc77..436a2cf 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="stream2sentence", - version="0.2.0", + version="0.2.1", author="Kolja Beigel", author_email="kolja.beigel@web.de", description="Real-time processing and delivery of sentences from a continuous stream of characters or text chunks.", diff --git a/stream2sentence/stream2sentence.py b/stream2sentence/stream2sentence.py index c75518e..c445314 100644 --- a/stream2sentence/stream2sentence.py +++ b/stream2sentence/stream2sentence.py @@ -20,7 +20,9 @@ def initialize_nltk(): global nltk_initialized if nltk_initialized: return - print ("Starting tokenizer nltk") + + logging.info("Initializing NLTK") + import nltk try: _ = nltk.data.find('tokenizers/punkt') @@ -32,12 +34,13 @@ def initialize_stanza(language: str = "en"): """ Initializes Stanza by downloading required data for sentence tokenization. """ - global stanza_initialized + global nlp, stanza_initialized if stanza_initialized: return - print (f"Starting tokenizer stanza with language {language}") + + logging.info("Initializing Stanza") + import stanza - global nlp stanza.download(language) nlp = stanza.Pipeline(language) stanza_initialized = True