Skip to content

Commit

Permalink
Update process_long_text.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucaterre authored Jul 22, 2022
1 parent 5b88277 commit 130de6c
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions examples/process_long_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def open_file(file_name: str) -> str:
with open(file_name, mode="r", encoding="utf-8") as f:
return f.read()


# use the tokenizer and apply the cleaning functions
# of your choice
def text_preprocessor(text: str) -> list:
    """Split raw text into a list of stripped, non-empty lines.

    The text is split on newline characters, every piece is stripped of
    leading and trailing whitespace, and pieces that are empty after
    stripping are discarded.

    Args:
        text: The raw text to split.

    Returns:
        The stripped, non-empty lines, in their original order.
    """
    # Filter on the stripped value: testing the raw line would let
    # whitespace-only lines through as empty strings.
    return [line for line in map(str.strip, text.split("\n")) if line]

Expand Down Expand Up @@ -71,7 +72,7 @@ def __call__(self, doc: Doc):
# Input settings: presumably the language of the text and the path of the
# file to process (confirm against the rest of the script)
language = "en"
filename = "data/text_en.txt"

# Apply text preprocessing: read the file, split it into stripped lines,
# then rejoin them with single spaces into one long string
sentences = text_preprocessor(open_file(filename))
huge_text = " ".join(sentences)

Expand Down

0 comments on commit 130de6c

Please sign in to comment.