Skip to content

Commit

Permalink
recursive search docx
Browse files Browse the repository at this point in the history
  • Loading branch information
liao961120 committed Apr 30, 2020
1 parent fdb4ec2 commit 61dabfd
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
19 changes: 10 additions & 9 deletions GlossProcessor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import re
import pathlib
import logging
from docx import Document

Expand Down Expand Up @@ -51,15 +52,15 @@ def __init__(self, docs_folder_path='.'):


# Diff hunk for the `_load_data` method: fills `self.data` with one entry per
# .docx file found under `path`.
# NOTE(review): this rendering has no +/- markers and no indentation, so the
# body before the commit and its replacement appear back to back. The split
# marked below is inferred from the commit title ("recursive search docx") and
# the newly imported `pathlib` -- confirm against the real diff.
def _load_data(self, path):

# --- presumably the REMOVED body: flat scan of `path` ---
# os.listdir() returns only the direct entries of `path`; sub-folders are
# not descended into.
for filename in os.listdir(path):
if filename.endswith(".docx"):
try:
glosses = process_doc(os.path.join(path, filename))
# Bare `except:` catches every exception (KeyboardInterrupt included);
# any failure is reported as a formatting problem and the file is skipped.
except:
logging.warning(f"Invalid formatting in docx: `{filename}`")
continue
# Entry is keyed by the bare file name.
self.data[filename] = tokenize_glosses(glosses, filename)
# --- presumably the ADDED body: recursive scan of `path` ---
path = pathlib.Path(path)

# rglob('*.docx') yields matching files in `path` and in all of its
# sub-directories.
for fp in path.rglob('*.docx'):
try:
glosses = process_doc(str(fp))
# Same catch-all handling as before: warn and move on to the next file.
except:
logging.warning(f"Invalid formatting in docx: `{fp}`")
continue
# Entry is now keyed by the path string of the file (directory part
# included), not just its name.
self.data[str(fp)] = tokenize_glosses(glosses, str(fp))


def search_gloss(self, tokens: str, regex=False):
Expand Down
3 changes: 2 additions & 1 deletion run_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
# Setup os specific parameters
# Path of the Python interpreter that is running this script.
python = sys.executable
SERVER_SCRIPT_PATH = pathlib.Path(SERVER_SCRIPT_PATH)
# NOTE(review): the page reports "2 additions & 1 deletion" for this file, so
# the next line is presumably the removed one and the two lines after it are
# its replacement -- confirm against the real diff.
DOCX_FOLDER_PATH = pathlib.Path(DOCX_FOLDER_PATH)
# Make the docx folder the current working directory, then refer to it as
# '.', so paths built from DOCX_FOLDER_PATH are relative to that folder.
os.chdir(DOCX_FOLDER_PATH)
DOCX_FOLDER_PATH = pathlib.Path('.')


# Check & install dependencies
Expand Down

0 comments on commit 61dabfd

Please sign in to comment.