Skip to content

Commit

Permalink
fix(utils): make folioStr detection more reliable
Browse files Browse the repository at this point in the history
  • Loading branch information
musicEnfanthen committed Apr 26, 2024
1 parent 921dd67 commit c51cd93
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions convert_source_description/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ def _get_folios(sibling_paras: List[Tag]) -> List[Folio]:

for para in sibling_paras:
stripped_para_text = _strip_by_delimiter(para.text, ' \t')
if len(stripped_para_text) == 1:
if len(stripped_para_text) != 2:
stripped_para_text = _strip_by_delimiter(para.text, '\t')

# Check sibling paragraph for folioStr or pageStr to create a new folio entry
Expand All @@ -687,7 +687,11 @@ def _get_folios(sibling_paras: List[Tag]) -> List[Folio]:
folio = copy.deepcopy(emptyFolio)

# Extract folio label
folio['folio'] = _get_folio_label(stripped_para_text[0].strip())
if stripped_para_text:
if FOLIO_STR in stripped_para_text[0] or PAGE_STR in stripped_para_text[0]:
folio['folio'] = _get_folio_label(stripped_para_text[0].strip())
elif len(stripped_para_text) > 2 and (FOLIO_STR in stripped_para_text[1] or PAGE_STR in stripped_para_text[1]):
folio['folio'] = _get_folio_label(stripped_para_text[1].strip())

# Check if the paragraph contains a page marker
if page_found:
Expand Down

0 comments on commit c51cd93

Please sign in to comment.