Skip to content

Commit

Permalink
[app][fix] dont crash info for total quotes
Browse files Browse the repository at this point in the history
  • Loading branch information
M3ssman committed Oct 24, 2024
1 parent 58d0207 commit 156b00a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "digiflow"
version = "5.5.8"
version = "5.5.9"
description = "Father's Little Digitization Workflow Helper"
readme = "README.md"
requires-python = ">=3.8"
Expand Down
15 changes: 13 additions & 2 deletions src/digiflow/record/record_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

RECORD_STATE_MASK_FRAME = 'other_load'
SETSPEC_SPLITTER = '##'
STRING_QUOTES = "\"'"


class RecordHandlerException(Exception):
Expand Down Expand Up @@ -375,13 +376,23 @@ def _merge(self_record, other_record):
self_record[df_r.FIELD_STATE] = other_record[df_r.FIELD_STATE]
self_record[df_r.FIELD_STATETIME] = other_record[df_r.FIELD_STATETIME]
try:
self_info = ast.literal_eval(self_record[df_r.FIELD_INFO])
other_info = ast.literal_eval(other_record[df_r.FIELD_INFO])
self_info = ast.literal_eval(_clear_trailing_quotes(self_record[df_r.FIELD_INFO]))
other_info = ast.literal_eval(_clear_trailing_quotes(other_record[df_r.FIELD_INFO]))
self_info.update(other_info)
self_record[df_r.FIELD_INFO] = str(self_info)
except (SyntaxError, ValueError):
self_record[df_r.FIELD_INFO] = other_record[df_r.FIELD_INFO]

def _clear_trailing_quotes(raw_string: str) -> str:
    """Strip one enclosing quotation mark from each end of a string.

    Despite the name, both ends are inspected independently: a leading
    character is dropped if it is contained in STRING_QUOTES, and
    afterwards a trailing character is dropped if it is contained in
    STRING_QUOTES. At most one character is removed per end, and the
    two quotes do not need to match each other.

    Empty input, or input that becomes empty after the leading quote
    was removed (e.g. a lone quotation mark), is returned as-is
    instead of raising IndexError.

    :param raw_string: text that may be wrapped in single or double
        quotation marks
    :return: the text without its outermost leading/trailing quote
    """

    # guard each index access: indexing an empty string raises
    # IndexError, which the try/except around the merge of the
    # info fields (SyntaxError, ValueError) would not catch
    if raw_string and raw_string[0] in STRING_QUOTES:
        raw_string = raw_string[1:]
    # re-check for emptiness, since stripping the leading quote
    # may have consumed the only character
    if raw_string and raw_string[-1] in STRING_QUOTES:
        raw_string = raw_string[:-1]
    return raw_string


def _is_unset(self_record):
if self_record[df_r.FIELD_STATE] == df_r.UNSET_LABEL:
Expand Down
39 changes: 37 additions & 2 deletions tests/test_digiflow_record_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ def test_record_handler_merge_info_dicts(tmp_path):
Please note:
This merge will only work if both INFO fields
can the evaluated to dictionaries!
can evaluate to dictionaries!
"""

# arrange
Expand Down Expand Up @@ -840,4 +840,39 @@ def test_record_handler_merge_info_dicts(tmp_path):
assert results['ignores'] == 0
assert results['appendeds'] == 0
assert dst_hndlr.total_len == 2
assert merged_record.info == {'n_ocr': 20, 'pages': 23, 'ods_created':'1984-10-03'}
assert merged_record.info == {'n_ocr': 20, 'pages': 23, 'ods_created': '1984-10-03'}


def test_record_handler_merge_write_read(tmp_path):
    """Merge a record list whose info field is wrapped in an
    extra pair of quotation marks into an existing list, then
    read the written result back: the info dictionaries of both
    lists must have been combined.
    """

    def _load_handler(list_path):
        # both handlers in this test use the legacy layout
        return df_r.RecordHandler(
            list_path,
            data_fields=df_r.LEGACY_HEADER,
            transform_func=df_r.row_to_record)

    # arrange
    target_path = tmp_path / 'oai_list_a'
    initial_rows = [
        "123\tn.a.\t2015-08-25T20:00:35Z\t{'pages':23, 'ods_created':'1984-10-03'}\tu.a.\tn.a.\n"
    ]
    write_datalist(target_path, initial_rows, LEGACY_HEADER_STR)
    target_handler = _load_handler(target_path)

    merge_path = tmp_path / 'oai_list_b'
    # info field of this row is enclosed in double quotes
    merge_rows = [
        "123\tn.a.\t2015-08-25T20:00:35Z\t\"{'xml_invalid': \"Element 'mods:subtitle': This element is not expected.\"}\"\tocr_done\t2024-10-18_11:12:00\n",
    ]
    write_datalist(merge_path, merge_rows, LEGACY_HEADER_STR)

    # act
    target_handler.merges(merge_path, dry_run=False)
    reloaded_handler = _load_handler(target_path)

    # assert
    expected_info = {
        'pages': 23,
        'ods_created': '1984-10-03',
        'xml_invalid': "Element 'mods:subtitle': This element is not expected.",
    }
    merged_record: df_r.Record = reloaded_handler.next_record(state='ocr_done')
    assert merged_record.info == expected_info

0 comments on commit 156b00a

Please sign in to comment.