Skip to content

Commit

Permalink
fix(utils): move default objects into separate file
Browse files Browse the repository at this point in the history
  • Loading branch information
musicEnfanthen committed Apr 26, 2024
1 parent a5534c4 commit c5ac29e
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 97 deletions.
87 changes: 87 additions & 0 deletions convert_source_description/default_objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Default objects used as templates for new source description and textcritics objects in the utils module."""

from typed_classes import (Row, System, Folio, ContentItem, Description,
SourceDescription, SourceList, LinkBox,
TextcriticalComment, TextCritics, TextcriticsList)

########
# Template for a source list: a single "sources" key holding an empty list.
# utils.create_source_list() works on a copy.deepcopy() of this object and
# takes its "sources" list from that copy.
defaultSourceList: SourceList = {
"sources": []
}

# Template for a single source description. All string fields start empty.
# utils._create_source_description() starts from a copy.deepcopy() of this
# object.
defaultSourceDescription: SourceDescription = {
"id": "",
"siglum": "",
"siglumAddendum": "",
"type": "",
"location": "",
# Starts as an empty dict rather than a populated Description.
# NOTE(review): presumably replaced later by an object built from
# defaultDescription -- confirm against utils._get_description().
"description": {}
}

# Template for the "description" part of a source description. String fields
# start empty, list fields start as empty lists.
# utils._get_description() starts from a copy.deepcopy() of this object and
# appends to its "desc" list.
defaultDescription: Description = {
"desc": [],
"writingMaterialString": "",
# Nested object with one string ("main") and one list ("secondary").
"writingInstruments": {
"main": "",
"secondary": []
},
"title": "",
"date": "",
"pagination": "",
"measureNumbers": "",
"instrumentation": "",
"annotations": "",
# NOTE(review): presumably filled with ContentItem objects -- confirm.
"content": []
}

# Template for a content item. utils._get_item() starts from a
# copy.deepcopy() of this object and then sets "item", "itemLinkTo" and
# "itemDescription".
defaultContentItem: ContentItem = {
"item": "",
"itemLinkTo": "",
"itemDescription": "",
# NOTE(review): presumably filled with Folio objects -- confirm.
"folios": []
}

# Template for a folio. utils._get_folios() starts from a copy.deepcopy() of
# this object for each folio it creates.
defaultFolio: Folio = {
"folio": "",
"folioLinkTo": "",
"folioDescription": "",
# NOTE(review): presumably a list of system groups (lists of System
# objects, cf. utils._get_system_group()) -- confirm.
"systemGroups": []
}

# Template for a system: three empty string fields.
# utils._get_system_group() starts from a copy.deepcopy() of this object for
# each system it creates.
defaultSystem: System = {
"system": "",
"measure": "",
"linkTo": ""
}

# Template for a row: three empty string fields.
# utils._get_system_group() starts from a copy.deepcopy() of this object and
# fills "rowType" and "rowBase" from a regex match on the system text.
defaultRow: Row = {
"rowType": "",
"rowBase": "",
"rowNumber": ""
}

# Template for a textcritics list: a single "textcritics" key holding an
# empty list. utils.create_textcritics() starts from a copy.deepcopy() of
# this object.
defaultTextcriticsList: TextcriticsList = {
"textcritics": []
}

# Template for a single textcritics entry. utils.create_textcritics() makes
# one copy.deepcopy() of this object per table found in the parsed document.
defaultTextcritics: TextCritics = {
"id": "",
"label": "",
"description": [],
# NOTE(review): the "rowTable" key below is disabled, so objects built from
# this template do not contain it. Confirm whether it should be restored
# or the commented-out line removed.
# "rowTable": False,
"comments": [],
"linkBoxes": []
}

# Template for a single textcritical comment. utils.create_textcritics()
# makes one copy.deepcopy() of this object per table row after the first.
defaultTextcriticalComment: TextcriticalComment = {
# Unlike the other fields, this one defaults to the literal string "TODO"
# instead of an empty string.
# NOTE(review): looks like a placeholder to be replaced with a real SVG
# group id -- confirm that "TODO" is meant to reach the output.
"svgGroupId": "TODO",
"measure": "",
"system": "",
"position": "",
"comment": ""
}

# Template for a link box: two empty string fields.
# NOTE(review): presumably the element type of the "linkBoxes" list in
# defaultTextcritics -- confirm against typed_classes.
defaultLinkBox: LinkBox = {
"svgGroupId": "",
"linkTo": ""
}
114 changes: 17 additions & 97 deletions convert_source_description/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,101 +12,21 @@
import mammoth
from bs4 import BeautifulSoup, Tag

from typed_classes import (Row, System, Folio, ContentItem, WritingInstruments,
Description, SourceDescription, SourceList, LinkBox,
TextcriticalComment, TextCritics, TextcriticsList)

from typed_classes import (System, Folio, ContentItem, WritingInstruments,
Description, SourceDescription, SourceList, TextCritics)
from default_objects import (defaultSourceList, defaultSourceDescription, defaultDescription,
defaultContentItem, defaultFolio, defaultSystem, defaultRow,
defaultTextcriticsList, defaultTextcritics,
defaultTextcriticalComment)

############################################
# Helper variables: Strings & Objects
# Helper strings
############################################
# Marker substrings used when parsing paragraph text.
# SYSTEM_STR is tested with `in` against paragraph text in
# _get_system_group() to detect a system label.
SYSTEM_STR = 'System'
# NOTE(review): the following look like German abbreviations
# ('T.' = Takt/measure, 'Bl.' = Blatt/folio, 'S.' = Seite/page) -- confirm.
MEASURE_STR = 'T.'
FOLIO_STR = 'Bl.'
PAGE_STR = 'S.'

########
emptySourceList: SourceList = {
"sources": []
}

emptySourceDescription: SourceDescription = {
"id": "",
"siglum": "",
"siglumAddendum": "",
"type": "",
"location": "",
"description": {}
}

emptyDescription: Description = {
"desc": [],
"writingMaterialString": "",
"writingInstruments": {
"main": "",
"secondary": []
},
"title": "",
"date": "",
"pagination": "",
"measureNumbers": "",
"instrumentation": "",
"annotations": "",
"content": []
}

emptyContentItem: ContentItem = {
"item": "",
"itemLinkTo": "",
"itemDescription": "",
"folios": []
}

emptyFolio: Folio = {
"folio": "",
"folioLinkTo": "",
"folioDescription": "",
"systemGroups": []
}

emptySystem: System = {
"system": "",
"measure": "",
"linkTo": ""
}

emptyRow: Row = {
"rowType": "",
"rowBase": "",
"rowNumber": ""
}

emptyTextcriticsList: TextcriticsList = {
"textcritics": []
}

emptyTextcritics: TextCritics = {
"id": "",
"label": "",
"description": [],
# "rowTable": False,
"comments": [],
"linkBoxes": []
}

emptyTextcriticalComment: TextcriticalComment = {
"svgGroupId": "TODO",
"measure": "",
"system": "",
"position": "",
"comment": ""
}

emptyLinkBox: LinkBox = {
"svgGroupId": "",
"linkTo": ""
}


############################################
# Public class: ConversionUtils
Expand All @@ -128,7 +48,7 @@ def create_source_list(self, soup: BeautifulSoup) -> SourceList:
Returns:
A SourceList object containing a list of SourceDescription objects.
"""
source_list = copy.deepcopy(emptySourceList)
source_list = copy.deepcopy(defaultSourceList)
sources = source_list['sources']

# Find all p tags in soup
Expand Down Expand Up @@ -174,18 +94,18 @@ def create_textcritics(self, soup: BeautifulSoup) -> TextCritics:
Returns:
A TextcriticsList object containing a list of TextCritics objects.
"""
textcritics_list = copy.deepcopy(emptyTextcriticsList)
textcritics_list = copy.deepcopy(defaultTextcriticsList)

# Find all table tags in soup
tables = soup.find_all('table')

# Iterate over tables and create textcritics
for table_index, table in enumerate(tables):
textcritics = copy.deepcopy(emptyTextcritics)
textcritics = copy.deepcopy(defaultTextcritics)

table_rows = table.find_all('tr')
for row in table_rows[1:]:
comment = copy.deepcopy(emptyTextcriticalComment)
comment = copy.deepcopy(defaultTextcriticalComment)
table_cols = row.find_all('td')
comment['measure'] = _strip_tag(
_strip_tag(table_cols[0], 'td'), 'p')
Expand Down Expand Up @@ -284,7 +204,7 @@ def _create_source_description(paras: List[Tag]) -> SourceDescription:
Returns:
SourceDescription: A dictionary representing the source description.
"""
source_description = copy.deepcopy(emptySourceDescription)
source_description = copy.deepcopy(defaultSourceDescription)

# Get siglum
siglum, siglum_addendum = _get_siglum(paras)
Expand Down Expand Up @@ -346,7 +266,7 @@ def _get_description(paras: List[Tag], source_id: str) -> Description:
Returns:
Description: A dictionary representing the description of the source description.
"""
description = copy.deepcopy(emptyDescription)
description = copy.deepcopy(defaultDescription)
desc = _strip_tag(paras[3], 'p') or ''
description['desc'].append(desc)

Expand Down Expand Up @@ -584,7 +504,7 @@ def _get_folios(sibling_paras: List[Tag]) -> List[Folio]:

if has_folio_str:
# Create folio object
folio = copy.deepcopy(emptyFolio)
folio = copy.deepcopy(defaultFolio)

# Extract folio label
if stripped_para_text:
Expand Down Expand Up @@ -671,7 +591,7 @@ def _get_item(para: Tag) -> ContentItem:
item_description = stripped_para_content[0].strip().rstrip(':')

# Create item object
item = copy.deepcopy(emptyContentItem)
item = copy.deepcopy(defaultContentItem)
item['item'] = item_label
item['itemLinkTo'] = item_link_to
item['itemDescription'] = item_description
Expand Down Expand Up @@ -810,7 +730,7 @@ def _get_system_group(stripped_para_text: List[str]) -> List[System]:
continue

# Create system object
system = copy.deepcopy(emptySystem)
system = copy.deepcopy(defaultSystem)

# Extract system label
if SYSTEM_STR in para:
Expand Down Expand Up @@ -839,7 +759,7 @@ def _get_system_group(stripped_para_text: List[str]) -> List[System]:
if re.search(pattern, stripped_system_text[1]):
row_text = re.findall(pattern, stripped_system_text[1])[0]

row = copy.deepcopy(emptyRow)
row = copy.deepcopy(defaultRow)
row['rowType'] = row_text[0]
row['rowBase'] = row_text[1]
if len(row_text) > 3:
Expand Down

0 comments on commit c5ac29e

Please sign in to comment.