Skip to content

Commit

Permalink
drop redundant traverse_inline, use only visited
Browse files Browse the repository at this point in the history
Signed-off-by: Panos Vagenas <[email protected]>
  • Loading branch information
vagenas committed Feb 20, 2025
1 parent f595166 commit 538aa73
Showing 1 changed file with 10 additions and 17 deletions.
27 changes: 10 additions & 17 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -1881,7 +1881,6 @@ def iterate_items(
page_no: Optional[int] = None,
included_content_layers: set[ContentLayer] = DEFAULT_CONTENT_LAYERS,
_level: int = 0, # fixed parameter, carries through the node nesting level
- traverse_inline: bool = True,
) -> typing.Iterable[Tuple[NodeItem, int]]: # tuple of node and level
"""iterate_elements.
Expand All @@ -1890,7 +1889,6 @@ def iterate_items(
:param traverse_pictures: bool: (Default value = False)
:param page_no: Optional[int]: (Default value = None)
:param _level: (Default value = 0)
- :param traverse_inline: bool: (Default value = True)
:param # fixed parameter:
:param carries through the node nesting level:
"""
Expand All @@ -1915,12 +1913,8 @@ def iterate_items(
if should_yield:
yield root, _level

- # Prevent children traversal when necessary
- if (isinstance(root, PictureItem) and not traverse_pictures) or (
- isinstance(root, GroupItem)
- and root.label == GroupLabel.INLINE
- and not traverse_inline
- ):
+ # Handle picture traversal - only traverse children if requested
+ if isinstance(root, PictureItem) and not traverse_pictures:
return

# Traverse children
Expand All @@ -1933,7 +1927,6 @@ def iterate_items(
traverse_pictures=traverse_pictures,
page_no=page_no,
_level=_level + 1,
- traverse_inline=traverse_inline,
)

def _clear_picture_pil_cache(self):
Expand Down Expand Up @@ -2243,7 +2236,7 @@ def export_to_markdown( # noqa: C901
text_width=text_width,
page_no=page_no,
is_inline_scope=False,
- visited_ids=set(),
+ visited=set(),
)
return tmp or ""

Expand All @@ -2262,17 +2255,13 @@ def _export_to_markdown( # noqa: C901
text_width: int,
page_no: Optional[int],
is_inline_scope: bool,
- visited_ids: set[str],
+ visited: set[str], # refs of visited items
) -> Optional[str]:
paragraphs: list[str] = []
list_item_texts: list[str] = []
list_nesting_level = 0 # Track the current list nesting level
previous_level = 0 # Track the previous item's level
in_list = False # Track if we're currently processing list items
- if node.self_ref in visited_ids:
- return None
- else:
- visited_ids.add(node.self_ref)

# Our export markdown doesn't contain any emphasis styling:
# Bold, Italic, or Bold-Italic
Expand Down Expand Up @@ -2322,9 +2311,13 @@ def _ingest_text(text: str, do_escape_html=True, do_escape_underscores=True):
node,
with_groups=True,
page_no=page_no,
- traverse_inline=is_inline_scope,
)
):
+ if item.self_ref in visited:
+ continue
+ else:
+ visited.add(item.self_ref)
+
# If we've moved to a lower level, we're exiting one or more groups
if level < previous_level:
# Calculate how many levels we've exited
Expand Down Expand Up @@ -2377,7 +2370,7 @@ def _ingest_text(text: str, do_escape_html=True, do_escape_underscores=True):
text_width=text_width,
page_no=page_no,
is_inline_scope=True,
- visited_ids=visited_ids,
+ visited=visited,
)
if inline_text:
_ingest_text(inline_text)
Expand Down

0 comments on commit 538aa73

Please sign in to comment.