fix: Add debug code to dump the predicted text in create_dlnv1_e2e_dataset()

Signed-off-by: Nikos Livathinos <[email protected]>
DS4SD · Feb 26, 2025 · 72bd897 · 72bd897
1 parent 2d2bfc1
commit 72bd897
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 2 deletions.
diff --git a/docling_eval/benchmarks/doclaynet_v1/create.py b/docling_eval/benchmarks/doclaynet_v1/create.py
@@ -180,6 +180,7 @@ def create_dlnv1_e2e_dataset(
     converter_type: ConverterTypes = ConverterTypes.DOCLING,
     do_viz: bool = False,
     max_items: int = -1,  # If -1 take the whole split
+    do_save_page_text: bool = False,
 ):
     ds = load_dataset(name, split=split)
 
@@ -217,6 +218,16 @@ def create_dlnv1_e2e_dataset(
 
         pred_doc = conv_results.document
 
+        # Debugging code that dumps the VLM predicted text in files
+        if do_save_page_text:
+            debug_dir = output_dir / "debug"
+            os.makedirs(debug_dir, exist_ok=True)
+            if len(conv_results.pages):
+                for page_id, page in enumerate(conv_results.pages):
+                    page_text_fn = debug_dir / f"{page_hash}_{page_id}.txt"
+                    with open(page_text_fn, "w") as fd:
+                        fd.write(page.predictions.vlm_response.text)
+
         true_doc = DoclingDocument(name=page_hash)
         true_doc, true_page_images = add_pages_to_true_doc(
             pdf_path=pdf_stream, true_doc=true_doc, image_scale=1.0

diff --git a/docling_eval/benchmarks/utils.py b/docling_eval/benchmarks/utils.py
@@ -154,7 +154,7 @@ def yield_cells_from_html_table(
 ):
     soup = BeautifulSoup(table_html, "html.parser")
     table = soup.find("table") or soup  # Ensure table context
-    rows = table.find_all("tr")
+    rows = table.find_all("tr")  # type: ignore
 
     max_cols = 0
     for row in rows:

diff --git a/docling_eval/cli/main.py b/docling_eval/cli/main.py
@@ -229,6 +229,7 @@ def create(
                 converter_type=converter_type,
                 do_viz=True,
                 max_items=max_items,
+                do_save_page_text=True,  # TODO: Debug
             )
         else:
             log.error(f"{modality} is not yet implemented for {benchmark}")

diff --git a/docling_eval/converters/conversion.py b/docling_eval/converters/conversion.py
@@ -136,7 +136,6 @@ def create_smol_docling_converter(
 ):
     pipeline_options = VlmPipelineOptions()
     pipeline_options.generate_page_images = True
-    pipeline_options.accelerator_options.device = "cuda:0"
     pipeline_options.accelerator_options.cuda_use_flash_attention2 = True
     pipeline_options.vlm_options = smoldocling_vlm_conversion_options