
Commit

updated the code to export layout after re-annotating the DP-Bench dataset

Signed-off-by: Peter Staar <[email protected]>
PeterStaar-IBM committed Jan 10, 2025
1 parent 3006842 commit 29fa042
Showing 5 changed files with 320 additions and 39 deletions.
155 changes: 152 additions & 3 deletions docling_eval/benchmarks/annotation_formats/create.py
@@ -33,10 +33,24 @@
)

from docling_eval.docling.utils import from_pil_to_base64uri, crop_bounding_box
from docling_eval.docling.utils import insert_images
from docling_eval.docling.utils import (
insert_images,
extract_images,
docling_version,
get_binary,
save_shard_to_disk
)

from docling_eval.benchmarks.constants import BenchMarkColumns
from docling_eval.benchmarks.utils import draw_clusters_with_reading_order, save_inspection_html
from docling_eval.benchmarks.utils import (
draw_clusters_with_reading_order,
save_inspection_html,
save_comparison_html_with_clusters,
write_datasets_info,
)

from docling_eval.docling.conversion import create_converter

# Configure logging
logging.basicConfig(
@@ -669,6 +683,131 @@ def parse_args():
DocItemLabel.FOOTNOTE,
}

PRED_HTML_EXPORT_LABELS = {
DocItemLabel.TITLE,
DocItemLabel.DOCUMENT_INDEX,
DocItemLabel.SECTION_HEADER,
DocItemLabel.PARAGRAPH,
DocItemLabel.TABLE,
DocItemLabel.PICTURE,
DocItemLabel.FORMULA,
DocItemLabel.CHECKBOX_UNSELECTED,
DocItemLabel.CHECKBOX_SELECTED,
DocItemLabel.TEXT,
DocItemLabel.LIST_ITEM,
DocItemLabel.CODE,
DocItemLabel.REFERENCE,
# Additional
DocItemLabel.PAGE_HEADER,
DocItemLabel.PAGE_FOOTER,
DocItemLabel.FOOTNOTE,
}

def create_layout_dataset_from_annotations(input_dir: Path, annot_file: Path):

output_dir = input_dir / "layout"

imgs_dir = input_dir / "imgs"
page_imgs_dir = input_dir / "page_imgs"
pdfs_dir = input_dir / "pdfs"

json_true_dir = input_dir / "json-groundtruth"
json_pred_dir = input_dir / "json-predictions"
json_anno_dir = input_dir / "json-annotations"

html_anno_dir = input_dir / "html-annotations"
html_viz_dir = input_dir / "html-annotatations-viz"

overview_file = input_dir / "overview_map.json"

with open(overview_file, "r") as fr:
overview = json.load(fr)

for _ in [input_dir, output_dir,
imgs_dir, page_imgs_dir, pdfs_dir,
json_true_dir, json_pred_dir, json_anno_dir,
html_anno_dir, html_viz_dir]:
os.makedirs(_, exist_ok=True)

image_scale = 2.0

# Create Converter
doc_converter = create_converter(page_image_scale=image_scale)

records = []
for desc, true_doc in tqdm(from_cvat_to_docling_document(annotation_filenames = [annot_file],
overview=overview,
pdfs_dir=pdfs_dir,
imgs_dir=imgs_dir),
total=len(overview),
ncols=128,
desc="Creating documents from annotations"):

basename = desc["basename"]

"""
save_inspection_html(filename=str(html_viz_dir / f"{basename}.html"), doc = true_doc,
labels=TRUE_HTML_EXPORT_LABELS)
"""

pdf_file = desc["pdf_file"]

# Create the predicted Document
conv_results = doc_converter.convert(source=pdf_file, raises_on_error=True)
pred_doc = conv_results.document

true_doc, true_pictures, true_page_images = extract_images(
document=true_doc,
pictures_column=BenchMarkColumns.GROUNDTRUTH_PICTURES.value, # pictures_column,
page_images_column=BenchMarkColumns.GROUNDTRUTH_PAGE_IMAGES.value, # page_images_column,
)

pred_doc, pred_pictures, pred_page_images = extract_images(
document=pred_doc,
pictures_column=BenchMarkColumns.PREDICTION_PICTURES.value, # pictures_column,
page_images_column=BenchMarkColumns.PREDICTION_PAGE_IMAGES.value, # page_images_column,
)

if True:
save_comparison_html_with_clusters(
filename=html_viz_dir / f"{basename}-clusters.html",
true_doc=true_doc,
pred_doc=pred_doc,
page_image=true_page_images[0],
true_labels=TRUE_HTML_EXPORT_LABELS,
pred_labels=PRED_HTML_EXPORT_LABELS,
)

record = {
BenchMarkColumns.DOCLING_VERSION: docling_version(),
BenchMarkColumns.STATUS: str(conv_results.status),
BenchMarkColumns.DOC_ID: str(basename),

BenchMarkColumns.GROUNDTRUTH: json.dumps(true_doc.export_to_dict()),
BenchMarkColumns.GROUNDTRUTH_PAGE_IMAGES: true_page_images,
BenchMarkColumns.GROUNDTRUTH_PICTURES: true_pictures,

BenchMarkColumns.PREDICTION: json.dumps(pred_doc.export_to_dict()),
BenchMarkColumns.PREDICTION_PAGE_IMAGES: pred_page_images,
BenchMarkColumns.PREDICTION_PICTURES: pred_pictures,

BenchMarkColumns.ORIGINAL: get_binary(pdf_file),
BenchMarkColumns.MIMETYPE: "application/pdf",
}
records.append(record)

test_dir = output_dir / "test"
os.makedirs(test_dir, exist_ok=True)

save_shard_to_disk(items=records, dataset_path=test_dir)

write_datasets_info(
name="DPBench: end-to-end",
output_dir=output_dir,
num_train_rows=0,
num_test_rows=len(records),
)

def main():

input_dir, preannot_file = parse_args()
@@ -692,6 +831,11 @@ def main():
for _ in [input_dir, imgs_dir, page_imgs_dir, pdfs_dir, json_true_dir, json_pred_dir, json_anno_dir, html_anno_dir, html_viz_dir]:
os.makedirs(_, exist_ok=True)

image_scale = 2.0

# Create Converter
doc_converter = create_converter(page_image_scale=image_scale)

for desc, true_doc in tqdm(from_cvat_to_docling_document(annotation_filenames = [preannot_file],
overview=overview,
pdfs_dir=pdfs_dir,
Expand All @@ -701,10 +845,15 @@ def main():
desc="Creating documents from annotations"):

basename = desc["basename"]


save_inspection_html(filename=str(html_viz_dir / f"{basename}.html"), doc = true_doc,
labels=TRUE_HTML_EXPORT_LABELS)


if __name__ == "__main__":
main()
#main()

input_dir, annot_file = parse_args()

create_layout_dataset_from_annotations(input_dir=input_dir, annot_file=annot_file)
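
For orientation, here is a minimal sketch (not part of this commit) of driving the new export entry point directly from Python, mirroring the __main__ block above. The directory and file names are hypothetical placeholders; in the script itself they come from parse_args().

from pathlib import Path

from docling_eval.benchmarks.annotation_formats.create import (
    create_layout_dataset_from_annotations,
)

# Hypothetical locations; the real values are supplied by parse_args().
input_dir = Path("benchmarks/dpbench")      # expects pdfs/, imgs/, overview_map.json, ...
annot_file = input_dir / "annotations.xml"  # exported CVAT annotations (placeholder name)

# Converts each annotated PDF, pairs it with its ground truth, and writes a dataset
# shard under <input_dir>/layout/test plus dataset info under <input_dir>/layout.
create_layout_dataset_from_annotations(input_dir=input_dir, annot_file=annot_file)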
9 changes: 9 additions & 0 deletions docling_eval/cli/main.py
@@ -154,6 +154,15 @@ def visualise(
+ tabulate(data, headers=headers, tablefmt="github")
)

data, headers = layout_evaluation.mAP_stats.to_table()
logging.info(
"TEDS table: \n\n" + tabulate(data, headers=headers, tablefmt="github")
)

figname = odir / f"evaluation_{benchmark.value}_{modality.value}.png"
layout_evaluation.mAP_stats.save_histogram(figname=figname, name="struct-with-text")


elif modality == EvaluationModality.TABLEFORMER:

with open(filename, "r") as fd:
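
To illustrate what the new visualise branch emits for the layout modality, here is a small self-contained sketch (not part of this commit) that prints a GitHub-style per-class mAP table and saves a histogram of per-image mAP values. It assumes only tabulate and matplotlib; it is not the actual DatasetStatistics.save_histogram implementation, and all numbers are made up.

import matplotlib

matplotlib.use("Agg")  # render to file only, no display required
import matplotlib.pyplot as plt
from tabulate import tabulate

# Toy values standing in for the evaluation results.
per_class = [("table", 0.873), ("picture", 0.791), ("text", 0.942)]
per_image_map = [0.91, 0.84, 0.77, 0.95, 0.63, 0.88]

# Per-class table, in the same shape as DatasetLayoutEvaluation.to_table().
headers = ["label", "Class mAP[0.5:0.95]"]
table = [
    [label, f"{100.0 * value:.2f}"]
    for label, value in sorted(per_class, key=lambda x: x[1], reverse=True)
]
print(tabulate(table, headers=headers, tablefmt="github"))

# Histogram of per-image mAP, roughly what the saved PNG is expected to show.
plt.figure()
plt.hist(per_image_map, bins=10, range=(0.0, 1.0))
plt.xlabel("mAP[0.5:0.95]")
plt.ylabel("number of pages")
plt.title("Per-image layout mAP")
plt.savefig("evaluation_layout_map_histogram.png")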
87 changes: 55 additions & 32 deletions docling_eval/evaluators/layout_evaluator.py
@@ -17,39 +17,50 @@

from docling_eval.benchmarks.constants import BenchMarkColumns

from docling_eval.evaluators.utils import DatasetStatistics, compute_stats

class LayoutEvaluation(BaseModel):

class ClassLayoutEvaluation(BaseModel):
name: str
label: str
value: float

class ImageLayoutEvaluation(BaseModel):
name: str
value: float

class DatasetLayoutEvaluation(BaseModel):
true_labels: Dict[str, int]
pred_labels: Dict[str, int]

intersecting_labels: List[str]

evaluations: List[LayoutEvaluation]
evaluations_per_class: List[ClassLayoutEvaluation]

evaluations_per_image: List[ImageLayoutEvaluation]

mAP_stats: DatasetStatistics

def to_table(self) -> Tuple[List[List[str]], List[str]]:

headers = ["label", "Class mAP[0.5:0.95]"]

self.evaluations = sorted(self.evaluations, key=lambda x: x.value, reverse=True)
self.evaluations_per_class = sorted(self.evaluations_per_class, key=lambda x: x.value, reverse=True)

table = []
for i in range(len(self.evaluations)):
for i in range(len(self.evaluations_per_class)):
table.append(
[
f"{self.evaluations[i].label}",
f"{100.0*self.evaluations[i].value:.2f}",
f"{self.evaluations_per_class[i].label}",
f"{100.0*self.evaluations_per_class[i].value:.2f}",
]
)

return table, headers




class LayoutEvaluator:

def __init__(self) -> None:
@@ -88,6 +99,7 @@ def __call__(self, ds_path: Path, split: str = "test") -> DatasetLayoutEvaluation
)
logging.info(f"Intersection labels: {intersection_labels}")

doc_ids = []
ground_truths = []
predictions = []

@@ -110,14 +122,21 @@ def __call__(self, ds_path: Path, split: str = "test") -> DatasetLayoutEvaluation
)

if len(gts) == len(preds):
for i in range(len(gts)):
doc_ids.append(data[BenchMarkColumns.DOC_ID] + f"-page-{i}")

ground_truths.extend(gts)
predictions.extend(preds)
else:
logging.error("Ignoring predictions for document")

assert len(ground_truths) == len(
assert len(doc_ids) == len(
ground_truths
), "doc_ids==len(ground_truths)"

assert len(doc_ids) == len(
predictions
), "len(ground_truths)==len(predictions)"
), "doc_ids==len(predictions)"

# Initialize Mean Average Precision metric
metric = MeanAveragePrecision(iou_type="bbox", class_metrics=True)
Expand All @@ -136,38 +155,42 @@ def __call__(self, ds_path: Path, split: str = "test") -> DatasetLayoutEvaluatio
if "map_per_class" in result:
for label_idx, class_map in enumerate(result["map_per_class"]):
evaluations.append(
LayoutEvaluation(
ClassLayoutEvaluation(
name="Class mAP[0.5:0.95]",
label=intersection_labels[label_idx].value,
value=class_map,
)
)

"""
# Print results
print("Results:")
for key, value in result.items():
try:
print(f"{key}: {value:.3f}")
except:
print(f"{key}: {value}")
# Overall mAP
print(f"Overall mAP[0.5:0.95]: {result['map'].item():.3f}")
print("\nPer-Class mAP[0.5:0.95]:")
if "map_per_class" in result:
for label_idx, class_map in enumerate(result["map_per_class"]):
# label_name = self.label_names.get(label_idx, f"Class {label_idx}") # Use label name or default
print(
f" => {label_idx} {intersection_labels[label_idx].value}: {class_map:.3f}"
)
"""

# Compute mAP for each image individually
map_values = []

evaluations_per_image: List[ImageLayoutEvaluation] = []
for doc_id, pred, gt in zip(doc_ids, predictions, ground_truths):
# Reset the metric for the next image
metric.reset()

# Update with single image
metric.update([pred], [gt])

# Compute metrics
result = metric.compute()

# Extract mAP for this image
map_value = float(result["map"].item())

map_values.append(map_value)
evaluations_per_image.append(ImageLayoutEvaluation(name=doc_id, value=map_value))

return DatasetLayoutEvaluation(
evaluations=evaluations,
evaluations_per_class=evaluations,
evaluations_per_image=evaluations_per_image,

mAP_stats = compute_stats(map_values),

true_labels=true_labels,
pred_labels=pred_labels,

intersecting_labels=[_.value for _ in intersection_labels],
)
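
The loop above resets the torchmetrics MeanAveragePrecision metric and feeds it one page at a time to obtain a per-image score. Below is a self-contained sketch of that pattern (not part of this commit), with made-up boxes and labels:

import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# One toy page: a single ground-truth box and one slightly offset prediction (xyxy).
gt = {
    "boxes": torch.tensor([[10.0, 10.0, 100.0, 100.0]]),
    "labels": torch.tensor([0]),
}
pred = {
    "boxes": torch.tensor([[12.0, 11.0, 98.0, 99.0]]),
    "scores": torch.tensor([0.9]),
    "labels": torch.tensor([0]),
}

metric = MeanAveragePrecision(iou_type="bbox", class_metrics=True)

# Per-image evaluation: reset, update with exactly one image, then compute.
metric.reset()
metric.update([pred], [gt])
result = metric.compute()
print(f"page mAP[0.5:0.95]: {result['map'].item():.3f}")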

@@ -271,7 +294,7 @@ def _evaluate_layouts_in_documents(
ground_truths = []
predictions = []

# logging.info("\n\n ============================================ \n\n")
# logging.info(f"\n\n ================= {true_doc.name}, {pred_doc.name} ===================== \n\n")

for page_no, items in true_pages_to_objects.items():

(Diffs for the remaining two changed files are not shown.)
