clean up the DP-Bench example
Signed-off-by: Peter Staar <[email protected]>
PeterStaar-IBM committed Jan 1, 2025
1 parent 8ef5c06 commit 9f23cca
Showing 5 changed files with 48 additions and 63 deletions.
Empty file removed: benchmarks/README.md
47 changes: 8 additions & 39 deletions docling_eval/cli/main.py
@@ -127,54 +127,23 @@ def visualise(
         with open(filename, "r") as fd:
             evaluation = DatasetLayoutEvaluation.parse_file(filename)

-        print(evaluation)
-
-        table = []
-        for i in range(len(evaluation.evaluations)):
-            table.append([
-                f"{i:02}",
-                f"{evaluation.evaluations[i].label}",
-                #f"{evaluation.evaluations[i].name}",
-                f"{evaluation.evaluations[i].value:.3f}",
-            ])
-        logging.info("Class mAP[0.5:0.95] table: \n\n"+tabulate(table, headers=["index", "label", "Class mAP[0.5:0.95]"], tablefmt="github"))
+        table, headers = evaluation.to_table()
+
+        logging.info("Class mAP[0.5:0.95] table: \n\n"+tabulate(table, headers=headers, tablefmt="github"))

     elif modality == EvaluationModality.TABLEFORMER:

         with open(filename, "r") as fd:
             evaluation = DatasetTableEvaluation.parse_file(filename)

-        # Calculate bin widths
-        bin_widths = [
-            evaluation.TEDS.bins[i + 1] - evaluation.TEDS.bins[i]
-            for i in range(len(evaluation.TEDS.bins) - 1)
-        ]
-        bin_middle = [
-            (evaluation.TEDS.bins[i + 1] + evaluation.TEDS.bins[i]) / 2.0
-            for i in range(len(evaluation.TEDS.bins) - 1)
-        ]
-
-        table = []
-        for i in range(len(evaluation.TEDS.bins) - 1):
-            table.append([
-                f"{i:02}",
-                f"{evaluation.TEDS.bins[i+0]:.3f}",
-                f"{evaluation.TEDS.bins[i+1]:.3f}",
-                f"{evaluation.TEDS.hist[i]}",
-                f"{100.0*evaluation.TEDS.hist[i]/float(evaluation.TEDS.total):.3f}"
-            ])
-        logging.info("TEDS table: \n\n"+tabulate(table, headers=["index", "x0<TEDS", "TEDS<x1", "count", "%"], tablefmt="github"))
+        table, headers = evaluation.to_table()
+        logging.info("TEDS table: \n\n"+tabulate(table, headers=headers, tablefmt="github"))

-        # Plot histogram
-        plt.bar(bin_middle, evaluation.TEDS.hist, width=bin_widths, edgecolor="black")
-
-        plt.xlabel("TEDS")
-        plt.ylabel("Frequency")
-        plt.title(f"benchmark: {benchmark.value}, modality: {modality.value}")

         figname = odir / f"evaluation_{benchmark.value}_{modality.value}.png"
-        logging.info(f"saving figure to {figname}")
-        plt.savefig(figname)

+        evaluation.save_histogram(figname)


     elif modality == EvaluationModality.CODEFORMER:
         pass
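Note: the CLI no longer hand-builds its report tables; each evaluation model now returns the rows together with matching headers. A minimal usage sketch of that pattern (the helper name `log_layout_report` is made up for this sketch; the import path and `parse_file` call are taken from the code shown above):

```python
import logging

from tabulate import tabulate

from docling_eval.evaluators.layout_evaluator import DatasetLayoutEvaluation


def log_layout_report(filename: str) -> None:
    # The model builds both the rows and the headers, so the CLI only has to
    # hand them to tabulate instead of hard-coding column names.
    evaluation = DatasetLayoutEvaluation.parse_file(filename)
    table, headers = evaluation.to_table()
    logging.info(
        "Class mAP[0.5:0.95] table: \n\n"
        + tabulate(table, headers=headers, tablefmt="github")
    )
```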
22 changes: 16 additions & 6 deletions docling_eval/evaluators/layout_evaluator.py
@@ -33,13 +33,21 @@ class DatasetLayoutEvaluation(BaseModel):

     evaluations: List[LayoutEvaluation]

-    def to_table(self) -> List[List[str]]:
-        table = []
-        for _ in self.evaluations:
-            table.append([_.name, "" if _.label is None else _.label, f"{_.value:.3f}"])
+    def to_table(self) -> Tuple[List[List[str]], List[str]]:

-        return table
+        headers=["label", "Class mAP[0.5:0.95]"]
+
+        self.evaluations = sorted(self.evaluations, key=lambda x: x.value, reverse=True)
+
+        table = []
+        for i in range(len(self.evaluations)):
+            table.append([
+                f"{self.evaluations[i].label}",
+                f"{100.0*self.evaluations[i].value:.2f}",
+            ])
+
+        return table, headers


 class LayoutEvaluator:

@@ -203,14 +211,16 @@ def _find_intersecting_labels(
             else:
                 pred_labels[item.label] = 1

+        """
         logging.info(f"True labels:")
         for label, count in true_labels.items():
             logging.info(f" => {label}: {count}")
         logging.info(f"Pred labels:")
         for label, count in pred_labels.items():
             logging.info(f" => {label}: {count}")

+        """

         intersection_labels: List[DocItemLabel] = []
         for label, count in true_labels.items():
             if label in pred_labels:
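Note: to make the new return shape concrete, here is a small self-contained sketch that mirrors what `DatasetLayoutEvaluation.to_table()` now produces; the labels and mAP values are invented purely for illustration, not real DP-Bench numbers:

```python
from tabulate import tabulate

# Invented example values, not real benchmark results.
evaluations = [
    ("picture", 0.61),
    ("table", 0.78),
    ("text", 0.92),
]

# Same shape as the refactored to_table(): rows sorted by value (descending),
# values reported as percentages, and the headers returned alongside the rows.
headers = ["label", "Class mAP[0.5:0.95]"]
table = [
    [label, f"{100.0*value:.2f}"]
    for label, value in sorted(evaluations, key=lambda x: x[1], reverse=True)
]

print(tabulate(table, headers=headers, tablefmt="github"))
```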
17 changes: 10 additions & 7 deletions docling_eval/evaluators/table_evaluator.py
@@ -50,7 +50,10 @@ def check_bins_and_hist_lengths(cls, values):
             raise ValueError("`bins` must have exactly one more element than `hist`.")
         return values

-    def to_table(self) -> List[List[str]]:
+    def to_table(self) -> Tuple[List[List[str]], List[str]]:
+
+        headers = ["x0<=TEDS", "TEDS<=x1", "%", "total"]
+
         # Calculate bin widths
         bin_widths = [
             self.bins[i + 1] - self.bins[i] for i in range(len(self.bins) - 1)
@@ -65,12 +68,12 @@ def to_table(self) -> List[List[str]]:
             [
                 f"{self.bins[i+0]:.3f}",
                 f"{self.bins[i+1]:.3f}",
-                f"{float(self.hist[i])/float(self.total):.3f}",
+                f"{100.0*float(self.hist[i])/float(self.total):.2f}",
                 f"{self.hist[i]}",
             ]
         )

-        return table
+        return table, headers

     def save_histogram(self, figname: Path):
         # Calculate bin widths
@@ -81,20 +84,20 @@ def save_histogram(self, figname: Path):
             (self.bins[i + 1] + self.bins[i]) / 2.0 for i in range(len(self.bins) - 1)
         ]

+        """
         for i in range(len(self.bins) - 1):
             logging.info(
                 f"{i:02} [{self.bins[i]:.3f}, {self.bins[i+1]:.3f}]: {self.hist[i]}"
             )

+        """

         # Plot histogram
         plt.bar(bin_middle, self.hist, width=bin_widths, edgecolor="black")
-        # width=(evaluation.TEDS.bins[1] - evaluation.TEDS.bins[0]),

         plt.xlabel("TEDS")
         plt.ylabel("Frequency")
-        # plt.title(f"benchmark: {benchmark.value}, modality: {modality.value}")
+        plt.title(f"mean: {self.mean:.2f}, median: {self.median:.2f}, std: {self.std:.2f}, total: {self.total}")

-        # figname = odir / f"evaluation_{benchmark.value}_{modality.value}.png"
         logging.info(f"saving figure to {figname}")
         plt.savefig(figname)

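Note: the CLI now delegates plotting to `save_histogram()` instead of building the figure itself. A self-contained sketch of the same bar-chart construction, using placeholder bins and counts (the numbers and output filename are made up; the matplotlib usage mirrors the method above):

```python
from pathlib import Path

import matplotlib.pyplot as plt

# Placeholder TEDS histogram: `bins` must have exactly one more element than
# `hist`, which is what check_bins_and_hist_lengths() enforces on the model.
bins = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
hist = [2, 3, 5, 14, 36]

# Bar widths and centres derived from the bin edges, as in save_histogram().
bin_widths = [bins[i + 1] - bins[i] for i in range(len(bins) - 1)]
bin_middle = [(bins[i + 1] + bins[i]) / 2.0 for i in range(len(bins) - 1)]

plt.bar(bin_middle, hist, width=bin_widths, edgecolor="black")
plt.xlabel("TEDS")
plt.ylabel("Frequency")
plt.savefig(Path("teds_histogram_example.png"))
```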
25 changes: 14 additions & 11 deletions docs/examples/benchmark_dpbench.py
@@ -52,13 +52,14 @@ def main():

     image_scale = 1.0

-    create_dpbench_layout_dataset(
-        dpbench_dir=idir, output_dir=odir_lay, image_scale=image_scale
-    )
+    if True:
+        create_dpbench_layout_dataset(
+            dpbench_dir=idir, output_dir=odir_lay, image_scale=image_scale
+        )

-    create_dpbench_tableformer_dataset(
-        dpbench_dir=idir, output_dir=odir_tab, image_scale=image_scale
-    )
+        create_dpbench_tableformer_dataset(
+            dpbench_dir=idir, output_dir=odir_tab, image_scale=image_scale
+        )

     if True:
         save_fn = (
@@ -73,26 +74,28 @@ def main():
         with open(save_fn, "w") as fd:
             json.dump(layout_evaluation.model_dump(), fd, indent=2, sort_keys=True)

-        results = layout_evaluation.to_table()
-        logging.info(f"mAP results for layout:\n\n{tabulate(results)}")
+        data, headers = layout_evaluation.to_table()
+        logging.info("Class mAP[0.5:0.95] table: \n\n"+tabulate(data, headers=headers, tablefmt="github"))

     if True:
         save_fn = (
             odir
             / f"evaluation_{BenchMarkNames.DPBENCH.value}_{EvaluationModality.TABLEFORMER.value}.json"
         )

+        figname = odir / f"evaluation_{BenchMarkNames.DPBENCH.value}_{EvaluationModality.TABLEFORMER.value}.png"
+
         table_evaluator = TableEvaluator()
         table_evaluation = table_evaluator(odir_tab, split="test")

         logging.info(f"writing results to {save_fn}")
         with open(save_fn, "w") as fd:
             json.dump(table_evaluation.model_dump(), fd, indent=2, sort_keys=True)

-        results = table_evaluation.TEDS.to_table()
-        md = tabulate(results, headers=["x0 <= TEDS", "TEDS <= x1", "%", "count"])
-        logging.info(f"TEDS results for TableFormer:\n\n{md}")
+        data, headers = table_evaluation.TEDS.to_table()
+        logging.info("TEDS table: \n\n"+tabulate(data, headers=headers, tablefmt="github"))

+        table_evaluation.TEDS.save_histogram(figname=figname)

 if __name__ == "__main__":
     main()
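Note: every report in this commit uses `tablefmt="github"`, so the logged tables come out as GitHub-flavoured Markdown and can be pasted straight into an issue or README. A tiny sketch with invented numbers:

```python
from tabulate import tabulate

rows = [["0.950", "1.000", "62.50", "10"], ["0.900", "0.950", "25.00", "4"]]
print(tabulate(rows, headers=["x0<=TEDS", "TEDS<=x1", "%", "total"], tablefmt="github"))
# Prints a pipe-delimited Markdown table with a header separator row.
```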
