clean up the DP-Bench example
Signed-off-by: Peter Staar <[email protected]>
PeterStaar-IBM committed Jan 1, 2025
1 parent 8ef5c06 commit 9f23cca
Showing 5 changed files with 48 additions and 63 deletions.
Empty file removed: benchmarks/README.md
47 changes: 8 additions & 39 deletions docling_eval/cli/main.py
@@ -127,54 +127,23 @@ def visualise(
         with open(filename, "r") as fd:
             evaluation = DatasetLayoutEvaluation.parse_file(filename)

-        print(evaluation)
-
-        table = []
-        for i in range(len(evaluation.evaluations)):
-            table.append([
-                f"{i:02}",
-                f"{evaluation.evaluations[i].label}",
-                #f"{evaluation.evaluations[i].name}",
-                f"{evaluation.evaluations[i].value:.3f}",
-            ])
-        logging.info("Class mAP[0.5:0.95] table: \n\n"+tabulate(table, headers=["index", "label", "Class mAP[0.5:0.95]"], tablefmt="github"))
+        table, headers = evaluation.to_table()
+
+        logging.info("Class mAP[0.5:0.95] table: \n\n"+tabulate(table, headers=headers, tablefmt="github"))

     elif modality == EvaluationModality.TABLEFORMER:

         with open(filename, "r") as fd:
             evaluation = DatasetTableEvaluation.parse_file(filename)

-        # Calculate bin widths
-        bin_widths = [
-            evaluation.TEDS.bins[i + 1] - evaluation.TEDS.bins[i]
-            for i in range(len(evaluation.TEDS.bins) - 1)
-        ]
-        bin_middle = [
-            (evaluation.TEDS.bins[i + 1] + evaluation.TEDS.bins[i]) / 2.0
-            for i in range(len(evaluation.TEDS.bins) - 1)
-        ]
-
-        table = []
-        for i in range(len(evaluation.TEDS.bins) - 1):
-            table.append([
-                f"{i:02}",
-                f"{evaluation.TEDS.bins[i+0]:.3f}",
-                f"{evaluation.TEDS.bins[i+1]:.3f}",
-                f"{evaluation.TEDS.hist[i]}",
-                f"{100.0*evaluation.TEDS.hist[i]/float(evaluation.TEDS.total):.3f}"
-            ])
-        logging.info("TEDS table: \n\n"+tabulate(table, headers=["index", "x0<TEDS", "TEDS<x1", "count", "%"], tablefmt="github"))
+        table, headers = evaluation.to_table()
+        logging.info("TEDS table: \n\n"+tabulate(table, headers=headers, tablefmt="github"))

-        # Plot histogram
-        plt.bar(bin_middle, evaluation.TEDS.hist, width=bin_widths, edgecolor="black")
-
-        plt.xlabel("TEDS")
-        plt.ylabel("Frequency")
-        plt.title(f"benchmark: {benchmark.value}, modality: {modality.value}")

         figname = odir / f"evaluation_{benchmark.value}_{modality.value}.png"
-        logging.info(f"saving figure to {figname}")
-        plt.savefig(figname)

+        evaluation.save_histogram(figname)


     elif modality == EvaluationModality.CODEFORMER:
         pass
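Note: the CLI no longer hand-builds its report tables; each evaluation model now returns the rows together with matching headers. A minimal usage sketch of that pattern (the helper name `log_layout_report` is made up for this sketch; the import path and `parse_file` call are taken from the code shown above):

```python
import logging

from tabulate import tabulate

from docling_eval.evaluators.layout_evaluator import DatasetLayoutEvaluation


def log_layout_report(filename: str) -> None:
    # The model builds both the rows and the headers, so the CLI only has to
    # hand them to tabulate instead of hard-coding column names.
    evaluation = DatasetLayoutEvaluation.parse_file(filename)
    table, headers = evaluation.to_table()
    logging.info(
        "Class mAP[0.5:0.95] table: \n\n"
        + tabulate(table, headers=headers, tablefmt="github")
    )
```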
22 changes: 16 additions & 6 deletions docling_eval/evaluators/layout_evaluator.py
@@ -33,13 +33,21 @@ class DatasetLayoutEvaluation(BaseModel):

     evaluations: List[LayoutEvaluation]

-    def to_table(self) -> List[List[str]]:
-        table = []
-        for _ in self.evaluations:
-            table.append([_.name, "" if _.label is None else _.label, f"{_.value:.3f}"])
+    def to_table(self) -> Tuple[List[List[str]], List[str]]:

-        return table
+        headers=["label", "Class mAP[0.5:0.95]"]
+
+        self.evaluations = sorted(self.evaluations, key=lambda x: x.value, reverse=True)
+
+        table = []
+        for i in range(len(self.evaluations)):
+            table.append([
+                f"{self.evaluations[i].label}",
+                f"{100.0*self.evaluations[i].value:.2f}",
+            ])
+
+        return table, headers


 class LayoutEvaluator:

@@ -203,14 +211,16 @@ def _find_intersecting_labels(
             else:
                 pred_labels[item.label] = 1

+        """
         logging.info(f"True labels:")
         for label, count in true_labels.items():
             logging.info(f" => {label}: {count}")
         logging.info(f"Pred labels:")
         for label, count in pred_labels.items():
             logging.info(f" => {label}: {count}")

+        """

         intersection_labels: List[DocItemLabel] = []
         for label, count in true_labels.items():
             if label in pred_labels:
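Note: to make the new return shape concrete, here is a small self-contained sketch that mirrors what `DatasetLayoutEvaluation.to_table()` now produces; the labels and mAP values are invented purely for illustration, not real DP-Bench numbers:

```python
from tabulate import tabulate

# Invented example values, not real benchmark results.
evaluations = [
    ("picture", 0.61),
    ("table", 0.78),
    ("text", 0.92),
]

# Same shape as the refactored to_table(): rows sorted by value (descending),
# values reported as percentages, and the headers returned alongside the rows.
headers = ["label", "Class mAP[0.5:0.95]"]
table = [
    [label, f"{100.0*value:.2f}"]
    for label, value in sorted(evaluations, key=lambda x: x[1], reverse=True)
]

print(tabulate(table, headers=headers, tablefmt="github"))
```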
17 changes: 10 additions & 7 deletions docling_eval/evaluators/table_evaluator.py
@@ -50,7 +50,10 @@ def check_bins_and_hist_lengths(cls, values):
             raise ValueError("`bins` must have exactly one more element than `hist`.")
         return values

-    def to_table(self) -> List[List[str]]:
+    def to_table(self) -> Tuple[List[List[str]], List[str]]:
+
+        headers = ["x0<=TEDS", "TEDS<=x1", "%", "total"]
+
         # Calculate bin widths
         bin_widths = [
             self.bins[i + 1] - self.bins[i] for i in range(len(self.bins) - 1)
@@ -65,12 +68,12 @@ def to_table(self) -> List[List[str]]:
             [
                 f"{self.bins[i+0]:.3f}",
                 f"{self.bins[i+1]:.3f}",
-                f"{float(self.hist[i])/float(self.total):.3f}",
+                f"{100.0*float(self.hist[i])/float(self.total):.2f}",
                 f"{self.hist[i]}",
             ]
         )

-        return table
+        return table, headers

     def save_histogram(self, figname: Path):
         # Calculate bin widths
@@ -81,20 +84,20 @@ def save_histogram(self, figname: Path):
             (self.bins[i + 1] + self.bins[i]) / 2.0 for i in range(len(self.bins) - 1)
         ]

+        """
         for i in range(len(self.bins) - 1):
             logging.info(
                 f"{i:02} [{self.bins[i]:.3f}, {self.bins[i+1]:.3f}]: {self.hist[i]}"
             )

+        """

         # Plot histogram
         plt.bar(bin_middle, self.hist, width=bin_widths, edgecolor="black")
-        # width=(evaluation.TEDS.bins[1] - evaluation.TEDS.bins[0]),

         plt.xlabel("TEDS")
         plt.ylabel("Frequency")
-        # plt.title(f"benchmark: {benchmark.value}, modality: {modality.value}")
+        plt.title(f"mean: {self.mean:.2f}, median: {self.median:.2f}, std: {self.std:.2f}, total: {self.total}")

-        # figname = odir / f"evaluation_{benchmark.value}_{modality.value}.png"
         logging.info(f"saving figure to {figname}")
         plt.savefig(figname)

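Note: the CLI now delegates plotting to `save_histogram()` instead of building the figure itself. A self-contained sketch of the same bar-chart construction, using placeholder bins and counts (the numbers and output filename are made up; the matplotlib usage mirrors the method above):

```python
from pathlib import Path

import matplotlib.pyplot as plt

# Placeholder TEDS histogram: `bins` must have exactly one more element than
# `hist`, which is what check_bins_and_hist_lengths() enforces on the model.
bins = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
hist = [2, 3, 5, 14, 36]

# Bar widths and centres derived from the bin edges, as in save_histogram().
bin_widths = [bins[i + 1] - bins[i] for i in range(len(bins) - 1)]
bin_middle = [(bins[i + 1] + bins[i]) / 2.0 for i in range(len(bins) - 1)]

plt.bar(bin_middle, hist, width=bin_widths, edgecolor="black")
plt.xlabel("TEDS")
plt.ylabel("Frequency")
plt.savefig(Path("teds_histogram_example.png"))
```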
25 changes: 14 additions & 11 deletions docs/examples/benchmark_dpbench.py
@@ -52,13 +52,14 @@ def main():

     image_scale = 1.0

-    create_dpbench_layout_dataset(
-        dpbench_dir=idir, output_dir=odir_lay, image_scale=image_scale
-    )
+    if True:
+        create_dpbench_layout_dataset(
+            dpbench_dir=idir, output_dir=odir_lay, image_scale=image_scale
+        )

-    create_dpbench_tableformer_dataset(
-        dpbench_dir=idir, output_dir=odir_tab, image_scale=image_scale
-    )
+        create_dpbench_tableformer_dataset(
+            dpbench_dir=idir, output_dir=odir_tab, image_scale=image_scale
+        )

     if True:
         save_fn = (
@@ -73,26 +74,28 @@ def main():
         with open(save_fn, "w") as fd:
             json.dump(layout_evaluation.model_dump(), fd, indent=2, sort_keys=True)

-        results = layout_evaluation.to_table()
-        logging.info(f"mAP results for layout:\n\n{tabulate(results)}")
+        data, headers = layout_evaluation.to_table()
+        logging.info("Class mAP[0.5:0.95] table: \n\n"+tabulate(data, headers=headers, tablefmt="github"))

     if True:
         save_fn = (
             odir
             / f"evaluation_{BenchMarkNames.DPBENCH.value}_{EvaluationModality.TABLEFORMER.value}.json"
         )

+        figname = odir / f"evaluation_{BenchMarkNames.DPBENCH.value}_{EvaluationModality.TABLEFORMER.value}.png"
+
         table_evaluator = TableEvaluator()
         table_evaluation = table_evaluator(odir_tab, split="test")

         logging.info(f"writing results to {save_fn}")
         with open(save_fn, "w") as fd:
             json.dump(table_evaluation.model_dump(), fd, indent=2, sort_keys=True)

-        results = table_evaluation.TEDS.to_table()
-        md = tabulate(results, headers=["x0 <= TEDS", "TEDS <= x1", "%", "count"])
-        logging.info(f"TEDS results for TableFormer:\n\n{md}")
+        data, headers = table_evaluation.TEDS.to_table()
+        logging.info("TEDS table: \n\n"+tabulate(data, headers=headers, tablefmt="github"))

+        table_evaluation.TEDS.save_histogram(figname=figname)

 if __name__ == "__main__":
     main()
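Note: every report in this commit uses `tablefmt="github"`, so the logged tables come out as GitHub-flavoured Markdown and can be pasted straight into an issue or README. A tiny sketch with invented numbers:

```python
from tabulate import tabulate

rows = [["0.950", "1.000", "62.50", "10"], ["0.900", "0.950", "25.00", "4"]]
print(tabulate(rows, headers=["x0<=TEDS", "TEDS<=x1", "%", "total"], tablefmt="github"))
# Prints a pipe-delimited Markdown table with a header separator row.
```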
