Updated the README and the CLI
Signed-off-by: Peter Staar <[email protected]>
PeterStaar-IBM committed Jan 1, 2025
1 parent e1bbd20 commit 8ef5c06
Showing 5 changed files with 70 additions and 25 deletions.
47 changes: 41 additions & 6 deletions README.md
@@ -82,19 +82,19 @@ poetry run evaluate -t evaluate -m layout -b DPBench -i ./benchmarks/dpbench-lay
👉 Create the dataset,

```sh
poetry run evaluate -t create -m tableformer -b DPBench -i <location-of-dpbench> -o ./benchmarks/dpbench-tableformer
poetry run evaluate -t create -m tableformer -b DPBench -i ./benchmarks/dpbench-original -o ./benchmarks/dpbench-dataset/tableformer
```

👉 Evaluate the dataset,

```sh
poetry run evaluate -t evaluate -m tableformer -b DPBench -i ./benchmarks/dpbench-tableformer -o ./benchmarks/dpbench-tableformer
poetry run evaluate -t evaluate -m tableformer -b DPBench -i ./benchmarks/dpbench-dataset/tableformer -o ./benchmarks/dpbench-dataset/tableformer
```

👉 Visualise the dataset,

```sh
poetry run evaluate -t visualize -m tableformer -b DPBench -i ./benchmarks/dpbench-tableformer -o ./benchmarks/dpbench-tableformer
poetry run evaluate -t visualize -m tableformer -b DPBench -i ./benchmarks/dpbench-dataset/tableformer -o ./benchmarks/dpbench-dataset/tableformer
```
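
For reference when reading these results: the TableFormer evaluation reports a TEDS (Tree Edit Distance based Similarity) score per table. The commonly used definition, which we assume the evaluator follows, is

$$\mathrm{TEDS}(T_a, T_b) = 1 - \frac{\mathrm{EditDist}(T_a, T_b)}{\max(|T_a|, |T_b|)}$$

where $T_a$ and $T_b$ are the tree representations of the predicted and ground-truth tables, so a score of 1.0 means a perfect match.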

The final result can be visualised as,
@@ -110,26 +110,61 @@ Using a single command,
poetry run python ./docs/examples/benchmark_omnidocbench.py
```
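
Under the hood, this driver script strings together the dataset creation and evaluation steps shown in the per-modality sections below. As a minimal sketch of the layout part (the function name and arguments are taken from `docs/examples/benchmark_omnidocbench.py`; we assume the function lives in `docling_eval/benchmarks/omnidocbench/create.py`, and the paths are the illustrative ones used throughout this README):

```python
from pathlib import Path

# Assumed module path, matching docling_eval/benchmarks/omnidocbench/create.py.
from docling_eval.benchmarks.omnidocbench.create import (
    create_omnidocbench_layout_dataset,
)

# Illustrative paths, matching the examples in this README.
idir = Path("./benchmarks/omnidocbench-original")
odir_lay = Path("./benchmarks/omnidocbench-dataset/layout")

# Convert the original OmniDocBench annotations into the layout evaluation dataset.
create_omnidocbench_layout_dataset(
    omnidocbench_dir=idir, output_dir=odir_lay, image_scale=1.0
)
```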

<details>
<summary><b>Layout evaluation for OmniDocBench</b></summary>
<br>

👉 Create the dataset,

```sh
poetry run evaluate -t create -m layout -b OmniDocBench -i ./benchmarks/omnidocbench-original -o ./benchmarks/omnidocbench-dataset/layout
```

👉 Evaluate the dataset,

```sh
poetry run evaluate -t evaluate -m layout -b OmniDocBench -i ./benchmarks/omnidocbench-dataset/layout -o ./benchmarks/omnidocbench-dataset/layout
```

👉 Visualise the dataset,

```sh
poetry run evaluate -t visualize -m layout -b OmniDocBench -i ./benchmarks/omnidocbench-dataset/layout -o ./benchmarks/omnidocbench-dataset/layout
```

| index | label | Class mAP[0.5:0.95] |
|---------|----------------|-----------------------|
| 00 | picture | 0.293 |
| 01 | section_header | 0.131 |
| 02 | text | 0.24 |
| 03 | footnote | 0.025 |
| 04 | caption | 0.107 |
| 05 | table | 0.693 |
| 06 | page_header | 0.1 |
| 07 | page_footer | 0.161 |
| 08 | formula | 0.038 |
</details>
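
The numbers in this table are COCO-style mAP values averaged over IoU thresholds from 0.5 to 0.95, reported per layout class. As an illustrative, self-contained sketch of how such per-class values can be computed with `torchmetrics` (this is not necessarily the exact code used by docling-eval's layout evaluator):

```python
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# One predicted box and one ground-truth box for a single page (illustrative data).
preds = [
    {
        "boxes": torch.tensor([[10.0, 20.0, 110.0, 220.0]]),  # xyxy pixel coordinates
        "scores": torch.tensor([0.8]),
        "labels": torch.tensor([5]),  # e.g. the "table" class
    }
]
targets = [
    {
        "boxes": torch.tensor([[12.0, 18.0, 108.0, 215.0]]),
        "labels": torch.tensor([5]),
    }
]

metric = MeanAveragePrecision(iou_type="bbox", class_metrics=True)
metric.update(preds, targets)
result = metric.compute()

print(result["map"])            # overall mAP[0.5:0.95]
print(result["classes"])        # class ids seen in the data
print(result["map_per_class"])  # per-class mAP[0.5:0.95], as in the table above
```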

<details>
<summary><b>Table evaluations for OmniDocBench</b></summary>
<br>

👉 Create the dataset,

```sh
poetry run evaluate -t create -m tableformer -b OmniDocBench -i <location-of-omnidocbench> -o ./benchmarks/omnidocbench-tableformer
poetry run evaluate -t create -m tableformer -b OmniDocBench -i ./benchmarks/omnidocbench-original -o ./benchmarks/omnidocbench-dataset/tableformer
```

👉 Evaluate the dataset,

```sh
poetry run evaluate -t evaluate -m tableformer -b OmniDocBench -i ./benchmarks/omnidocbench-tableformer -o ./benchmarks/omnidocbench-tableformer
poetry run evaluate -t evaluate -m tableformer -b OmniDocBench -i ./benchmarks/omnidocbench-dataset/tableformer -o ./benchmarks/omnidocbench-dataset/tableformer
```

👉 Visualise the dataset,

```sh
poetry run evaluate -t visualize -m tableformer -b OmniDocBench -i ./benchmarks/OmniDocBench-dataset/tableformer/ -o ./benchmarks/OmniDocBench-dataset/tableformer/
poetry run evaluate -t visualize -m tableformer -b OmniDocBench -i ./benchmarks/omnidocbench-dataset/tableformer -o ./benchmarks/omnidocbench-dataset/tableformer
```
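
The `visualize` step prints the distribution of TEDS scores as a GitHub-style table (see the changes to `docling_eval/cli/main.py` in this commit). Below is a minimal sketch of the same binning using plain `numpy` and `tabulate`, assuming you already have a list of per-table TEDS scores; the bin count and scores are illustrative stand-ins for the evaluator's output:

```python
import numpy as np
from tabulate import tabulate

teds_scores = [0.91, 0.85, 0.99, 0.40, 0.77]  # illustrative per-table TEDS values

# Bin the scores into 10 equal-width bins on [0, 1], as a stand-in for evaluation.TEDS.
hist, bins = np.histogram(teds_scores, bins=10, range=(0.0, 1.0))
total = float(hist.sum())

table = []
for i in range(len(hist)):
    table.append(
        [
            f"{i:02}",
            f"{bins[i]:.3f}",
            f"{bins[i + 1]:.3f}",
            f"{hist[i]}",
            f"{100.0 * hist[i] / total:.3f}",
        ]
    )

print(tabulate(table, headers=["index", "x0<TEDS", "TEDS<x1", "count", "%"], tablefmt="github"))
```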

The final result can be visualised as,
2 changes: 1 addition & 1 deletion docling_eval/benchmarks/omnidocbench/create.py
@@ -276,7 +276,7 @@ def create_omnidocbench_e2e_dataset(
jpg_path = page_tuple[0]
pdf_path = page_tuple[1]

logging.info(f"file: {pdf_path}")
# logging.info(f"file: {pdf_path}")
if not os.path.basename(jpg_path) in gt:
logging.error(f"did not find ground-truth for {os.path.basename(jpg_path)}")
continue
25 changes: 15 additions & 10 deletions docling_eval/cli/main.py
@@ -124,7 +124,20 @@ def visualise(
pass

elif modality == EvaluationModality.LAYOUT:
pass
with open(filename, "r") as fd:
evaluation = DatasetLayoutEvaluation.parse_file(filename)

print(evaluation)

table = []
for i in range(len(evaluation.evaluations)):
table.append([
f"{i:02}",
f"{evaluation.evaluations[i].label}",
#f"{evaluation.evaluations[i].name}",
f"{evaluation.evaluations[i].value:.3f}",
])
logging.info("Class mAP[0.5:0.95] table: \n\n"+tabulate(table, headers=["index", "label", "Class mAP[0.5:0.95]"], tablefmt="github"))

elif modality == EvaluationModality.TABLEFORMER:

@@ -150,18 +163,10 @@ def visualise(
f"{evaluation.TEDS.hist[i]}",
f"{100.0*evaluation.TEDS.hist[i]/float(evaluation.TEDS.total):.3f}"
])

"""
logging.info(
f"{i:02} [{evaluation.TEDS.bins[i]:.3f}, {evaluation.TEDS.bins[i+1]:.3f}]: {evaluation.TEDS.hist[i]}"
)
"""

logging.info("table: \n\n"+tabulate(table, headers=["index", "x0<TEDS", "TEDS<x1", "count", "%"], tablefmt="github"))
logging.info("TEDS table: \n\n"+tabulate(table, headers=["index", "x0<TEDS", "TEDS<x1", "count", "%"], tablefmt="github"))

# Plot histogram
plt.bar(bin_middle, evaluation.TEDS.hist, width=bin_widths, edgecolor="black")
# width=(evaluation.TEDS.bins[1] - evaluation.TEDS.bins[0]),

plt.xlabel("TEDS")
plt.ylabel("Frequency")
17 changes: 11 additions & 6 deletions docling_eval/evaluators/layout_evaluator.py
@@ -100,9 +100,12 @@ def __call__(self, ds_path: Path, split: str = "test") -> DatasetLayoutEvaluatio
filter_labels=intersection_labels,
)

ground_truths.extend(gts)
predictions.extend(preds)

if len(gts)==len(preds):
ground_truths.extend(gts)
predictions.extend(preds)
else:
logging.error("Ignoring predictions for document")

assert len(ground_truths) == len(
predictions
), "len(ground_truths)==len(predictions)"
@@ -232,7 +235,7 @@ def _evaluate_layouts_in_documents(
# logging.info(f"#-true-tables: {len(true_tables)}, #-pred-tables: {len(pred_tables)}")
assert len(true_doc.pages) == len(
pred_doc.pages
), "len(true_doc.pages)==len(pred_doc.pages)"
), f"len(true_doc.pages)==len(pred_doc.pages) => {len(true_doc.pages)}=={len(pred_doc.pages)}"

# page_num -> List[DocItem]
true_pages_to_objects: Dict[int, List[DocItem]] = {}
@@ -318,8 +321,10 @@ def _evaluate_layouts_in_documents(
}
)

"""
assert len(ground_truths) == len(
predictions
), "len(ground_truths)==len(predictions)"

), f"len(ground_truths)==len(predictions) => {len(ground_truths)}=={len(predictions)}"
"""

return ground_truths, predictions
4 changes: 2 additions & 2 deletions docs/examples/benchmark_omnidocbench.py
@@ -53,7 +53,7 @@ def main():

image_scale = 1.0

if False:
if True:
create_omnidocbench_layout_dataset(
omnidocbench_dir=idir, output_dir=odir_lay, image_scale=image_scale
)
@@ -62,7 +62,7 @@
omnidocbench_dir=idir, output_dir=odir_tab, image_scale=image_scale
)

if False:
if True:
save_fn = (
odir
/ f"evaluation_{BenchMarkNames.OMNIDOCBENCH.value}_{EvaluationModality.LAYOUT.value}.json"
