reformatted the code
Signed-off-by: Peter Staar <[email protected]>
PeterStaar-IBM committed Dec 20, 2024
1 parent 34e5df1 commit 3e364d2
Showing 5 changed files with 389 additions and 262 deletions.
28 changes: 21 additions & 7 deletions docling_eval/benchmarks/dpbench/create.py
@@ -5,9 +5,9 @@
import os
from pathlib import Path
from typing import Dict, List
from tqdm import tqdm

import pypdfium2 as pdfium
from tqdm import tqdm # type: ignore

# Configure logging
logging.basicConfig(
@@ -235,7 +237,9 @@ def update(doc: DoclingDocument, annots: Dict, page_width: float, page_height: f
return


def create_dpbench_e2e_dataset(dpbench_dir: Path, output_dir: Path, image_scale: float = 1.0):
def create_dpbench_e2e_dataset(
dpbench_dir: Path, output_dir: Path, image_scale: float = 1.0
):

# Create Converter
doc_converter = create_converter(
@@ -248,7 +250,12 @@ def create_dpbench_e2e_dataset(dpbench_dir: Path, output_dir: Path, image_scale:

records = []

for filename, annots in tqdm(gt.items(), desc="Processing files for DP-Bench with end-to-end", total=len(gt), ncols=128):
for filename, annots in tqdm(
gt.items(),
desc="Processing files for DP-Bench with end-to-end",
total=len(gt),
ncols=128,
):

pdf_path = dpbench_dir / f"dataset/pdfs/{filename}"
# logging.info(f"\n\n===============================\n\nfile: {pdf_path}\n\n")
@@ -287,7 +294,7 @@ def create_dpbench_e2e_dataset(dpbench_dir: Path, output_dir: Path, image_scale:

test_dir = output_dir / "test"
os.makedirs(test_dir, exist_ok=True)

save_shard_to_disk(items=records, dataset_path=test_dir)

write_datasets_info(
@@ -298,7 +305,9 @@ def create_dpbench_e2e_dataset(dpbench_dir: Path, output_dir: Path, image_scale:
)


def create_dpbench_tableformer_dataset(dpbench_dir: Path, output_dir: Path, image_scale: float = 1.0):
def create_dpbench_tableformer_dataset(
dpbench_dir: Path, output_dir: Path, image_scale: float = 1.0
):

tf_config = init_tf_model()

@@ -308,7 +317,12 @@ def create_dpbench_tableformer_dataset(dpbench_dir: Path, output_dir: Path, imag

records = []

for filename, annots in tqdm(gt.items(), desc="Processing files for DP-Bench with TableFormer", total=len(gt), ncols=128):
for filename, annots in tqdm(
gt.items(),
desc="Processing files for DP-Bench with TableFormer",
total=len(gt),
ncols=128,
):

pdf_path = dpbench_dir / f"dataset/pdfs/{filename}"
# logging.info(f"\n\n===============================\n\nfile: {pdf_path}\n\n")
@@ -453,7 +467,7 @@ def create_dpbench_tableformer_dataset(dpbench_dir: Path, output_dir: Path, imag

test_dir = output_dir / "test"
os.makedirs(test_dir, exist_ok=True)

save_shard_to_disk(items=records, dataset_path=test_dir)

write_datasets_info(
6 changes: 1 addition & 5 deletions docling_eval/benchmarks/utils.py
@@ -1,9 +1,7 @@
import json
from pathlib import Path

from docling_eval.benchmarks.constants import BenchMarkNames

from docling_eval.benchmarks.constants import BenchMarkColumns
from docling_eval.benchmarks.constants import BenchMarkColumns, BenchMarkNames


def write_datasets_info(
@@ -37,5 +35,3 @@ def write_datasets_info(

with open(output_dir / f"dataset_infos.json", "w") as fw:
fw.write(json.dumps(dataset_infos, indent=2))


204 changes: 118 additions & 86 deletions docling_eval/cli/main.py
@@ -1,20 +1,26 @@
import os
import json
import logging
import os
from enum import Enum, auto
from pathlib import Path
from typing import Annotated, Optional

import matplotlib.pyplot as plt
import typer

from docling_eval.benchmarks.constants import BenchMarkNames

from docling_eval.benchmarks.dpbench.create import create_dpbench_e2e_dataset, create_dpbench_tableformer_dataset

from docling_eval.evaluators.layout_evaluator import LayoutEvaluator, DatasetLayoutEvaluation
from docling_eval.evaluators.table_evaluator import TableEvaluator, DatasetTableEvaluation

import matplotlib.pyplot as plt
from docling_eval.benchmarks.dpbench.create import (
create_dpbench_e2e_dataset,
create_dpbench_tableformer_dataset,
)
from docling_eval.evaluators.layout_evaluator import (
DatasetLayoutEvaluation,
LayoutEvaluator,
)
from docling_eval.evaluators.table_evaluator import (
DatasetTableEvaluation,
TableEvaluator,
)

# Configure logging
logging.basicConfig(
Expand All @@ -35,112 +41,138 @@ class EvaluationTask(str, Enum):
EVALUATE = "evaluate"
VISUALIZE = "visualize"


class EvaluationModality(str, Enum):
END2END = "end-to-end"
LAYOUT = "layout"
TABLEFORMER = "tableformer"
CODEFORMER = "codeformer"


def create(modality:EvaluationModality, benchmark:BenchMarkNames, idir:Path, odir:Path=None, image_scale:float=1.0):
def create(
modality: EvaluationModality,
benchmark: BenchMarkNames,
idir: Path,
odir: Path,
image_scale: float = 1.0,
):
r""""""
if not os.path.exists(idir):
log.error(f"Benchmark directory not found: {idir}")
return

if odir is None:
odir = Path("./benchmarks") / benchmark.value / modality.value

match benchmark:
case BenchMarkNames.DPBENCH:

if(modality==EvaluationModality.END2END or
modality==EvaluationModality.LAYOUT):
create_dpbench_e2e_dataset(dpbench_dir=idir, output_dir=odir, image_scale=image_scale)
if benchmark == BenchMarkNames.DPBENCH:

elif(modality==EvaluationModality.TABLEFORMER):
create_dpbench_tableformer_dataset(dpbench_dir=idir, output_dir=odir, image_scale=image_scale)
if (
modality == EvaluationModality.END2END
or modality == EvaluationModality.LAYOUT
):
create_dpbench_e2e_dataset(
dpbench_dir=idir, output_dir=odir, image_scale=image_scale
)

else:
log.error(f"{modality} is not yet implemented for {benchmark}")
elif modality == EvaluationModality.TABLEFORMER:
create_dpbench_tableformer_dataset(
dpbench_dir=idir, output_dir=odir, image_scale=image_scale
)

case _:
log.error(f"{benchmark} is not yet implemented")
else:
log.error(f"{modality} is not yet implemented for {benchmark}")

else:
log.error(f"{benchmark} is not yet implemented")

def evaluate(modality:EvaluationModality, benchmark:BenchMarkNames, idir:Path, odir:Path):

def evaluate(
modality: EvaluationModality, benchmark: BenchMarkNames, idir: Path, odir: Path
):
r""""""
if not os.path.exists(idir):
log.error(f"Benchmark directory not found: {idir}")

match modality:
case EvaluationModality.END2END:
pass

case EvaluationModality.LAYOUT:
layout_evaluator = LayoutEvaluator()
ds_evaluation = layout_evaluator(idir, split="test")

case EvaluationModality.TABLEFORMER:
table_evaluator = TableEvaluator()
ds_evaluation = table_evaluator(idir, split="test")

case EvaluationModality.CODEFORMER:
pass

# Save the evaluation
save_fn = odir / f"evaluation_{benchmark.value}_{modality.value}.json"
with open(save_fn, "w") as fd:
json.dump(ds_evaluation.model_dump(), fd, indent=2, sort_keys=True)

if modality == EvaluationModality.END2END:
logging.error("not supported")

elif modality == EvaluationModality.LAYOUT:
layout_evaluator = LayoutEvaluator()
layout_evaluation = layout_evaluator(idir, split="test")

with open(save_fn, "w") as fd:
json.dump(layout_evaluation.model_dump(), fd, indent=2, sort_keys=True)

elif modality == EvaluationModality.TABLEFORMER:
table_evaluator = TableEvaluator()
table_evaluation = table_evaluator(idir, split="test")

with open(save_fn, "w") as fd:
json.dump(table_evaluation.model_dump(), fd, indent=2, sort_keys=True)

elif modality == EvaluationModality.CODEFORMER:
pass

log.info("The evaluation has been saved in '%s'", save_fn)

def visualise(modality:EvaluationModality, benchmark:BenchMarkNames, idir:Path, odir:Path):

def visualise(
modality: EvaluationModality, benchmark: BenchMarkNames, idir: Path, odir: Path
):

filename = odir / f"evaluation_{benchmark.value}_{modality.value}.json"

match modality:
case EvaluationModality.END2END:
pass

case EvaluationModality.LAYOUT:
pass

case EvaluationModality.TABLEFORMER:

with open(filename, "r") as fd:
evaluation = DatasetTableEvaluation.parse_file(filename)

# Calculate bin widths
bin_widths = [evaluation.TEDS.bins[i + 1] - evaluation.TEDS.bins[i] for i in range(len(evaluation.TEDS.bins) - 1)]
bin_middle = [(evaluation.TEDS.bins[i + 1] + evaluation.TEDS.bins[i])/2.0 for i in range(len(evaluation.TEDS.bins) - 1)]

for i in range(len(evaluation.TEDS.bins)-1):
logging.info(f"{i:02} [{evaluation.TEDS.bins[i]:.3f}, {evaluation.TEDS.bins[i+1]:.3f}]: {evaluation.TEDS.hist[i]}")

# Plot histogram
plt.bar(bin_middle, evaluation.TEDS.hist, width=bin_widths, edgecolor="black")
#width=(evaluation.TEDS.bins[1] - evaluation.TEDS.bins[0]),

plt.xlabel("TEDS")
plt.ylabel("Frequency")
plt.title(f"benchmark: {benchmark.value}, modality: {modality.value}")

figname = odir / f"evaluation_{benchmark.value}_{modality.value}.png"
logging.info(f"saving figure to {figname}")
plt.savefig(figname)

case EvaluationModality.CODEFORMER:
pass

case _:
pass


if modality == EvaluationModality.END2END:
pass

elif modality == EvaluationModality.LAYOUT:
pass

elif modality == EvaluationModality.TABLEFORMER:

with open(filename, "r") as fd:
evaluation = DatasetTableEvaluation.parse_file(filename)

# Calculate bin widths
bin_widths = [
evaluation.TEDS.bins[i + 1] - evaluation.TEDS.bins[i]
for i in range(len(evaluation.TEDS.bins) - 1)
]
bin_middle = [
(evaluation.TEDS.bins[i + 1] + evaluation.TEDS.bins[i]) / 2.0
for i in range(len(evaluation.TEDS.bins) - 1)
]

for i in range(len(evaluation.TEDS.bins) - 1):
logging.info(
f"{i:02} [{evaluation.TEDS.bins[i]:.3f}, {evaluation.TEDS.bins[i+1]:.3f}]: {evaluation.TEDS.hist[i]}"
)

# Plot histogram
plt.bar(bin_middle, evaluation.TEDS.hist, width=bin_widths, edgecolor="black")
# width=(evaluation.TEDS.bins[1] - evaluation.TEDS.bins[0]),

plt.xlabel("TEDS")
plt.ylabel("Frequency")
plt.title(f"benchmark: {benchmark.value}, modality: {modality.value}")

figname = odir / f"evaluation_{benchmark.value}_{modality.value}.png"
logging.info(f"saving figure to {figname}")
plt.savefig(figname)

elif modality == EvaluationModality.CODEFORMER:
pass


@app.command(no_args_is_help=True)
def main(
task: Annotated[
EvaluationTask,
typer.Option(
..., #EvaluationTask.CREATE,
..., # EvaluationTask.CREATE,
"-t", # Short name
"--task", # Long name
help="Evaluation task",
@@ -149,21 +181,21 @@ def main(
modality: Annotated[
EvaluationModality,
typer.Option(
..., #EvaluationModality.TABLEFORMER,
..., # EvaluationModality.TABLEFORMER,
"-m", # Short name
"--modality", # Long name
help="Evaluation modality",
),
],
],
benchmark: Annotated[
BenchMarkNames,
typer.Option(
..., #BenchMarkNames.DPBENCH,
..., # BenchMarkNames.DPBENCH,
"-b", # Short name
"--benchmark", # Long name
help="Benchmark name",
),
],
],
idir: Annotated[
Path,
typer.Option(
@@ -181,7 +213,7 @@ def main(
"--output-dir", # Long name
help="Output directory",
),
],
],
):
# Dispatch the command
if task == EvaluationTask.CREATE:
@@ -191,10 +223,10 @@ def main(
evaluate(modality, benchmark, idir, odir)

elif task == EvaluationTask.VISUALIZE:
visualise(modality, benchmark, idir, odir)
visualise(modality, benchmark, idir, odir)

else:
_log.error("Unsupported command: '%s'", command)
log.error("Unsupported command: '%s'", task.value)


if __name__ == "__main__":
(Diffs for the remaining two changed files are not shown here.)
