From ee5442d08e7f7bd64ed5765758e71f6b225666f2 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa <38427957+QuentinFuxa@users.noreply.github.com> Date: Thu, 9 Jan 2025 07:28:56 +0100 Subject: [PATCH] Improve docstring in _progressbar.py --- src/ocrmypdf/_progressbar.py | 122 +++++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 14 deletions(-) diff --git a/src/ocrmypdf/_progressbar.py b/src/ocrmypdf/_progressbar.py index 67eace864..2a6a581e5 100644 --- a/src/ocrmypdf/_progressbar.py +++ b/src/ocrmypdf/_progressbar.py @@ -32,12 +32,76 @@ class ProgressBar(Protocol): The progress bar is held in the main process/thread and not updated by child process/threads. When a child notifies the parent of completed work, the parent updates the progress bar. - Progress bars should never write to ``sys.stdout``, or they will corrupt the output if OCRmyPDF writes a PDF to standard output. - The type of events that OCRmyPDF reports to a progress bar may change in + Note: + The type of events that OCRmyPDF reports to a progress bar may change in minor releases. + + Args: + total (int | float | None): + The total number of work units expected. If ``None``, the total is unknown. + For example, if you are processing pages, this might be the number of pages, + or if you are measuring overall progress in percent, this might be 100. + desc (str | None): + A brief description of the current step (e.g. "Scanning contents", + "OCR", "PDF/A conversion"). OCRmyPDF updates this before each major step. + unit (str | None): + A short label for the type of work being tracked (e.g. "page", "%", "image"). + disable (bool): + If ``True``, progress updates are suppressed (no output). Defaults to ``False``. + **kwargs: + Future or extra parameters that OCRmyPDF might pass. Implementations + should accept and ignore unrecognized keywords gracefully. + + Example: + A simple plugin implementation could look like this: + + .. code-block:: python + + from ocrmypdf.pluginspec import ProgressBar + from ocrmypdf import hookimpl + + class ConsoleProgressBar(ProgressBar): + def __init__(self, *, total=None, desc=None, unit=None, disable=False, **kwargs): + self.total = total + self.desc = desc + self.unit = unit + self.disable = disable + self.current = 0 + + def __enter__(self): + if not self.disable: + print(f"Starting {self.desc or 'an OCR task'} (total={self.total} {self.unit})") + return self + + def __exit__(self, exc_type, exc_value, traceback): + if not self.disable: + if exc_type is None: + print("Completed successfully.") + else: + print(f"Task ended with error: {exc_value}") + return False # Let OCRmyPDF raise any exceptions + + def update(self, n=1, *, completed=None): + if completed is not None: + # If 'completed' is given, you could set self.current = completed + # but let's just read it to show usage + print(f"Absolute completion reported: {completed}") + # Otherwise, we increment by 'n' + self.current += n + if not self.disable: + if self.total: + percent = (self.current / self.total) * 100 + print(f"{self.desc}: {self.current}/{self.total} ({percent:.1f}%)") + else: + print(f"{self.desc}: {self.current} units done") + + @hookimpl + def get_progressbar_class(): + return MyProgressBar + """ def __init__( @@ -51,13 +115,22 @@ def __init__( ): """Initialize a progress bar. - *total* indicates the total number of work units. If None, the total - number of work units is unknown. If *disable* is True, the progress bar - should be disabled. *unit* is a description of the work unit. - *desc* is a description of the overall task to be performed. - - Unrecognized keyword arguments must be ignored, as the list of keyword - arguments may grow with time. + This is called once before any work is done. OCRmyPDF supplies the total + number of units (or None if unknown), a description of the work, and the + type of units. The ``disable`` parameter can be used to turn off progress + reporting. Unrecognized keyword arguments should be ignored. + + Args: + total (int | float | None): + The total amount of work. If ``None``, the total is unknown. + desc (str | None): + A description of the current task. May change for different stages. + unit (str | None): + A short label for the unit of work. + disable (bool): + If ``True``, no output or logging should be displayed. + **kwargs: + Extra parameters that may be passed by OCRmyPDF in future versions. """ def __enter__(self): @@ -66,13 +139,34 @@ def __enter__(self): def __exit__(self, *args): """Exit a progress bar context.""" - def update(self, n=1, *, completed=None): - """Update the progress bar by an increment. - - For use within a progress bar context. + def update(self, n: float = 1, *, completed: float | None = None): + """Increment the progress bar by ``n`` units, or set an absolute completion. + + OCRmyPDF calls this method repeatedly while processing pages or other tasks. + If your total is known and you track it, you might do something like: + + .. code-block:: python + + self.current += n + percent = (self.current / total) * 100 + + The ``completed`` argument can indicate an absolute position, which is + particularly helpful if you're tracking a percentage of work (e.g., 0 to 100) + and want precise updates. In contrast, the incremental parameter ``n`` is + often more useful for page-based increments. + + Args: + n (float, optional): + The amount to increment the progress by. Defaults to 1. May be + fractional if OCRmyPDF performs partial steps. If you are tracking + pages, this is typically how many pages have been processed in the + most recent step. + completed (float | None, optional): + The absolute amount of work completed so far. This can override or + supplement the simple increment logic. It's particularly useful + for percentage-based tracking (e.g., when ``total`` is 100). """ - class NullProgressBar: """Progress bar API that takes no actions."""