Reformat with ruff format

Author: ferdiga · Date: Apr 7, 2024
Commit: 065bddb
Parent: 067f429
Show file tree

Hide file tree

Showing 21 changed files with 68 additions and 50 deletions.
diff --git a/misc/batch.py b/misc/batch.py
@@ -27,7 +27,8 @@
 # pylint: disable=logging-format-interpolation
 # pylint: disable=logging-not-lazy
 
-def filecompare(a,b):
+
+def filecompare(a, b):
     try:
         return filecmp.cmp(a, b, shallow=True)
     except FileNotFoundError:
@@ -82,7 +83,9 @@ def filecompare(a,b):
         except ocrmypdf.exceptions.DigitalSignatureError:
             logging.info("Skipped document because it has a digital signature")
         except ocrmypdf.exceptions.TaggedPDFError:
-            logging.info("Skipped document because it does not need ocr as it is tagged")
+            logging.info(
+                "Skipped document because it does not need ocr as it is tagged"
+            )
         except:
             logging.error("Unhandled error occured")
         logging.info("OCR complete")
diff --git a/misc/synology.py b/misc/synology.py
@@ -53,9 +53,10 @@
         ]
         logging.info(cmd)
         full_path_ocr = os.path.join(dir_name, filename_ocr)
-        with open(filename, 'rb') as input_file, open(
-            full_path_ocr, 'wb'
-        ) as output_file:
+        with (
+            open(filename, 'rb') as input_file,
+            open(full_path_ocr, 'wb') as output_file,
+        ):
             proc = subprocess.run(
                 cmd,
                 stdin=input_file,

diff --git a/pyproject.toml b/pyproject.toml
@@ -176,3 +176,6 @@ convention = "google"
 "tests/*.py" = ["D100", "D101", "D102", "D103", "D105"]
 "misc/*.py" = ["D103", "D101", "D102"]
 "src/ocrmypdf/builtin_plugins/*.py" = ["D103", "D102", "D105"]
+
+[tool.ruff.format]
+quote-style = "preserve"
diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py
@@ -220,7 +220,8 @@ def get_deskew(
 
 def tesseract_log_output(stream: bytes) -> None:
     tlog = TesseractLoggerAdapter(
-        log, extra=log.extra if hasattr(log, 'extra') else None  # type: ignore
+        log,
+        extra=log.extra if hasattr(log, 'extra') else None,  # type: ignore
     )
 
     if not stream:

diff --git a/src/ocrmypdf/_metadata.py b/src/ocrmypdf/_metadata.py
@@ -166,9 +166,12 @@ def metadata_fixup(
 
     with Pdf.open(context.origin) as original, Pdf.open(working_file) as pdf:
         docinfo = get_docinfo(original, context)
-        with original.open_metadata(
-            set_pikepdf_as_editor=False, update_docinfo=False, strict=False
-        ) as meta_original, pdf.open_metadata() as meta_pdf:
+        with (
+            original.open_metadata(
+                set_pikepdf_as_editor=False, update_docinfo=False, strict=False
+            ) as meta_original,
+            pdf.open_metadata() as meta_pdf,
+        ):
             meta_pdf.load_from_docinfo(
                 docinfo, delete_missing=False, raise_failure=False
             )

diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
@@ -131,7 +131,7 @@ def _pdf_guess_version(input_file: Path, search_window=1024) -> str:
     """
     with open(input_file, 'rb') as f:
         signature = f.read(search_window)
-    m = re.search(br'%PDF-(\d\.\d)', signature)
+    m = re.search(rb'%PDF-(\d\.\d)', signature)
     if m:
         return m.group(1).decode('ascii')
     return ''
@@ -767,7 +767,9 @@ def render_hocr_page(hocr: Path, page_context: PageContext) -> Path:
             font=Courier(),
         )
     HocrTransform(
-        hocr_filename=hocr, dpi=dpi.to_scalar(), **debug_kwargs  # square
+        hocr_filename=hocr,
+        dpi=dpi.to_scalar(),
+        **debug_kwargs,  # square
     ).to_pdf(
         out_filename=output_file,
         image_filename=None,

diff --git a/src/ocrmypdf/_pipelines/hocr_to_ocr_pdf.py b/src/ocrmypdf/_pipelines/hocr_to_ocr_pdf.py
@@ -4,7 +4,6 @@
 
 """Implements the concurrent and page synchronous parts of the pipeline."""
 
-
 from __future__ import annotations
 
 import argparse

diff --git a/src/ocrmypdf/_pipelines/ocr.py b/src/ocrmypdf/_pipelines/ocr.py
@@ -4,7 +4,6 @@
 
 """Implements the concurrent and page synchronous parts of the pipeline."""
 
-
 from __future__ import annotations
 
 import argparse
@@ -155,12 +154,13 @@ def _run_pipeline(
     options: argparse.Namespace,
     plugin_manager: OcrmypdfPluginManager,
 ) -> ExitCode:
-    with manage_work_folder(
-        work_folder=Path(mkdtemp(prefix="ocrmypdf.io.")),
-        retain=options.keep_temporary_files,
-        print_location=options.keep_temporary_files,
-    ) as work_folder, manage_debug_log_handler(
-        options=options, work_folder=work_folder
+    with (
+        manage_work_folder(
+            work_folder=Path(mkdtemp(prefix="ocrmypdf.io.")),
+            retain=options.keep_temporary_files,
+            print_location=options.keep_temporary_files,
+        ) as work_folder,
+        manage_debug_log_handler(options=options, work_folder=work_folder),
     ):
         executor = setup_pipeline(options, plugin_manager)
         check_requested_output_file(options)

diff --git a/src/ocrmypdf/_pipelines/pdf_to_hocr.py b/src/ocrmypdf/_pipelines/pdf_to_hocr.py
@@ -4,7 +4,6 @@
 
 """Implements the concurrent and page synchronous parts of the pipeline."""
 
-
 from __future__ import annotations
 
 import argparse

diff --git a/src/ocrmypdf/builtin_plugins/default_filters.py b/src/ocrmypdf/builtin_plugins/default_filters.py
@@ -8,7 +8,5 @@
 
 
 @hookimpl
-def filter_pdf_page(
-    page, image_filename, output_pdf
-):  # pylint: disable=unused-argument
+def filter_pdf_page(page, image_filename, output_pdf):  # pylint: disable=unused-argument
     return output_pdf
diff --git a/src/ocrmypdf/builtin_plugins/tesseract_ocr.py b/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: MPL-2.0
 """Built-in plugin to implement OCR using Tesseract."""
 
-
 from __future__ import annotations
 
 import logging

diff --git a/src/ocrmypdf/languages.py b/src/ocrmypdf/languages.py
@@ -7,12 +7,12 @@
 https://www.loc.gov/standards/iso639-2/ascii_8bits.html
 """
 
-
 from typing import NamedTuple
 
 
 class ISOCodeData(NamedTuple):
     """Data for a single ISO 639 code."""
+
     alt: str
     alpha_2: str
     english: str
@@ -168,8 +168,10 @@ class ISOCodeData(NamedTuple):
     'chu': ISOCodeData(
         '',
         'cu',
-        ('Church Slavic; Old Slavonic; Church Slavonic;'
-         ' Old Bulgarian; Old Church Slavonic'),
+        (
+            'Church Slavic; Old Slavonic; Church Slavonic;'
+            ' Old Bulgarian; Old Church Slavonic'
+        ),
         "slavon d'église; vieux slave; slavon liturgique; vieux bulgare",
     ),
     'chv': ISOCodeData('', 'cv', 'Chuvash', 'tchouvache'),

diff --git a/src/ocrmypdf/optimize.py b/src/ocrmypdf/optimize.py
@@ -3,7 +3,6 @@
 
 """Post-processing image optimization of OCR PDFs."""
 
-
 from __future__ import annotations
 
 import logging

diff --git a/src/ocrmypdf/quality.py b/src/ocrmypdf/quality.py
@@ -3,7 +3,6 @@
 
 """Utilities to measure OCR quality."""
 
-
 from __future__ import annotations
 
 import re

diff --git a/src/ocrmypdf/subprocess/_windows.py b/src/ocrmypdf/subprocess/_windows.py
@@ -79,7 +79,7 @@ def registry_path_ghostscript(env=None) -> Iterator[Path]:
                 registry_subkeys(k), key=ghostscript_version_key, default=(0, 0, 0)
             )
         with winreg.OpenKey(
-            winreg.HKEY_LOCAL_MACHINE, fr"SOFTWARE\Artifex\GPL Ghostscript\{latest_gs}"
+            winreg.HKEY_LOCAL_MACHINE, rf"SOFTWARE\Artifex\GPL Ghostscript\{latest_gs}"
         ) as k:
             for _, gs_path, _ in registry_values(k):
                 yield Path(gs_path) / 'bin'

diff --git a/tests/plugins/tesseract_cache.py b/tests/plugins/tesseract_cache.py
@@ -169,31 +169,35 @@ class CacheOcrEngine(TesseractOcrEngine):
 
     @staticmethod
     def get_orientation(input_file, options):
-        with CacheOcrEngine.lock, patch(
-            'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+        with (
+            CacheOcrEngine.lock,
+            patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
         ):
             return TesseractOcrEngine.get_orientation(input_file, options)
 
     @staticmethod
     def get_deskew(input_file, options) -> float:
-        with CacheOcrEngine.lock, patch(
-            'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+        with (
+            CacheOcrEngine.lock,
+            patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
         ):
             return TesseractOcrEngine.get_deskew(input_file, options)
 
     @staticmethod
     def generate_hocr(input_file, output_hocr, output_text, options):
-        with CacheOcrEngine.lock, patch(
-            'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+        with (
+            CacheOcrEngine.lock,
+            patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
         ):
             TesseractOcrEngine.generate_hocr(
                 input_file, output_hocr, output_text, options
             )
 
     @staticmethod
     def generate_pdf(input_file, output_pdf, output_text, options):
-        with CacheOcrEngine.lock, patch(
-            'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+        with (
+            CacheOcrEngine.lock,
+            patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
         ):
             TesseractOcrEngine.generate_pdf(
                 input_file, output_pdf, output_text, options

diff --git a/tests/plugins/tesseract_debug_rotate.py b/tests/plugins/tesseract_debug_rotate.py
@@ -72,9 +72,10 @@ def get_orientation(input_file, options):
 
     @staticmethod
     def generate_hocr(input_file, output_hocr, output_text, options):
-        with Image.open(input_file) as im, open(
-            output_hocr, 'w', encoding='utf-8'
-        ) as f:
+        with (
+            Image.open(input_file) as im,
+            open(output_hocr, 'w', encoding='utf-8') as f,
+        ):
             w, h = im.size
             f.write(HOCR_TEMPLATE.format(str(w), str(h)))
         with open(output_text, 'w') as f:

diff --git a/tests/plugins/tesseract_noop.py b/tests/plugins/tesseract_noop.py
@@ -70,9 +70,10 @@ def get_deskew(input_file, options):
 
     @staticmethod
     def generate_hocr(input_file, output_hocr, output_text, options):
-        with Image.open(input_file) as im, open(
-            output_hocr, 'w', encoding='utf-8'
-        ) as f:
+        with (
+            Image.open(input_file) as im,
+            open(output_hocr, 'w', encoding='utf-8') as f,
+        ):
             w, h = im.size
             f.write(HOCR_TEMPLATE.format(str(w), str(h)))
         with open(output_text, 'w') as f:

diff --git a/tests/plugins/tesseract_simulate_oom_killer.py b/tests/plugins/tesseract_simulate_oom_killer.py
@@ -9,6 +9,7 @@
 Page 4 was chosen because of this number's association with bad luck
 in many East Asian cultures.
 """
+
 # type: ignore
 from __future__ import annotations
 

diff --git a/tests/test_graft.py b/tests/test_graft.py
@@ -11,9 +11,11 @@
 
 
 def test_no_glyphless_graft(resources, outdir):
-    with pikepdf.open(resources / 'francais.pdf') as pdf, pikepdf.open(
-        resources / 'aspect.pdf'
-    ) as pdf_aspect, pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk:
+    with (
+        pikepdf.open(resources / 'francais.pdf') as pdf,
+        pikepdf.open(resources / 'aspect.pdf') as pdf_aspect,
+        pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk,
+    ):
         pdf.pages.extend(pdf_aspect.pages)
         pdf.pages.extend(pdf_cmyk.pages)
         pdf.save(outdir / 'test.pdf')

diff --git a/tests/test_metadata.py b/tests/test_metadata.py
@@ -35,9 +35,10 @@ def test_preserve_docinfo(output_type, resources, outpdf):
         '--plugin',
         'tests/plugins/tesseract_noop.py',
     )
-    with pikepdf.open(resources / 'graph.pdf') as pdf_before, pikepdf.open(
-        output
-    ) as pdf_after:
+    with (
+        pikepdf.open(resources / 'graph.pdf') as pdf_before,
+        pikepdf.open(output) as pdf_after,
+    ):
         for key in ('/Title', '/Author'):
             assert pdf_before.docinfo[key] == pdf_after.docinfo[key]
         pdfa_info = file_claims_pdfa(str(output))
(file header missing from capture — hunk matches the src/ocrmypdf/_pipelines/*.py docstring change shown earlier)
@@ -4,7 +4,6 @@
 
 """Implements the concurrent and page synchronous parts of the pipeline."""
 
-
 from __future__ import annotations
 
 import argparse
diff --git a/src/ocrmypdf/optimize.py b/src/ocrmypdf/optimize.py
@@ -3,7 +3,6 @@
 
 """Post-processing image optimization of OCR PDFs."""
 
-
 from __future__ import annotations
 
 import logging
diff --git a/src/ocrmypdf/quality.py b/src/ocrmypdf/quality.py
@@ -3,7 +3,6 @@
 
 """Utilities to measure OCR quality."""
 
-
 from __future__ import annotations
 
 import re