Skip to content

Commit

Permalink
Reformat with ruff format
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbarlow83 committed Apr 7, 2024
1 parent 067f429 commit 065bddb
Show file tree
Hide file tree
Showing 21 changed files with 68 additions and 50 deletions.
7 changes: 5 additions & 2 deletions misc/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
# pylint: disable=logging-format-interpolation
# pylint: disable=logging-not-lazy

def filecompare(a,b):

def filecompare(a, b):
try:
return filecmp.cmp(a, b, shallow=True)
except FileNotFoundError:
Expand Down Expand Up @@ -82,7 +83,9 @@ def filecompare(a,b):
except ocrmypdf.exceptions.DigitalSignatureError:
logging.info("Skipped document because it has a digital signature")
except ocrmypdf.exceptions.TaggedPDFError:
logging.info("Skipped document because it does not need ocr as it is tagged")
logging.info(
"Skipped document because it does not need ocr as it is tagged"
)
except:
logging.error("Unhandled error occured")
logging.info("OCR complete")
7 changes: 4 additions & 3 deletions misc/synology.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@
]
logging.info(cmd)
full_path_ocr = os.path.join(dir_name, filename_ocr)
with open(filename, 'rb') as input_file, open(
full_path_ocr, 'wb'
) as output_file:
with (
open(filename, 'rb') as input_file,
open(full_path_ocr, 'wb') as output_file,
):
proc = subprocess.run(
cmd,
stdin=input_file,
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,6 @@ convention = "google"
"tests/*.py" = ["D100", "D101", "D102", "D103", "D105"]
"misc/*.py" = ["D103", "D101", "D102"]
"src/ocrmypdf/builtin_plugins/*.py" = ["D103", "D102", "D105"]

[tool.ruff.format]
quote-style = "preserve"
3 changes: 2 additions & 1 deletion src/ocrmypdf/_exec/tesseract.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ def get_deskew(

def tesseract_log_output(stream: bytes) -> None:
tlog = TesseractLoggerAdapter(
log, extra=log.extra if hasattr(log, 'extra') else None # type: ignore
log,
extra=log.extra if hasattr(log, 'extra') else None, # type: ignore
)

if not stream:
Expand Down
9 changes: 6 additions & 3 deletions src/ocrmypdf/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,12 @@ def metadata_fixup(

with Pdf.open(context.origin) as original, Pdf.open(working_file) as pdf:
docinfo = get_docinfo(original, context)
with original.open_metadata(
set_pikepdf_as_editor=False, update_docinfo=False, strict=False
) as meta_original, pdf.open_metadata() as meta_pdf:
with (
original.open_metadata(
set_pikepdf_as_editor=False, update_docinfo=False, strict=False
) as meta_original,
pdf.open_metadata() as meta_pdf,
):
meta_pdf.load_from_docinfo(
docinfo, delete_missing=False, raise_failure=False
)
Expand Down
6 changes: 4 additions & 2 deletions src/ocrmypdf/_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def _pdf_guess_version(input_file: Path, search_window=1024) -> str:
"""
with open(input_file, 'rb') as f:
signature = f.read(search_window)
m = re.search(br'%PDF-(\d\.\d)', signature)
m = re.search(rb'%PDF-(\d\.\d)', signature)
if m:
return m.group(1).decode('ascii')
return ''
Expand Down Expand Up @@ -767,7 +767,9 @@ def render_hocr_page(hocr: Path, page_context: PageContext) -> Path:
font=Courier(),
)
HocrTransform(
hocr_filename=hocr, dpi=dpi.to_scalar(), **debug_kwargs # square
hocr_filename=hocr,
dpi=dpi.to_scalar(),
**debug_kwargs, # square
).to_pdf(
out_filename=output_file,
image_filename=None,
Expand Down
1 change: 0 additions & 1 deletion src/ocrmypdf/_pipelines/hocr_to_ocr_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

"""Implements the concurrent and page synchronous parts of the pipeline."""


from __future__ import annotations

import argparse
Expand Down
14 changes: 7 additions & 7 deletions src/ocrmypdf/_pipelines/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

"""Implements the concurrent and page synchronous parts of the pipeline."""


from __future__ import annotations

import argparse
Expand Down Expand Up @@ -155,12 +154,13 @@ def _run_pipeline(
options: argparse.Namespace,
plugin_manager: OcrmypdfPluginManager,
) -> ExitCode:
with manage_work_folder(
work_folder=Path(mkdtemp(prefix="ocrmypdf.io.")),
retain=options.keep_temporary_files,
print_location=options.keep_temporary_files,
) as work_folder, manage_debug_log_handler(
options=options, work_folder=work_folder
with (
manage_work_folder(
work_folder=Path(mkdtemp(prefix="ocrmypdf.io.")),
retain=options.keep_temporary_files,
print_location=options.keep_temporary_files,
) as work_folder,
manage_debug_log_handler(options=options, work_folder=work_folder),
):
executor = setup_pipeline(options, plugin_manager)
check_requested_output_file(options)
Expand Down
1 change: 0 additions & 1 deletion src/ocrmypdf/_pipelines/pdf_to_hocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

"""Implements the concurrent and page synchronous parts of the pipeline."""


from __future__ import annotations

import argparse
Expand Down
4 changes: 1 addition & 3 deletions src/ocrmypdf/builtin_plugins/default_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,5 @@


@hookimpl
def filter_pdf_page(
page, image_filename, output_pdf
): # pylint: disable=unused-argument
def filter_pdf_page(page, image_filename, output_pdf): # pylint: disable=unused-argument
return output_pdf
1 change: 0 additions & 1 deletion src/ocrmypdf/builtin_plugins/tesseract_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# SPDX-License-Identifier: MPL-2.0
"""Built-in plugin to implement OCR using Tesseract."""


from __future__ import annotations

import logging
Expand Down
8 changes: 5 additions & 3 deletions src/ocrmypdf/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
https://www.loc.gov/standards/iso639-2/ascii_8bits.html
"""


from typing import NamedTuple


class ISOCodeData(NamedTuple):
"""Data for a single ISO 639 code."""

alt: str
alpha_2: str
english: str
Expand Down Expand Up @@ -168,8 +168,10 @@ class ISOCodeData(NamedTuple):
'chu': ISOCodeData(
'',
'cu',
('Church Slavic; Old Slavonic; Church Slavonic;'
' Old Bulgarian; Old Church Slavonic'),
(
'Church Slavic; Old Slavonic; Church Slavonic;'
' Old Bulgarian; Old Church Slavonic'
),
"slavon d'église; vieux slave; slavon liturgique; vieux bulgare",
),
'chv': ISOCodeData('', 'cv', 'Chuvash', 'tchouvache'),
Expand Down
1 change: 0 additions & 1 deletion src/ocrmypdf/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

"""Post-processing image optimization of OCR PDFs."""


from __future__ import annotations

import logging
Expand Down
1 change: 0 additions & 1 deletion src/ocrmypdf/quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

"""Utilities to measure OCR quality."""


from __future__ import annotations

import re
Expand Down
2 changes: 1 addition & 1 deletion src/ocrmypdf/subprocess/_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def registry_path_ghostscript(env=None) -> Iterator[Path]:
registry_subkeys(k), key=ghostscript_version_key, default=(0, 0, 0)
)
with winreg.OpenKey(
winreg.HKEY_LOCAL_MACHINE, fr"SOFTWARE\Artifex\GPL Ghostscript\{latest_gs}"
winreg.HKEY_LOCAL_MACHINE, rf"SOFTWARE\Artifex\GPL Ghostscript\{latest_gs}"
) as k:
for _, gs_path, _ in registry_values(k):
yield Path(gs_path) / 'bin'
Expand Down
20 changes: 12 additions & 8 deletions tests/plugins/tesseract_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,31 +169,35 @@ class CacheOcrEngine(TesseractOcrEngine):

@staticmethod
def get_orientation(input_file, options):
with CacheOcrEngine.lock, patch(
'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
with (
CacheOcrEngine.lock,
patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
):
return TesseractOcrEngine.get_orientation(input_file, options)

@staticmethod
def get_deskew(input_file, options) -> float:
with CacheOcrEngine.lock, patch(
'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
with (
CacheOcrEngine.lock,
patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
):
return TesseractOcrEngine.get_deskew(input_file, options)

@staticmethod
def generate_hocr(input_file, output_hocr, output_text, options):
with CacheOcrEngine.lock, patch(
'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
with (
CacheOcrEngine.lock,
patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
):
TesseractOcrEngine.generate_hocr(
input_file, output_hocr, output_text, options
)

@staticmethod
def generate_pdf(input_file, output_pdf, output_text, options):
with CacheOcrEngine.lock, patch(
'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
with (
CacheOcrEngine.lock,
patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)),
):
TesseractOcrEngine.generate_pdf(
input_file, output_pdf, output_text, options
Expand Down
7 changes: 4 additions & 3 deletions tests/plugins/tesseract_debug_rotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ def get_orientation(input_file, options):

@staticmethod
def generate_hocr(input_file, output_hocr, output_text, options):
with Image.open(input_file) as im, open(
output_hocr, 'w', encoding='utf-8'
) as f:
with (
Image.open(input_file) as im,
open(output_hocr, 'w', encoding='utf-8') as f,
):
w, h = im.size
f.write(HOCR_TEMPLATE.format(str(w), str(h)))
with open(output_text, 'w') as f:
Expand Down
7 changes: 4 additions & 3 deletions tests/plugins/tesseract_noop.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ def get_deskew(input_file, options):

@staticmethod
def generate_hocr(input_file, output_hocr, output_text, options):
with Image.open(input_file) as im, open(
output_hocr, 'w', encoding='utf-8'
) as f:
with (
Image.open(input_file) as im,
open(output_hocr, 'w', encoding='utf-8') as f,
):
w, h = im.size
f.write(HOCR_TEMPLATE.format(str(w), str(h)))
with open(output_text, 'w') as f:
Expand Down
1 change: 1 addition & 0 deletions tests/plugins/tesseract_simulate_oom_killer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Page 4 was chosen because of this number's association with bad luck
in many East Asian cultures.
"""

# type: ignore
from __future__ import annotations

Expand Down
8 changes: 5 additions & 3 deletions tests/test_graft.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@


def test_no_glyphless_graft(resources, outdir):
with pikepdf.open(resources / 'francais.pdf') as pdf, pikepdf.open(
resources / 'aspect.pdf'
) as pdf_aspect, pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk:
with (
pikepdf.open(resources / 'francais.pdf') as pdf,
pikepdf.open(resources / 'aspect.pdf') as pdf_aspect,
pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk,
):
pdf.pages.extend(pdf_aspect.pages)
pdf.pages.extend(pdf_cmyk.pages)
pdf.save(outdir / 'test.pdf')
Expand Down
7 changes: 4 additions & 3 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def test_preserve_docinfo(output_type, resources, outpdf):
'--plugin',
'tests/plugins/tesseract_noop.py',
)
with pikepdf.open(resources / 'graph.pdf') as pdf_before, pikepdf.open(
output
) as pdf_after:
with (
pikepdf.open(resources / 'graph.pdf') as pdf_before,
pikepdf.open(output) as pdf_after,
):
for key in ('/Title', '/Author'):
assert pdf_before.docinfo[key] == pdf_after.docinfo[key]
pdfa_info = file_claims_pdfa(str(output))
Expand Down

0 comments on commit 065bddb

Please sign in to comment.