From 621d6a0b89e9660555e00a162b27bdb14725fb4c Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Tue, 16 Jul 2024 13:36:48 -0700 Subject: [PATCH] Fix image size calculation when SMask dimensions do not match image Closes [Bug]: Ghostscript rasterizing failed #1351 --- src/ocrmypdf/pdfinfo/info.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/ocrmypdf/pdfinfo/info.py b/src/ocrmypdf/pdfinfo/info.py index e815b6c41..013ef4381 100644 --- a/src/ocrmypdf/pdfinfo/info.py +++ b/src/ocrmypdf/pdfinfo/info.py @@ -369,8 +369,18 @@ def __init__( pim = PdfImage(pdfimage) else: raise ValueError("Either pdfimage or inline must be set") - self._width = pim.width - self._height = pim.height + if pim.obj.get(Name.SMask, None) is not None: + # SMask is pretty much an alpha channel, but in PDF it's possible + # for this channel to have different dimensions than the image + # itself. Some PDF writers use this to create a grayscale stencil + # mask. For our purposes, the effective size is the size of the + # larger component (image or smask). + smask = pim.obj[Name.SMask] + self._width = max(smask.get(Name.Width, 0), pim.width) + self._height = max(smask.get(Name.Height, 0), pim.height) + else: + self._width = pim.width + self._height = pim.height # If /ImageMask is true, then this image is a stencil mask # (Images that draw with this stencil mask will have a reference to @@ -1071,7 +1081,6 @@ def page_dpi_profile(self) -> PageResolutionProfile | None: arg_max_dpi = image_dpis.index(max_dpi) max_area_ratio = image_areas[arg_max_dpi] / total_drawn_area - return PageResolutionProfile( weighted_dpi, max_dpi,