From cbcbefed9657f18456d93db43a86241589713e05 Mon Sep 17 00:00:00 2001 From: dothinking Date: Fri, 19 Jan 2024 23:00:29 +0800 Subject: [PATCH] check PyMuPDF version: 1.19.0<=v<=1.23.8 or v>=1.23.16 --- pdf2docx/common/Element.py | 14 ++++++-------- pdf2docx/converter.py | 8 +++++--- requirements.txt | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pdf2docx/common/Element.py b/pdf2docx/common/Element.py index e0a5cb4..6681c42 100644 --- a/pdf2docx/common/Element.py +++ b/pdf2docx/common/Element.py @@ -1,8 +1,8 @@ '''Object with a bounding box, e.g. Block, Line, Span. -Based on ``PyMuPDF``, the coordinates (e.g. bbox of ``page.get_text('rawdict')``) are generally -provided relative to the un-rotated page; while this ``pdf2docx`` library works under real page -coordinate system, i.e. with rotation considered. So, any instances created by this Class are +Based on ``PyMuPDF``, the coordinates (e.g. bbox of ``page.get_text('rawdict')``) are generally +provided relative to the un-rotated page; while this ``pdf2docx`` library works under real page +coordinate system, i.e. with rotation considered. So, any instances created by this Class are always applied a rotation matrix automatically. Therefore, the bbox parameter used to create ``Element`` instance MUST be relative to un-rotated @@ -34,7 +34,7 @@ def set_rotation_matrix(cls, rotation_matrix): Args: Rotation_matrix (fitz.Matrix): target matrix - """ + """ if rotation_matrix and isinstance(rotation_matrix, fitz.Matrix): cls.ROTATION_MATRIX = rotation_matrix @@ -198,8 +198,7 @@ def vertically_align_with(self, e, factor:float=0.0, text_direction:bool=True): e (Element): Object to check with factor (float, optional): Threshold of overlap ratio, the larger it is, the higher probability the two bbox-es are aligned. - text_direction (bool, optional): Consider text direction or not. - True by default,from left to right if False. + text_direction (bool, optional): Consider text direction or not. 
True by default. Returns: bool: [description] @@ -240,8 +239,7 @@ def horizontally_align_with(self, e, factor:float=0.0, text_direction:bool=True) e (Element): Element to check with factor (float, optional): threshold of overlap ratio, the larger it is, the higher probability the two bbox-es are aligned. - text_direction (bool, optional): consider text direction or not. - True by default, from left to right if False. + text_direction (bool, optional): consider text direction or not. True by default. Examples:: diff --git a/pdf2docx/converter.py b/pdf2docx/converter.py index ddfbb63..8da7e8a 100644 --- a/pdf2docx/converter.py +++ b/pdf2docx/converter.py @@ -12,9 +12,11 @@ from .page.Page import Page from .page.Pages import Pages -# check PyMuPDF>=1.19.x -if list(map(int, fitz.VersionBind.split("."))) < [1, 19, 0]: - raise SystemExit("PyMuPDF>=1.19.0 is required for pdf2docx.") +# check PyMuPDF version +# 1.19.0 <= v <= 1.23.8, or v>=1.23.16 +v = list(map(int, fitz.VersionBind.split("."))) +if v < [1,19,0] or [1,23,8]<v<[1,23,16]: + raise SystemExit("1.19.0 <= PyMuPDF <= 1.23.8, or PyMuPDF>=1.23.16 is required for pdf2docx.") # logging logging.basicConfig( diff --git a/requirements.txt b/requirements.txt index 7547c9d..14baa2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -PyMuPDF==1.23.8 +PyMuPDF python-docx>=0.8.10 fonttools>=4.24.0 numpy>=1.17.2