Skip to content

Commit

Permalink
feat: allow extra config with multiple output command (#9)
Browse files (browse the repository at this point in the history)
* feat: allow extra config with multiple output command

* upgrade runner to 22.04
  • Loading branch information
badGarnet authored Mar 4, 2025
1 parent 91385fa commit ea597d4
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 8 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ jobs:
fail-fast: true
matrix:
include:
- {name: '3.12', python: '3.12', os: ubuntu-20.04, tox: py312}
- {name: '3.11', python: '3.11', os: ubuntu-20.04, tox: py311}
- {name: '3.10', python: '3.10', os: ubuntu-20.04, tox: py310}
- {name: '3.9', python: '3.9', os: ubuntu-20.04, tox: py39}
- {name: '3.8', python: '3.8', os: ubuntu-20.04, tox: py38}
- {name: '3.12', python: '3.12', os: ubuntu-22.04, tox: py312}
- {name: '3.11', python: '3.11', os: ubuntu-22.04, tox: py311}
- {name: '3.10', python: '3.10', os: ubuntu-22.04, tox: py310}
- {name: '3.9', python: '3.9', os: ubuntu-22.04, tox: py39}
- {name: '3.8', python: '3.8', os: ubuntu-22.04, tox: py38}
steps:
- uses: actions/checkout@v3

Expand Down
18 changes: 18 additions & 0 deletions tests/pytesseract_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,24 @@ def test_run_and_get_multiple_output(test_file, function_mapping, extensions):
assert result == function_mapping[extension](test_file)


def test_run_and_get_multiple_output_with_extra_config(
    test_file,
    function_mapping,
):
    """Verify ``extra_config`` is honoured for a multi-output run.

    Requests hOCR and plain-text output in a single call while passing
    ``hocr_char_boxes=1`` as ``extra_config``. The hOCR result must match
    what the dedicated hOCR function returns when given the equivalent
    ``-c hocr_char_boxes=1`` config, and the text result must match the
    dedicated text function's output.
    """
    outputs = run_and_get_multiple_output(
        test_file,
        extensions=['hocr', 'txt'],
        extra_config='hocr_char_boxes=1',
    )

    # Only the first 1000 characters of the hOCR documents are compared.
    hocr_head = outputs[0][:1000]
    reference_hocr = function_mapping['hocr'](
        test_file,
        config='-c hocr_char_boxes=1',
    )
    assert hocr_head == reference_hocr[:1000]

    # The plain-text output is compared in full.
    reference_txt = function_mapping['txt'](test_file)
    assert outputs[1] == reference_txt


@pytest.mark.skipif(
TESSERACT_VERSION[:2] < (4, 1),
reason='requires tesseract >= 4.1',
Expand Down
2 changes: 1 addition & 1 deletion unstructured_pytesseract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
from .pytesseract import TSVNotSupported


__version__ = '0.3.13'
__version__ = '0.3.14'
5 changes: 3 additions & 2 deletions unstructured_pytesseract/pytesseract.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,16 @@ def run_and_get_multiple_output(
lang: Optional[str] = None,
nice: int = 0,
timeout: int = 0,
extra_config: str = '',
return_bytes: bool = False,
):
config = ' '.join(
EXTENTION_TO_CONFIG.get(extension, '') for extension in extensions
).strip()
if config:
config = f'-c {config}'
config = f'-c {config} {extra_config}'
else:
config = ''
config = extra_config

with save(image) as (temp_name, input_filename):
kwargs = {
Expand Down

0 comments on commit ea597d4

Please sign in to comment.