Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: improve input validation to allow quotes for strategy parameter #457

Merged
merged 1 commit into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.0.81.dev

* Update the `strategy` parameter to accept values wrapped in single (`'`) or double (`"`) quotes.

## 0.0.80

* Bump to `unstructured` 0.15.10
Expand Down
2 changes: 1 addition & 1 deletion prepline_general/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
app = FastAPI(
title="Unstructured Pipeline API",
summary="Partition documents with the Unstructured library",
version="0.0.80",
version="0.0.81",
docs_url="/general/docs",
openapi_url="/general/openapi.json",
servers=[
Expand Down
4 changes: 2 additions & 2 deletions prepline_general/api/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ def return_content_type(filename: str):


@router.get("/general/v0/general", include_in_schema=False)
@router.get("/general/v0.0.80/general", include_in_schema=False)
@router.get("/general/v0.0.81/general", include_in_schema=False)
async def handle_invalid_get_request():
raise HTTPException(
status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Only POST requests are supported."
Expand All @@ -668,7 +668,7 @@ async def handle_invalid_get_request():
description="Description",
operation_id="partition_parameters",
)
@router.post("/general/v0.0.80/general", include_in_schema=False)
@router.post("/general/v0.0.81/general", include_in_schema=False)
def general_partition(
request: Request,
# cannot use annotated type here because of a bug described here:
Expand Down
2 changes: 1 addition & 1 deletion prepline_general/api/models/form_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def as_form(
description="The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto",
examples=["auto", "hi_res"],
),
BeforeValidator(SmartValueParser[str]().value_or_first_element),
BeforeValidator(SmartValueParser[str]().literal_value_stripped_or_first_element),
] = "auto",
extract_image_block_types: Annotated[
List[str],
Expand Down
11 changes: 11 additions & 0 deletions prepline_general/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ def value_or_first_element(self, value: Union[T, list[T]]) -> list[T] | T | None
return [_cast_to_type(elem, container_elems_class) for elem in value]
return _cast_to_type(value, origin_class) # noqa

def literal_value_stripped_or_first_element(self, value: str) -> str | None:
    """Return ``value`` without quotation characters, cast to the parser's type.

    Every single quote (``'``) and double quote (``"``) is removed wherever it
    occurs in the string, not only a surrounding pair, so ``"'fast'"`` and
    ``"fa'st"`` both yield ``"fast"``.

    Args:
        value (str): raw literal string, possibly containing quote characters.

    Returns:
        The result of ``_cast_to_type`` applied to the unquoted string and the
        origin class reported by ``_get_origin_container_classes``.
    """
    # Only the origin class is used here; the container element class is not
    # needed for a plain literal value.
    origin_class, _ = self._get_origin_container_classes()
    # Delete both quote characters in a single pass over the string.
    unquoted = value.translate(str.maketrans("", "", "'\""))
    return _cast_to_type(unquoted, origin_class)

def _get_origin_container_classes(self) -> tuple[type, type | None]:
"""Extracts class (and container class if it's a list) from a type hint

Expand Down
2 changes: 1 addition & 1 deletion preprocessing-pipeline-family.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
name: general
version: 0.0.80
version: 0.0.81
Empty file added test_general/__init__.py
Empty file.
Empty file added test_general/api/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions test_general/test_utils.py → test_general/api/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,23 @@
def test_smart_value_parser(desired_type: type, value_to_parse: Any, expected_result: Any):
parsed_value = SmartValueParser[desired_type]().value_or_first_element(value_to_parse)
assert expected_result == parsed_value


@pytest.mark.parametrize(
    "desired_type, value_to_parse, expected_result",
    [
        # Unquoted input is returned unchanged.
        (str, "fast", "fast"),
        # Surrounding single or double quotes are removed.
        (str, "'fast'", "fast"),
        (str, '"fast"', "fast"),
        # Characters other than quotes are left alone.
        (str, "!fast", "!fast"),
        # Quotes inside or trailing the value are removed as well.
        (str, "fa'st", "fast"),
        (str, "fast''''''", "fast"),
    ],
)
def test_literal_value_stripped_or_first_element(
    desired_type: type, value_to_parse: Any, expected_result: Any
):
    """Check that quote characters are stripped from literal string values."""
    parser = SmartValueParser[desired_type]()
    stripped = parser.literal_value_stripped_or_first_element(value_to_parse)
    assert stripped == expected_result