Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix/Address server_url breaking change #226

Merged
merged 12 commits into from
Feb 20, 2025
3 changes: 3 additions & 0 deletions .genignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ _test_unstructured_client

# ignore Makefile
Makefile

# Ignore the general.partition code until we can fix the base_url issue
src/unstructured_client/general.py
2 changes: 1 addition & 1 deletion .speakeasy/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ speakeasyVersion: latest
sources:
my-source:
inputs:
- location: https://api.unstructured.io/general/openapi.json
- location: https://platform.unstructuredapp.io/openapi.json
- location: https://api.unstructured.io/general/openapi.json
overlays:
- location: ./overlay_client.yaml
registry:
Expand Down
2 changes: 1 addition & 1 deletion _test_contract/platform_api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def platform_api_url():
def client(platform_api_url) -> UnstructuredClient:
_client = UnstructuredClient(
api_key_auth=FAKE_API_KEY,
server_url="platform-api",
server_url=platform_api_url,
retry_config=RetryConfig(
strategy="backoff",
retry_connection_errors=False,
Expand Down
247 changes: 247 additions & 0 deletions _test_unstructured_client/unit/test_server_urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
import httpx
import pytest

from unstructured_client.models import operations
from unstructured_client import UnstructuredClient, basesdk, utils


# Raise one of these from our mock to return to the test code
class BaseUrlCorrect(Exception):
pass


class BaseUrlIncorrect(Exception):
pass


def get_client_method_with_mock(
sdk_endpoint_name,
client_instance,
mocked_server_url,
monkeypatch
):
"""
Given an endpoint name, e.g. "general.partition", return a reference
to that method off of the given client instance.

The client's _build_request will have the following mock:
Assert that the provided server_url is passed into _build_request.
Raise a custom exception to get back to the test.
"""
# Mock this to get past param validation
def mock_unmarshal(*args, **kwargs):
return {}

monkeypatch.setattr(utils, "unmarshal", mock_unmarshal)

# Assert that the correct base_url makes it to here
def mock_build_request(*args, base_url, **kwargs):
if base_url == mocked_server_url:
raise BaseUrlCorrect
else:
raise BaseUrlIncorrect(base_url)

# Find the method from the given string
class_name, method_name = sdk_endpoint_name.split(".")
endpoint_class = getattr(client_instance, class_name)
endpoint_method = getattr(endpoint_class, method_name)

if "async" in method_name:
monkeypatch.setattr(endpoint_class, "_build_request_async", mock_build_request)
else:
monkeypatch.setattr(endpoint_class, "_build_request", mock_build_request)

return endpoint_method


@pytest.mark.parametrize(
"sdk_endpoint_name",
[
("general.partition"),
],
)
def test_endpoint_uses_correct_url(monkeypatch, sdk_endpoint_name):
# Test 1
# Pass server_url to the client, no path
s = UnstructuredClient(server_url="http://localhost:8000")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and ignored, got {e}")

# Test 2
# Pass server_url to the client, with path
s = UnstructuredClient(server_url="http://localhost:8000/my/endpoint")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and was not stripped, got {e}")

# Test 3
# Pass server_url to the endpoint, no path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={}, server_url="http://localhost:8000")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")

# Test 4
# Pass server_url to the endpoint, with path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
client_method(request={}, server_url="http://localhost:8000/my/endpoint")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")


# Test 5
# No server_url, should take the default
server_url = "https://api.unstructuredapp.io" if "partition" in sdk_endpoint_name else "https://platform.unstructuredapp.io"

s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
server_url,
monkeypatch
)

try:
client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"Default url not used, got {e}")


@pytest.mark.asyncio
@pytest.mark.parametrize(
"sdk_endpoint_name",
[
("general.partition_async"),
],
)
async def test_async_endpoint_uses_correct_url(monkeypatch, sdk_endpoint_name):
# Test 1
# Pass server_url to the client, no path
s = UnstructuredClient(server_url="http://localhost:8000")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and ignored, got {e}")

# Test 2
# Pass server_url to the client, with path
s = UnstructuredClient(server_url="http://localhost:8000/my/endpoint")
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to client and was not stripped, got {e}")

# Test 3
# Pass server_url to the endpoint, no path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={}, server_url="http://localhost:8000")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")

# Test 4
# Pass server_url to the endpoint, with path
s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
"http://localhost:8000",
monkeypatch
)

try:
await client_method(request={}, server_url="http://localhost:8000/my/endpoint")
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"server_url was passed to endpoint and ignored, got {e}")


# Test 5
# No server_url, should take the default
server_url = "https://api.unstructuredapp.io" if "partition" in sdk_endpoint_name else "https://platform.unstructuredapp.io"

s = UnstructuredClient()
client_method = get_client_method_with_mock(
sdk_endpoint_name,
s,
server_url,
monkeypatch
)

try:
await client_method(request={})
except BaseUrlCorrect:
pass
except BaseUrlIncorrect as e:
pytest.fail(f"Default url not used, got {e}")
6 changes: 3 additions & 3 deletions docs/models/errors/httpvalidationerror.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

## Fields

| Field | Type | Required | Description |
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ---------------------------------------------------------------------- |
| `detail` | List[[shared.ValidationError](../../models/shared/validationerror.md)] | :heavy_minus_sign: | N/A |
| Field | Type | Required | Description |
| -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- | -------------------------------------------------------- |
| `detail` | [Optional[errors.Detail]](../../models/errors/detail.md) | :heavy_minus_sign: | N/A |
55 changes: 39 additions & 16 deletions src/unstructured_client/_hooks/custom/clean_server_url_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,54 @@
from unstructured_client.httpclient import HttpClient


class CleanServerUrlSDKInitHook(SDKInitHook):
"""Hook fixing common mistakes by users in defining `server_url` in the unstructured-client"""
def clean_server_url(base_url: str) -> str:
"""Fix url scheme and remove the '/general/v0/general' path."""

if not base_url:
return ""

# add a url scheme if not present (urllib.parse does not work reliably without it)
if "http" not in base_url:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would parse the URL first and then check if a url has a scheme by using parsed_url.scheme. Then we can prepend http:// or https://

Copy link
Collaborator Author

@awalker4 awalker4 Feb 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That comment has been around for a while - I tried parsing without the scheme and it does look like we need to make sure http:// is there first or it gets confused.

In [2]: urlparse("localhost:8000/general/v0/general")
Out[2]: ParseResult(scheme='localhost', netloc='', path='8000/general/v0/general', params='', query='', fragment='')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, good to know!

base_url = "http://" + base_url

parsed_url: ParseResult = urlparse(base_url)

if "api.unstructuredapp.io" in parsed_url.netloc:
if parsed_url.scheme != "https":
parsed_url = parsed_url._replace(scheme="https")

# We only want the base url
return urlunparse(parsed_url._replace(path="", params="", query="", fragment=""))

def clean_server_url(self, base_url: str) -> str:
"""Fix url scheme and remove the '/general/v0/general' path."""

if not base_url:
return ""
# -- add a url scheme if not present (urllib.parse does not work reliably without it)
if "http" not in base_url:
base_url = "http://" + base_url
def choose_server_url(endpoint_url: str | None, client_url: str, default_endpoint_url: str) -> str:
"""
Helper function to fix a breaking change in the SDK past 0.30.0.
When we merged the partition and platform specs, the client server_url stopped working,
and users need to pass it in the endpoint function.
For now, call this helper in the generated code to set the correct url.

parsed_url: ParseResult = urlparse(base_url)
Order of choices:
Endpoint server_url -> s.general.partition(server_url=...)
(Passed in as None if not set)

if "api.unstructuredapp.io" in base_url:
if parsed_url.scheme != "https":
parsed_url = parsed_url._replace(scheme="https")
Base client server_url -> s = UnstructuredClient(server_url=...)
(Passed as empty string if not set)

# -- path should always be empty
return urlunparse(parsed_url._replace(path=""))
Default endpoint URL as defined in the spec
"""

url = endpoint_url if endpoint_url is not None else (client_url or default_endpoint_url)
return clean_server_url(url)


class CleanServerUrlSDKInitHook(SDKInitHook):
"""Hook fixing common mistakes by users in defining `server_url` in the unstructured-client"""

def sdk_init(
self, base_url: str, client: HttpClient
) -> Tuple[str, HttpClient]:
"""Concrete implementation for SDKInitHook."""
cleaned_url = self.clean_server_url(base_url)
cleaned_url = clean_server_url(base_url)

return cleaned_url, client
Loading
Loading