Skip to content

Commit

Permalink
Refactor server url helper functions
Browse files Browse the repository at this point in the history
  • Loading branch information
awalker4 committed Feb 19, 2025
1 parent f21ac18 commit 8625982
Showing 1 changed file with 19 additions and 21 deletions.
40 changes: 19 additions & 21 deletions src/unstructured_client/_hooks/custom/clean_server_url_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,46 +12,44 @@ def clean_server_url(base_url: str) -> str:

if not base_url:
return ""
# -- add a url scheme if not present (urllib.parse does not work reliably without it)

# add a url scheme if not present (urllib.parse does not work reliably without it)
if "http" not in base_url:
base_url = "http://" + base_url

parsed_url: ParseResult = urlparse(base_url)

if "api.unstructuredapp.io" in base_url:
unstructured_services = [
"api.unstructuredapp.io",
"api.unstructured.io",
"platform.unstructuredapp.io",
]
if parsed_url.netloc in unstructured_services:
if parsed_url.scheme != "https":
parsed_url = parsed_url._replace(scheme="https")

# -- path should always be empty
return urlunparse(parsed_url._replace(path=""))
# We only want the base url
return urlunparse(parsed_url._replace(path="", params="", query="", fragment=""))


def choose_server_url(endpoint_url: str, client_url: str, default_endpoint_url: str) -> str:
    """
    Choose the server url for a request and return it as a cleaned base url.

    Helper function to fix a breaking change in the SDK past 0.30.0.
    When we merged the partition and platform specs, the client server_url stopped working,
    and users need to pass it in the endpoint function.
    For now, call this helper in the generated code to set the correct url.

    Order of choices:
      1. Endpoint server_url -> s.general.partition(server_url=...)
         (Passed in as None if not set)
      2. Base client server_url -> s = UnstructuredClient(server_url=...)
         (Passed as empty string if not set)
      3. Default endpoint URL as defined in the spec

    The chosen url is passed through clean_server_url() before being returned.
    """
    # An endpoint url wins whenever it is not None; otherwise fall back to the
    # client url, and to the spec default when the client url is empty.
    # Make sure we drop the path if it's provided anywhere
    # (The endpoint url will be set after we've done the init hooks)
    url = endpoint_url if endpoint_url is not None else (client_url or default_endpoint_url)
    return clean_server_url(url)


Expand Down

0 comments on commit 8625982

Please sign in to comment.