diff --git a/src/unstructured_client/_hooks/custom/clean_server_url_hook.py b/src/unstructured_client/_hooks/custom/clean_server_url_hook.py
index eb8f6580..225bcb5a 100644
--- a/src/unstructured_client/_hooks/custom/clean_server_url_hook.py
+++ b/src/unstructured_client/_hooks/custom/clean_server_url_hook.py
@@ -12,46 +12,50 @@ def clean_server_url(base_url: str) -> str:
 
     if not base_url:
         return ""
-    # -- add a url scheme if not present (urllib.parse does not work reliably without it)
-    if "http" not in base_url:
+
+    # add a url scheme if not present (urllib.parse does not work reliably without it)
+    # Check for a real scheme prefix: a substring test also matches hosts or paths that
+    # merely contain "http" (e.g. "httpbin.org"), which would then be parsed without a scheme.
+    if not base_url.lower().startswith(("http://", "https://")):
         base_url = "http://" + base_url
 
     parsed_url: ParseResult = urlparse(base_url)
 
-    if "api.unstructuredapp.io" in base_url:
+    unstructured_services = (
+        "api.unstructuredapp.io",
+        "api.unstructured.io",
+        "platform.unstructuredapp.io",
+    )
+    # Match on hostname, not netloc: hostname is lowercased and excludes any port or
+    # userinfo, so "api.unstructuredapp.io:443" is still recognized as our service.
+    if parsed_url.hostname in unstructured_services:
         if parsed_url.scheme != "https":
             parsed_url = parsed_url._replace(scheme="https")
 
-    # -- path should always be empty
-    return urlunparse(parsed_url._replace(path=""))
+    # We only want the base url
+    return urlunparse(parsed_url._replace(path="", params="", query="", fragment=""))
 
 
-def choose_server_url(endpoint_url, client_url, default_endpoint_url) -> str:
+def choose_server_url(
+    endpoint_url: "str | None", client_url: str, default_endpoint_url: str
+) -> str:
     """
     Helper function to fix a breaking change in the SDK past 0.30.0.
     When we merged the partition and platform specs, the client server_url stopped working,
     and users need to pass it in the endpoint function.
     For now, call this helper in the generated code to set the correct url.
-    """
 
-    # First, see if the endpoint has a url:
-    # s.general.partition(server_url=...)
-    if endpoint_url is not None:
-        url = endpoint_url
+    Order of choices:
+    1. Endpoint server_url -> s.general.partition(server_url=...)
+       (Passed in as None if not set)
 
-    # Next, try the base client url:
-    # s = UnstructuredClient(server_url=...)
-    # (If not set it's an empty string)
-    elif client_url != "":
-        url = client_url
+    2. Base client server_url -> s = UnstructuredClient(server_url=...)
+       (Passed as empty string if not set)
 
-    # Finally, take the url defined in the spec:
-    # operations.PARTITION_SERVERS[...]
-    else:
-        url = default_endpoint_url
+    3. Default endpoint URL as defined in the spec
+    """
 
-    # Make sure we drop the path if it's provided anywhere
-    # (The endpoint url will be set after we've done the init hooks)
+    url = endpoint_url if endpoint_url is not None else (client_url or default_endpoint_url)
     return clean_server_url(url)
 
 