Skip to content

Commit

Permalink
Spike HTTP(S) proxy implementation for sync client.
Browse files Browse the repository at this point in the history
  • Loading branch information
aaugustin committed Jan 31, 2025
1 parent 469c41a commit c222821
Show file tree
Hide file tree
Showing 7 changed files with 539 additions and 36 deletions.
3 changes: 1 addition & 2 deletions docs/reference/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,11 @@ Client
| Perform HTTP Digest Authentication |||||
| (`#784`_) | | | | |
+------------------------------------+--------+--------+--------+--------+
| Connect via HTTP proxy (`#364`_) | | |||
| Connect via HTTP proxy | | |||
+------------------------------------+--------+--------+--------+--------+
| Connect via SOCKS5 proxy |||||
+------------------------------------+--------+--------+--------+--------+

.. _#364: https://github.com/python-websockets/websockets/issues/364
.. _#784: https://github.com/python-websockets/websockets/issues/784

Known limitations
Expand Down
5 changes: 5 additions & 0 deletions docs/topics/proxies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,8 @@ SOCKS proxy is configured in the operating system, python-socks uses SOCKS5h.

python-socks supports username/password authentication for SOCKS5 (:rfc:`1929`)
but does not support other authentication methods such as GSSAPI (:rfc:`1961`).

HTTP proxies
------------

TODO
144 changes: 139 additions & 5 deletions src/websockets/asyncio/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,24 @@
import urllib.parse
from collections.abc import AsyncIterator, Generator, Sequence
from types import TracebackType
from typing import Any, Callable, Literal
from typing import Any, Callable, Literal, cast

from ..client import ClientProtocol, backoff
from ..datastructures import HeadersLike
from ..exceptions import InvalidMessage, InvalidStatus, ProxyError, SecurityError
from ..datastructures import Headers, HeadersLike
from ..exceptions import (
InvalidMessage,
InvalidProxyMessage,
InvalidProxyStatus,
InvalidStatus,
ProxyError,
SecurityError,
)
from ..extensions.base import ClientExtensionFactory
from ..extensions.permessage_deflate import enable_client_permessage_deflate
from ..headers import validate_subprotocols
from ..headers import build_authorization_basic, build_host, validate_subprotocols
from ..http11 import USER_AGENT, Response
from ..protocol import CONNECTING, Event
from ..streams import StreamReader
from ..typing import LoggerLike, Origin, Subprotocol
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
from .compatibility import TimeoutError, asyncio_timeout
Expand Down Expand Up @@ -257,7 +265,7 @@ class connect:
the TLS handshake.
* You can set ``host`` and ``port`` to connect to a different host and port
from those found in ``uri``. This only changes the destination of the TCP
from those found in ``uri``. This only changes the destination of the TCP
connection. The host name from ``uri`` is still used in the TLS handshake
for secure connections and in the ``Host`` header.
Expand All @@ -266,6 +274,16 @@ class connect:
:meth:`~asyncio.loop.create_connection` method) to create a suitable
client socket and customize it.
When using a proxy:
* Prefix keyword arguments with ``proxy_`` for configuring TLS between the
client and an HTTPS proxy: ``proxy_ssl``, ``proxy_server_hostname``,
``proxy_ssl_handshake_timeout``, and ``proxy_ssl_shutdown_timeout``.
* Use the standard keyword arguments for configuring TLS between the proxy
and the WebSocket server: ``ssl``, ``server_hostname``,
``ssl_handshake_timeout``, and ``ssl_shutdown_timeout``.
* Other keyword arguments are used only for connecting to the proxy.
Raises:
InvalidURI: If ``uri`` isn't a valid WebSocket URI.
InvalidProxy: If ``proxy`` isn't a valid proxy.
Expand Down Expand Up @@ -385,16 +403,41 @@ def factory() -> ClientConnection:
elif proxy is not None:
proxy_parsed = parse_proxy(proxy)
if proxy_parsed.scheme[:5] == "socks":
# Connect to the proxy.
sock = await connect_socks_proxy(
proxy_parsed,
ws_uri,
local_addr=kwargs.pop("local_addr", None),
)
# Connect to the server via the proxy.
_, connection = await loop.create_connection(
factory,
sock=sock,
**kwargs,
)
elif proxy[:4] == "http":
# Split keyword arguments for connecting to the proxy or the server.
all_kwargs, proxy_kwargs, kwargs = kwargs, {}, {}
for key, value in all_kwargs.items():
if key.startswith("ssl") or key == "server_hostname":
kwargs[key] = value
elif key.startswith("proxy_"):
proxy_kwargs[key[6:]] = value
else:
proxy_kwargs[key] = value
# Connect to the proxy.
transport = await connect_http_proxy(
parse_proxy(proxy),
ws_uri,
**proxy_kwargs,
)
# Connect to the server via the proxy.
connection = factory()
transport.set_protocol(connection)
ssl = kwargs.pop("ssl", None)
if ssl is not None:
await loop.start_tls(transport, connection, ssl, **kwargs)
connection.connection_made(transport)
else:
raise AssertionError("unsupported proxy")
else:
Expand Down Expand Up @@ -652,3 +695,94 @@ async def connect_socks_proxy(
**kwargs: Any,
) -> socket.socket:
raise ImportError("python-socks is required to use a SOCKS proxy")


def prepare_connect_request(proxy: Proxy, ws_uri: WebSocketURI) -> bytes:
    """
    Build the raw ``CONNECT`` request sent to an HTTP proxy.

    Args:
        proxy: Proxy to connect through. When it carries a username, a
            ``Proxy-Authorization`` header is added.
        ws_uri: WebSocket URI whose host and port are the tunnel target.

    Returns:
        The encoded request line followed by the serialized headers.

    """
    # The request target is built with always_include_port=True, while the
    # Host header is built without forcing the port.
    target = build_host(
        ws_uri.host,
        ws_uri.port,
        ws_uri.secure,
        always_include_port=True,
    )

    request_headers = Headers()
    request_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
    if proxy.username is not None:
        assert proxy.password is not None  # enforced by parse_proxy()
        credentials = build_authorization_basic(proxy.username, proxy.password)
        request_headers["Proxy-Authorization"] = credentials

    # We cannot use the Request class because it supports only GET requests.
    request_line = f"CONNECT {target} HTTP/1.1\r\n".encode()
    return request_line + request_headers.serialize()


class HTTPProxyConnection(asyncio.Protocol):
    """
    asyncio protocol that opens a tunnel through an HTTP proxy.

    It writes a ``CONNECT`` request as soon as the connection is made, then
    parses the proxy's response and publishes the outcome in ``response``.

    Args:
        ws_uri: WebSocket URI of the server to reach through the proxy.
        proxy: Proxy to connect through.

    """

    def __init__(self, ws_uri: WebSocketURI, proxy: Proxy):
        self.ws_uri = ws_uri
        self.proxy = proxy

        self.reader = StreamReader()
        self.parser = Response.parse(
            self.reader.read_line,
            self.reader.read_exact,
            self.reader.read_to_eof,
            include_body=False,
        )

        loop = asyncio.get_running_loop()
        # Completed with the proxy's response when its status is 2xx, or with
        # an exception describing why the tunnel could not be established.
        self.response: asyncio.Future[Response] = loop.create_future()

    def run_parser(self) -> None:
        """Advance the response parser; complete ``response`` when it ends."""
        # Once the outcome is known, the parser generator is exhausted and the
        # future cannot be completed again. Advancing either would raise.
        if self.response.done():
            return
        try:
            next(self.parser)
        except StopIteration as exc:
            response = exc.value
            if 200 <= response.status_code < 300:
                self.response.set_result(response)
            else:
                self.response.set_exception(InvalidProxyStatus(response))
        except Exception as exc:
            proxy_exc = InvalidProxyMessage(
                "did not receive a valid HTTP response from proxy"
            )
            proxy_exc.__cause__ = exc
            self.response.set_exception(proxy_exc)

    def connection_made(self, transport: asyncio.BaseTransport) -> None:
        """Store the transport and send the ``CONNECT`` request."""
        transport = cast(asyncio.Transport, transport)
        self.transport = transport
        self.transport.write(prepare_connect_request(self.proxy, self.ws_uri))

    def data_received(self, data: bytes) -> None:
        """Feed received bytes to the reader and advance the parser."""
        self.reader.feed_data(data)
        self.run_parser()

    def eof_received(self) -> None:
        """Signal end of stream to the reader and advance the parser."""
        self.reader.feed_eof()
        self.run_parser()

    def connection_lost(self, exc: Exception | None) -> None:
        """Propagate a connection error to ``response`` if it is still pending."""
        # NOTE(review): eof_received() may already have called feed_eof();
        # confirm that StreamReader tolerates a second call.
        self.reader.feed_eof()
        # Future.set_exception() raises InvalidStateError on a completed or
        # cancelled future, so only report the error while still pending.
        if exc is not None and not self.response.done():
            self.response.set_exception(exc)


async def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    **kwargs: Any,
) -> asyncio.Transport:
    """
    Connect to an HTTP(S) proxy and establish a tunnel to ``ws_uri``.

    Args:
        proxy: Proxy to connect through.
        ws_uri: WebSocket URI of the server to reach through the proxy.
        kwargs: Passed to :meth:`~asyncio.loop.create_connection` for the
            connection to the proxy.

    Returns:
        The transport connected to the proxy, once the proxy has accepted
        the ``CONNECT`` request.

    Raises:
        ValueError: If ``ssl`` is provided for an ``http://`` proxy.

    Exceptions set on the protocol's ``response`` future propagate after the
    transport is closed.

    """
    if proxy.scheme != "https":
        if kwargs.get("ssl") is not None:
            raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")
    elif kwargs.get("ssl") is None:
        # An https:// proxy requires TLS even when no configuration is given.
        # ssl=True makes create_connection() use a default context.
        kwargs["ssl"] = True

    transport, protocol = await asyncio.get_running_loop().create_connection(
        lambda: HTTPProxyConnection(ws_uri, proxy),
        proxy.host,
        proxy.port,
        **kwargs,
    )

    try:
        # This raises exceptions if the connection to the proxy fails.
        await protocol.response
    except BaseException:
        # BaseException also covers asyncio.CancelledError, so the transport
        # isn't leaked when the caller cancels or times out.
        transport.close()
        raise

    return transport
120 changes: 116 additions & 4 deletions src/websockets/sync/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
import threading
import warnings
from collections.abc import Sequence
from typing import Any, Literal
from typing import Any, Literal, cast

from ..client import ClientProtocol
from ..datastructures import HeadersLike
from ..exceptions import ProxyError
from ..datastructures import Headers, HeadersLike
from ..exceptions import InvalidProxyMessage, InvalidProxyStatus, ProxyError
from ..extensions.base import ClientExtensionFactory
from ..extensions.permessage_deflate import enable_client_permessage_deflate
from ..headers import validate_subprotocols
from ..headers import build_authorization_basic, build_host, validate_subprotocols
from ..http11 import USER_AGENT, Response
from ..protocol import CONNECTING, Event
from ..streams import StreamReader
from ..typing import LoggerLike, Origin, Subprotocol
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
from .connection import Connection
Expand Down Expand Up @@ -141,6 +142,8 @@ def connect(
additional_headers: HeadersLike | None = None,
user_agent_header: str | None = USER_AGENT,
proxy: str | Literal[True] | None = True,
proxy_ssl: ssl_module.SSLContext | None = None,
proxy_server_hostname: str | None = None,
# Timeouts
open_timeout: float | None = 10,
ping_interval: float | None = 20,
Expand Down Expand Up @@ -195,6 +198,9 @@ def connect(
to :obj:`None` to disable the proxy or to the address of a proxy
to override the system configuration. See the :doc:`proxy docs
<../../topics/proxies>` for details.
proxy_ssl: Configuration for enabling TLS on the proxy connection.
proxy_server_hostname: Host name for the TLS handshake with the proxy.
``proxy_server_hostname`` overrides the host name from ``proxy``.
open_timeout: Timeout for opening the connection in seconds.
:obj:`None` disables the timeout.
ping_interval: Interval between keepalive pings in seconds.
Expand Down Expand Up @@ -443,3 +449,109 @@ def connect_socks_proxy(
**kwargs: Any,
) -> socket.socket:
raise ImportError("python-socks is required to use a SOCKS proxy")


def prepare_connect_request(proxy: Proxy, ws_uri: WebSocketURI) -> bytes:
    """
    Serialize the ``CONNECT`` request asking a proxy to open a tunnel.

    Args:
        proxy: Proxy receiving the request. If it has a username, its
            credentials are sent in a ``Proxy-Authorization`` header.
        ws_uri: WebSocket URI providing the host and port of the tunnel.

    Returns:
        The request line and headers as bytes.

    """
    connect_headers = Headers()
    connect_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
    if proxy.username is not None:
        assert proxy.password is not None  # enforced by parse_proxy()
        connect_headers["Proxy-Authorization"] = build_authorization_basic(
            proxy.username,
            proxy.password,
        )

    # Unlike the Host header, the request target is built with
    # always_include_port=True.
    authority = build_host(
        ws_uri.host, ws_uri.port, ws_uri.secure, always_include_port=True
    )
    # We cannot use the Request class because it supports only GET requests.
    return b"".join(
        [
            f"CONNECT {authority} HTTP/1.1\r\n".encode(),
            connect_headers.serialize(),
        ]
    )


def read_connect_response(sock: socket.socket, deadline: Deadline) -> Response:
    """
    Read and parse the proxy's response to a ``CONNECT`` request.

    Args:
        sock: Socket connected to the proxy.
        deadline: Source of the timeout applied before each read.

    Returns:
        The parsed response when its status code is 2xx.

    Raises:
        InvalidProxyStatus: If the status code isn't 2xx.
        TimeoutError: If reading from the socket times out.
        InvalidProxyMessage: If any other error occurs while reading or
            parsing the response.

    """
    stream = StreamReader()
    parsing = Response.parse(
        stream.read_line,
        stream.read_exact,
        stream.read_to_eof,
        include_body=False,
    )
    try:
        # Feed the parser until it finishes; it signals completion by
        # raising StopIteration carrying the response.
        while True:
            sock.settimeout(deadline.timeout())
            chunk = sock.recv(4096)
            if not chunk:
                stream.feed_eof()
            else:
                stream.feed_data(chunk)
            next(parsing)
    except StopIteration as finished:
        proxy_response = cast(Response, finished.value)
        if not 200 <= proxy_response.status_code < 300:
            raise InvalidProxyStatus(proxy_response)
        return proxy_response
    except socket.timeout:
        raise TimeoutError("timed out while connecting to HTTP proxy")
    except Exception as exc:
        raise InvalidProxyMessage(
            "did not receive a valid HTTP response from proxy"
        ) from exc
    finally:
        # Leave the socket without a timeout, whatever the outcome.
        sock.settimeout(None)


def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    deadline: Deadline,
    *,
    ssl: ssl_module.SSLContext | None = None,
    server_hostname: str | None = None,
    **kwargs: Any,
) -> socket.socket:
    """
    Connect to an HTTP(S) proxy and establish a tunnel to ``ws_uri``.

    Args:
        proxy: Proxy to connect through.
        ws_uri: WebSocket URI of the server to reach through the proxy.
        deadline: Source of the timeouts for connecting, for the TLS
            handshake, and for reading the response.
        ssl: TLS configuration for an ``https://`` proxy. A default context
            is created when it is omitted.
        server_hostname: Host name for the TLS handshake with the proxy.
            Defaults to ``proxy.host``.
        kwargs: Passed to :func:`socket.create_connection`.

    Returns:
        The socket connected to the proxy, wrapped in TLS for an
        ``https://`` proxy.

    Raises:
        ValueError: If ``ssl`` is provided for an ``http://`` proxy.

    Exceptions raised during the TLS handshake, while sending the request,
    or while reading the response propagate after the socket is closed.

    """
    if proxy.scheme != "https" and ssl is not None:
        raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")

    # Connect socket

    kwargs.setdefault("timeout", deadline.timeout())
    sock = socket.create_connection((proxy.host, proxy.port), **kwargs)

    # From here on, every failure must close the socket. Until wrap_socket()
    # returns, ``sock`` is still the plain TCP socket, so it is closed too.
    try:
        # Initialize TLS wrapper and perform TLS handshake

        if proxy.scheme == "https":
            if ssl is None:
                ssl = ssl_module.create_default_context()
            if server_hostname is None:
                server_hostname = proxy.host
            sock.settimeout(deadline.timeout())
            sock = ssl.wrap_socket(sock, server_hostname=server_hostname)
            sock.settimeout(None)

        # Send CONNECT request to the proxy and read response.

        sock.sendall(prepare_connect_request(proxy, ws_uri))
        read_connect_response(sock, deadline)
    except Exception:
        sock.close()
        raise

    return sock


def connect_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    deadline: Deadline,
    **kwargs: Any,
) -> socket.socket:
    """
    Connect via a proxy and return the socket.

    Dispatches on the proxy scheme: schemes starting with ``socks`` go to
    ``connect_socks_proxy`` and schemes starting with ``http`` go to
    ``connect_http_proxy``.

    """
    # parse_proxy() validates proxy.scheme.
    scheme = proxy.scheme

    if scheme.startswith("socks"):
        # websockets is consistent with the socket module while
        # python_socks is consistent across implementations.
        # It will translate local_addr back to source_address.
        source_address = kwargs.pop("source_address", None)
        kwargs["local_addr"] = source_address
        return connect_socks_proxy(proxy, ws_uri, deadline, **kwargs)

    if scheme.startswith("http"):
        return connect_http_proxy(proxy, ws_uri, deadline, **kwargs)

    raise AssertionError("unsupported proxy")
Loading

0 comments on commit c222821

Please sign in to comment.