From d8c7f31ee7b6748ea74399a23bd2cd97e0bbc74c Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Fri, 12 Nov 2021 22:35:47 +0100 Subject: [PATCH 1/7] test(WSGI servers): add a failing test for byterange support in the TDD fashion --- tests/test_wsgi_servers.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_wsgi_servers.py b/tests/test_wsgi_servers.py index d9e660c5f..ec4aaf410 100644 --- a/tests/test_wsgi_servers.py +++ b/tests/test_wsgi_servers.py @@ -232,3 +232,21 @@ def test_static_file(self, server_url): assert resp.headers.get('Content-Disposition') == ( 'attachment; filename="test_wsgi_servers.py"' ) + + @pytest.mark.parametrize('byte_range,expected_head', [ + ('7-', b'hashlib'), + ('2-6', b'port'), + ('32-38', b'random'), + ('-47', b'The content of this comment is part of a test.\n'), + ]) + def test_static_file_byte_range(self, byte_range, expected_head, server_url): + resp = requests.get( + server_url + '/tests/test_wsgi_servers.py', + timeout=_REQUEST_TIMEOUT, + headers={'Range': 'bytes=' + byte_range}, + ) + + assert resp.status_code == 206 + assert resp.content.startswith(expected_head) + + # NOTE(vytas): The content of this comment is part of a test. From 515d010f628a736e0d42ad7a70c1c03693997027 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 13 Nov 2021 21:15:23 +0100 Subject: [PATCH 2/7] feat(static): set `Content-Length` by fstat()-ing open file --- falcon/routing/static.py | 41 ++++++++----- tests/test_static.py | 115 ++++++++++++++++++++----------------- tests/test_wsgi_servers.py | 56 ++++++++++++------ 3 files changed, 126 insertions(+), 86 deletions(-) diff --git a/falcon/routing/static.py b/falcon/routing/static.py index 565f0dbac..7910f5b12 100644 --- a/falcon/routing/static.py +++ b/falcon/routing/static.py @@ -15,37 +15,46 @@ def _open_range(file_path, req_range): file_path (str): Path to the file to open. req_range (Optional[Tuple[int, int]]): Request.range value. Returns: - tuple: Two-member tuple of (stream, content-range). If req_range is - ``None`` or ignored, content-range will be ``None``; otherwise, - the stream will be appropriately seeked and possibly bounded, - and the content-range will be a tuple of (start, end, size). + tuple: Three-member tuple of (stream, size, content-range). + If req_range is ``None`` or ignored, content-range will be + ``None``; otherwise, the stream will be appropriately seeked and + possibly bounded, and the content-range will be a tuple of + (start, end, size). """ fh = io.open(file_path, 'rb') + size = os.fstat(fh.fileno()).st_size if req_range is None: - return fh, None + return fh, size, None start, end = req_range - size = os.fstat(fh.fileno()).st_size if size == 0: - # Ignore Range headers for zero-byte files; just serve the empty body - # since Content-Range can't be used to express a zero-byte body - return fh, None + # NOTE(tipabu): Ignore Range headers for zero-byte files; just serve + # the empty body since Content-Range can't be used to express a + # zero-byte body + return fh, 0, None if start < 0 and end == -1: - # Special case: only want the last N bytes + # NOTE(tipabu): Special case: only want the last N bytes start = max(start, -size) fh.seek(start, os.SEEK_END) - return fh, (size + start, size - 1, size) + # NOTE(vytas): Wrap in order to prevent sendfile from being used, as + # its implementation was found to be buggy in many popular WSGI + # servers for open files with a non-zero offset. + return _BoundedFile(fh, -start), size, (size + start, size - 1, size) if start >= size: + fh.close() raise falcon.HTTPRangeNotSatisfiable(size) fh.seek(start) if end == -1: - return fh, (start, size - 1, size) + # NOTE(vytas): Wrap in order to prevent sendfile from being used, as + # its implementation was found to be buggy in many popular WSGI + # servers for open files with a non-zero offset. + return _BoundedFile(fh, size - start), size, (start, size - 1, size) end = min(end, size - 1) - return _BoundedFile(fh, end - start + 1), (start, end, size) + return _BoundedFile(fh, end - start + 1), size, (start, end, size) class _BoundedFile: @@ -184,14 +193,16 @@ def __call__(self, req, resp): if req.range_unit != 'bytes': req_range = None try: - resp.stream, content_range = _open_range(file_path, req_range) + stream, size, content_range = _open_range(file_path, req_range) + resp.set_stream(stream, size) except IOError: if self._fallback_filename is None: raise falcon.HTTPNotFound() try: - resp.stream, content_range = _open_range( + stream, size, content_range = _open_range( self._fallback_filename, req_range ) + resp.set_stream(stream, size) file_path = self._fallback_filename except IOError: raise falcon.HTTPNotFound() diff --git a/tests/test_static.py b/tests/test_static.py index 19a1adb9b..f041fe13f 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -28,6 +28,33 @@ def create_sr(asgi, *args, **kwargs): return sr_type(*args, **kwargs) +@pytest.fixture +def patch_open(monkeypatch): + def patch(content=None, validate=None): + def open(path, mode): + class FakeFD(int): + pass + + class FakeStat: + def __init__(self, size): + self.st_size = size + + if validate: + validate(path) + + data = path.encode() if content is None else content + fake_file = io.BytesIO(data) + fd = FakeFD(1337) + fd._stat = FakeStat(len(data)) + fake_file.fileno = lambda: fd + return fake_file + + monkeypatch.setattr(io, 'open', open) + monkeypatch.setattr(os, 'fstat', lambda fileno: fileno._stat) + + return patch + + @pytest.mark.parametrize( 'uri', [ @@ -83,8 +110,8 @@ def create_sr(asgi, *args, **kwargs): '/static/\ufffdsomething', ], ) -def test_bad_path(asgi, uri, monkeypatch): - monkeypatch.setattr(io, 'open', lambda path, mode: io.BytesIO()) +def test_bad_path(asgi, uri, patch_open): + patch_open(b'') sr_type = StaticRouteAsync if asgi else StaticRoute sr = sr_type('/static', '/var/www/statics') @@ -177,8 +204,8 @@ def test_invalid_args_fallback_filename(client, default): ), ], ) -def test_good_path(asgi, uri_prefix, uri_path, expected_path, mtype, monkeypatch): - monkeypatch.setattr(io, 'open', lambda path, mode: io.BytesIO(path.encode())) +def test_good_path(asgi, uri_prefix, uri_path, expected_path, mtype, patch_open): + patch_open() sr = create_sr(asgi, uri_prefix, '/var/www/statics') @@ -219,21 +246,20 @@ async def run(): ) @pytest.mark.parametrize('use_fallback', [True, False]) def test_range_requests( - client, range_header, exp_content_range, exp_content, monkeypatch, use_fallback + client, + range_header, + exp_content_range, + exp_content, + patch_open, + monkeypatch, + use_fallback, ): - fake_file = io.BytesIO(b'0123456789abcdef') - fake_file.fileno = lambda: 123 - - def fake_open(path, mode): + def validate(path): if use_fallback and not path.endswith('index.html'): raise OSError(errno.ENOENT, 'File not found') - return fake_file - class FakeStat: - st_size = len(fake_file.getvalue()) + patch_open(b'0123456789abcdef', validate=validate) - monkeypatch.setattr(io, 'open', fake_open) - monkeypatch.setattr(os, 'fstat', lambda fileno: FakeStat()) monkeypatch.setattr('os.path.isfile', lambda file: file.endswith('index.html')) client.app.add_static_route( @@ -270,15 +296,8 @@ class FakeStat: 'bytes=-30', ], ) -def test_range_request_zero_length(client, range_header, monkeypatch): - fake_file = io.BytesIO(b'') - fake_file.fileno = lambda: 123 - - class FakeStat: - st_size = 0 - - monkeypatch.setattr(io, 'open', lambda path, mode: fake_file) - monkeypatch.setattr(os, 'fstat', lambda fileno: FakeStat()) +def test_range_request_zero_length(client, range_header, patch_open): + patch_open(b'') client.app.add_static_route('/downloads', '/opt/somesite/downloads') @@ -305,15 +324,8 @@ class FakeStat: ('bytes=16-', falcon.HTTP_416), ], ) -def test_bad_range_requests(client, range_header, exp_status, monkeypatch): - fake_file = io.BytesIO(b'0123456789abcdef') - fake_file.fileno = lambda: 123 - - class FakeStat: - st_size = len(fake_file.getvalue()) - - monkeypatch.setattr(io, 'open', lambda path, mode: fake_file) - monkeypatch.setattr(os, 'fstat', lambda fileno: FakeStat()) +def test_bad_range_requests(client, range_header, exp_status, patch_open): + patch_open(b'0123456789abcdef') client.app.add_static_route('/downloads', '/opt/somesite/downloads') @@ -325,8 +337,8 @@ class FakeStat: assert response.headers.get('Content-Range') == 'bytes */16' -def test_pathlib_path(asgi, monkeypatch): - monkeypatch.setattr(io, 'open', lambda path, mode: io.BytesIO(path.encode())) +def test_pathlib_path(asgi, patch_open): + patch_open() sr = create_sr(asgi, '/static/', pathlib.Path('/var/www/statics')) req_path = '/static/css/test.css' @@ -349,8 +361,8 @@ async def run(): assert body.decode() == os.path.normpath('/var/www/statics/css/test.css') -def test_lifo(client, monkeypatch): - monkeypatch.setattr(io, 'open', lambda path, mode: io.BytesIO(path.encode())) +def test_lifo(client, patch_open): + patch_open() client.app.add_static_route('/downloads', '/opt/somesite/downloads') client.app.add_static_route('/downloads/archive', '/opt/somesite/x') @@ -364,8 +376,8 @@ def test_lifo(client, monkeypatch): assert response.text == os.path.normpath('/opt/somesite/x/thingtoo.zip') -def test_lifo_negative(client, monkeypatch): - monkeypatch.setattr(io, 'open', lambda path, mode: io.BytesIO(path.encode())) +def test_lifo_negative(client, patch_open): + patch_open() client.app.add_static_route('/downloads/archive', '/opt/somesite/x') client.app.add_static_route('/downloads', '/opt/somesite/downloads') @@ -381,8 +393,8 @@ def test_lifo_negative(client, monkeypatch): ) -def test_downloadable(client, monkeypatch): - monkeypatch.setattr(io, 'open', lambda path, mode: io.BytesIO(path.encode())) +def test_downloadable(client, patch_open): + patch_open() client.app.add_static_route( '/downloads', '/opt/somesite/downloads', downloadable=True @@ -433,15 +445,14 @@ def test_downloadable_not_found(client): ) @pytest.mark.parametrize('downloadable', [True, False]) def test_fallback_filename( - asgi, uri, default, expected, content_type, downloadable, monkeypatch + asgi, uri, default, expected, content_type, downloadable, patch_open, monkeypatch ): - def mock_open(path, mode): - if os.path.normpath(default) in path: - return io.BytesIO(path.encode()) + def validate(path): + if os.path.normpath(default) not in path: + raise IOError() - raise IOError() + patch_open(validate=validate) - monkeypatch.setattr(io, 'open', mock_open) monkeypatch.setattr( 'os.path.isfile', lambda file: os.path.normpath(default) in file ) @@ -494,14 +505,14 @@ async def run(): ], ) def test_e2e_fallback_filename( - client, monkeypatch, strip_slash, path, fallback, static_exp, assert_axp + client, patch_open, monkeypatch, strip_slash, path, fallback, static_exp, assert_axp ): - def mockOpen(path, mode): - if 'index' in path and 'raise' not in path: - return io.BytesIO(path.encode()) - raise IOError() + def validate(path): + if 'index' not in path or 'raise' in path: + raise IOError() + + patch_open(validate=validate) - monkeypatch.setattr(io, 'open', mockOpen) monkeypatch.setattr('os.path.isfile', lambda file: 'index' in file) client.app.req_options.strip_url_path_trailing_slash = strip_slash diff --git a/tests/test_wsgi_servers.py b/tests/test_wsgi_servers.py index ec4aaf410..3bc95de30 100644 --- a/tests/test_wsgi_servers.py +++ b/tests/test_wsgi_servers.py @@ -109,21 +109,28 @@ def _waitress_args(host, port): ) -@pytest.fixture( - params=[ - _gunicorn_args, - _meinheld_args, - _uvicorn_args, - _uwsgi_args, - _waitress_args, - ] -) -def server_url(request): +@pytest.fixture(params=['gunicorn', 'meinheld', 'uvicorn', 'uwsgi', 'waitress']) +def wsgi_server(request): + return request.param + + +@pytest.fixture +def server_args(wsgi_server): + servers = { + 'gunicorn': _gunicorn_args, + 'meinheld': _meinheld_args, + 'uvicorn': _uvicorn_args, + 'uwsgi': _uwsgi_args, + 'waitress': _waitress_args, + } + return servers[wsgi_server] + + +@pytest.fixture +def server_url(server_args): if sys.platform.startswith('win'): pytest.skip('WSGI server tests are currently unsupported on Windows') - server_args = request.param - for attempt in range(3): server_port = testing.get_unused_port() base_url = 'http://{}:{}'.format(_SERVER_HOST, server_port) @@ -233,13 +240,24 @@ def test_static_file(self, server_url): 'attachment; filename="test_wsgi_servers.py"' ) - @pytest.mark.parametrize('byte_range,expected_head', [ - ('7-', b'hashlib'), - ('2-6', b'port'), - ('32-38', b'random'), - ('-47', b'The content of this comment is part of a test.\n'), - ]) - def test_static_file_byte_range(self, byte_range, expected_head, server_url): + @pytest.mark.parametrize( + 'byte_range,expected_head', + [ + ('7-', b'hashlib'), + ('2-6', b'port'), + ('32-38', b'random'), + ('-47', b'The content of this comment is part of a test.\n'), + ], + ) + def test_static_file_byte_range( + self, byte_range, expected_head, wsgi_server, server_url + ): + if wsgi_server == 'meinheld': + pytest.xfail( + 'Meinheld\'s file_wrapper fails without a fileno(), see also: ' + 'https://github.com/mopemope/meinheld/issues/130' + ) + resp = requests.get( server_url + '/tests/test_wsgi_servers.py', timeout=_REQUEST_TIMEOUT, From b3a424e8c5bd2ec258dff4bbcd24bcf666d0da3d Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 13 Nov 2021 22:02:03 +0100 Subject: [PATCH 3/7] fix(static): set the correct `Content-Length` for range responses --- falcon/routing/static.py | 18 ++++++++++-------- tests/test_wsgi_servers.py | 11 +++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/falcon/routing/static.py b/falcon/routing/static.py index 7910f5b12..08236c418 100644 --- a/falcon/routing/static.py +++ b/falcon/routing/static.py @@ -15,7 +15,7 @@ def _open_range(file_path, req_range): file_path (str): Path to the file to open. req_range (Optional[Tuple[int, int]]): Request.range value. Returns: - tuple: Three-member tuple of (stream, size, content-range). + tuple: Three-member tuple of (stream, content-length, content-range). If req_range is ``None`` or ignored, content-range will be ``None``; otherwise, the stream will be appropriately seeked and possibly bounded, and the content-range will be a tuple of @@ -40,7 +40,7 @@ def _open_range(file_path, req_range): # NOTE(vytas): Wrap in order to prevent sendfile from being used, as # its implementation was found to be buggy in many popular WSGI # servers for open files with a non-zero offset. - return _BoundedFile(fh, -start), size, (size + start, size - 1, size) + return _BoundedFile(fh, -start), -start, (size + start, size - 1, size) if start >= size: fh.close() @@ -51,10 +51,12 @@ def _open_range(file_path, req_range): # NOTE(vytas): Wrap in order to prevent sendfile from being used, as # its implementation was found to be buggy in many popular WSGI # servers for open files with a non-zero offset. - return _BoundedFile(fh, size - start), size, (start, size - 1, size) + length = size - start + return _BoundedFile(fh, length), length, (start, size - 1, size) end = min(end, size - 1) - return _BoundedFile(fh, end - start + 1), size, (start, end, size) + length = end - start + 1 + return _BoundedFile(fh, length), length, (start, end, size) class _BoundedFile: @@ -193,16 +195,16 @@ def __call__(self, req, resp): if req.range_unit != 'bytes': req_range = None try: - stream, size, content_range = _open_range(file_path, req_range) - resp.set_stream(stream, size) + stream, length, content_range = _open_range(file_path, req_range) + resp.set_stream(stream, length) except IOError: if self._fallback_filename is None: raise falcon.HTTPNotFound() try: - stream, size, content_range = _open_range( + stream, length, content_range = _open_range( self._fallback_filename, req_range ) - resp.set_stream(stream, size) + resp.set_stream(stream, length) file_path = self._fallback_filename except IOError: raise falcon.HTTPNotFound() diff --git a/tests/test_wsgi_servers.py b/tests/test_wsgi_servers.py index 3bc95de30..834180d8b 100644 --- a/tests/test_wsgi_servers.py +++ b/tests/test_wsgi_servers.py @@ -240,6 +240,10 @@ def test_static_file(self, server_url): 'attachment; filename="test_wsgi_servers.py"' ) + content_length = int(resp.headers['Content-Length']) + file_size = os.path.getsize(__file__) + assert len(resp.content) == content_length == file_size + @pytest.mark.parametrize( 'byte_range,expected_head', [ @@ -267,4 +271,11 @@ def test_static_file_byte_range( assert resp.status_code == 206 assert resp.content.startswith(expected_head) + content_length = int(resp.headers['Content-Length']) + assert len(resp.content) == content_length + + file_size = os.path.getsize(__file__) + content_range_size = int(resp.headers['Content-Range'].split('/')[-1]) + assert file_size == content_range_size + # NOTE(vytas): The content of this comment is part of a test. From 00e409f467c10c161098783007f5744be1e3f56e Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 13 Nov 2021 22:25:19 +0100 Subject: [PATCH 4/7] docs(static): add a warning on app server timeout settings --- falcon/app.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/falcon/app.py b/falcon/app.py index 6787fc088..71cc8b50c 100644 --- a/falcon/app.py +++ b/falcon/app.py @@ -589,6 +589,13 @@ def add_static_route( For security reasons, the directory and the fallback_filename (if provided) should be read only for the account running the application. + Warning: + If you need to serve large files and/or progressive downloads (such + as in the case of video streaming) through the Falcon app, check + that your application server's timeout settings can accomodate the + expected request duration (for instance, the popular Gunicorn kills + ``sync`` workers after 30 seconds unless configured otherwise). + Note: For ASGI apps, file reads are made non-blocking by scheduling them on the default executor. From ec5cf37ae9c4707d95862e1d680ae0655a23e685 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 13 Nov 2021 22:30:00 +0100 Subject: [PATCH 5/7] style: fighting black vs pep8 plugin quote styles --- tests/test_wsgi_servers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_wsgi_servers.py b/tests/test_wsgi_servers.py index 834180d8b..ed0381ddd 100644 --- a/tests/test_wsgi_servers.py +++ b/tests/test_wsgi_servers.py @@ -258,7 +258,7 @@ def test_static_file_byte_range( ): if wsgi_server == 'meinheld': pytest.xfail( - 'Meinheld\'s file_wrapper fails without a fileno(), see also: ' + "Meinheld's file_wrapper fails without a fileno(), see also: " 'https://github.com/mopemope/meinheld/issues/130' ) From b01ac64b3ea85573a640436f825a88b8fea22c42 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sun, 14 Nov 2021 20:26:04 +0100 Subject: [PATCH 6/7] test(static): improve tests as per review comments --- tests/test_static.py | 2 ++ tests/test_wsgi_servers.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/tests/test_static.py b/tests/test_static.py index f041fe13f..67467edb9 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -274,6 +274,7 @@ def validate(path): else: assert response.status == falcon.HTTP_206 assert response.text == exp_content + assert int(response.headers['Content-Length']) == len(exp_content) assert response.headers.get('Content-Range') == exp_content_range assert response.headers.get('Accept-Ranges') == 'bytes' if use_fallback: @@ -528,6 +529,7 @@ def test(prefix, directory, expected): else: assert response.status == falcon.HTTP_200 assert response.text == os.path.normpath(directory + expected) + assert int(response.headers['Content-Length']) == len(response.text) test('/static', '/opt/somesite/static/', static_exp) test('/assets', '/opt/somesite/assets/', assert_axp) diff --git a/tests/test_wsgi_servers.py b/tests/test_wsgi_servers.py index ed0381ddd..adc345033 100644 --- a/tests/test_wsgi_servers.py +++ b/tests/test_wsgi_servers.py @@ -228,6 +228,10 @@ def test_static_file(self, server_url): server_url + '/tests/test_wsgi_servers.py', timeout=_REQUEST_TIMEOUT ) assert resp.status_code == 200 + + # TODO(vytas): In retrospect, it would be easier to maintain these + # static route tests by creating a separate file instead of relying + # on the content of this same __file__. assert resp.text.startswith( 'import hashlib\n' 'import os\n' @@ -278,4 +282,8 @@ def test_static_file_byte_range( content_range_size = int(resp.headers['Content-Range'].split('/')[-1]) assert file_size == content_range_size + # TODO(vytas): In retrospect, it would be easier to maintain these + # static route tests by creating a separate file instead of relying + # on the content of this same __file__. + # NOTE(vytas): The content of this comment is part of a test. From ca4e2792e63b55945e264930a491aa95802d781c Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sun, 14 Nov 2021 20:41:02 +0100 Subject: [PATCH 7/7] docs(newsfragments): add a newsfragment for the added feature. --- docs/_newsfragments/1991.newandimproved.rst | 3 +++ falcon/routing/static.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 docs/_newsfragments/1991.newandimproved.rst diff --git a/docs/_newsfragments/1991.newandimproved.rst b/docs/_newsfragments/1991.newandimproved.rst new file mode 100644 index 000000000..db1f6f627 --- /dev/null +++ b/docs/_newsfragments/1991.newandimproved.rst @@ -0,0 +1,3 @@ +:func:`Static routes ` now set the +``Content-Length`` header indicating a served file's size +(or length of the rendered content range). diff --git a/falcon/routing/static.py b/falcon/routing/static.py index 08236c418..19d4ddc7a 100644 --- a/falcon/routing/static.py +++ b/falcon/routing/static.py @@ -29,12 +29,12 @@ def _open_range(file_path, req_range): start, end = req_range if size == 0: # NOTE(tipabu): Ignore Range headers for zero-byte files; just serve - # the empty body since Content-Range can't be used to express a - # zero-byte body + # the empty body since Content-Range can't be used to express a + # zero-byte body. return fh, 0, None if start < 0 and end == -1: - # NOTE(tipabu): Special case: only want the last N bytes + # NOTE(tipabu): Special case: only want the last N bytes. start = max(start, -size) fh.seek(start, os.SEEK_END) # NOTE(vytas): Wrap in order to prevent sendfile from being used, as