Skip to content

Commit

Permalink
Merge pull request #42 from Kirill-Lekhov/feature/add-bytes-streaming
Browse files Browse the repository at this point in the history
Add the 'get_stream_bytes_response' function
  • Loading branch information
m1kc authored Sep 5, 2024
2 parents 513e806 + 0d8fd44 commit fb41f64
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 4 deletions.
28 changes: 25 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ ensure_iterable('smth') # --> ['smth']
def my_function(one_or_multiple_args):
for arg in ensure_iterable(one_or_multiple_args):
...

my_function(['log', 'smog'])
my_function('dog')
```
Expand Down Expand Up @@ -211,7 +211,7 @@ t.footer()
```

* Pass an iterable to iterator_over()

```python
from notalib.hypertext import TablePrinter
t = TablePrinter(['a', 'b'])
Expand Down Expand Up @@ -297,6 +297,15 @@ tag = get_last_tag()
# Tag(hash='c4b6e06f57ab4773e2881d286804d4e3141b5195', label='v1.4.0')
```

#### notalib.file_iterator.file_iterator
Iterates over a byte buffer and yields chunks of the specified size.

```python
with open("<file_path>", mode="rb") as file:
for chunk in file_iterator(file):
...
```

## Tools for Pandas

#### notalib.pandas.pandasplus.row_to_dict
Expand Down Expand Up @@ -361,13 +370,26 @@ Stream all elements of iterable object as JSON array using the StreamingHttpResp
```python
class SomeViewSet(...):
...

def list(self, request, *args, **kwargs):
...
return stream_json(data)
```


#### notalib.django.bytes_stream.get_stream_bytes_response

Streams the part of a bytes buffer selected by the request's `Range` header, or the whole buffer content when no range is requested.

```python
class SomeView(...):
def get(self, request, *args, **kwargs):
with open("<file_path>", mode="rb") as file:
...
return get_stream_bytes_response(file, request, content_type="<file_content_type>")
```


## <s>Django/Clickhouse</s>

_Deprecated since 2.2.0._
Expand Down
48 changes: 48 additions & 0 deletions notalib/django/bytes_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from notalib.file_iterator import file_iterator

import re
from io import BytesIO
from typing import Union

from django.http.request import HttpRequest
from django.http import StreamingHttpResponse, FileResponse


TypeResponse = Union[StreamingHttpResponse, FileResponse]
# Used for streaming audio files. See: https://www.djangotricks.com/tricks/4S7qbNhtUeAD/
RANGE_RE = re.compile(r"bytes\s*=\s*(\d+)\s*-\s*(\d*)", re.I)


# Largest distance, in bytes, between the first and the last byte of one partial response (8 MiB).
_MAX_RANGE_SPAN = 8 * 1024 * 1024


def get_stream_bytes_response(buffer: BytesIO, request: HttpRequest, content_type: str) -> TypeResponse:
    """
    Returns part of a buffer or the entire buffer, depending on the Range header.

    Without a parsable ``Range`` header the whole buffer is returned as a FileResponse.
    With one, a 206 StreamingHttpResponse is returned that starts at the requested first byte
    and extends at most ``_MAX_RANGE_SPAN`` bytes past it (clamped to the end of the buffer).
    The requested end of the range is deliberately ignored; the ``Content-Range`` header
    tells the client which bytes were actually sent.
    If the requested first byte lies at or beyond the end of the buffer, an empty
    416 (Range Not Satisfiable) response is returned.

    The content is read lazily while the response is being sent, so the buffer must stay open
    until then.

    Args:
        buffer: A seekable binary buffer whose content needs to be returned in the response.
        request: HttpRequest object.
        content_type: Response content type.

    Returns:
        FileResponse with the whole content, or StreamingHttpResponse with status 206 or 416.
    """
    import os  # Local import: only the SEEK_* constants are needed here.

    # Measure the size with seek/tell so that any seekable binary stream works (not only BytesIO),
    # then restore the position because FileResponse streams from the current position.
    position = buffer.tell()
    buffer.seek(0, os.SEEK_END)
    size = buffer.tell()
    buffer.seek(position, os.SEEK_SET)

    range_header = request.META.get("HTTP_RANGE", "").strip()
    range_match = RANGE_RE.match(range_header)

    if range_match is None:
        response = FileResponse(buffer, content_type=content_type)
        response['Accept-Ranges'] = "bytes"

        return response

    # The first group of RANGE_RE always contains at least one digit.
    first_byte = int(range_match.group(1))

    if first_byte >= size:
        # Nothing can be served from this position (this also covers an empty buffer).
        response = StreamingHttpResponse([], status=416, content_type=content_type)
        response['Content-Range'] = f"bytes */{size}"
        response['Accept-Ranges'] = "bytes"

        return response

    # NOTE: the end of the requested range is ignored on purpose, only the span limit applies.
    last_byte = min(first_byte + _MAX_RANGE_SPAN, size - 1)
    length = last_byte - first_byte + 1

    response = StreamingHttpResponse(
        file_iterator(buffer, offset=first_byte, length=length),
        status=206,
        content_type=content_type,
    )
    response['Content-Range'] = f"bytes {first_byte}-{last_byte}/{size}"
    response['Content-Length'] = str(length)
    response['Accept-Ranges'] = "bytes"

    return response
30 changes: 30 additions & 0 deletions notalib/django/bytes_stream_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from notalib.django.bytes_stream import get_stream_bytes_response

from io import BytesIO

from django.http import StreamingHttpResponse, FileResponse


class FakeRequest:
    """Minimal request stand-in that only exposes the META mapping with the Range header."""

    def __init__(self, http_range: str = "") -> None:
        meta = dict(HTTP_RANGE=http_range)
        self.META = meta


class TestGetStreamBytesResponse:
    """Checks which response type and content get_stream_bytes_response produces."""

    def test_range_mismatch(self):
        """A Range header yields a streaming response starting at the requested first byte."""
        content_type = "application/octet-stream"
        source = BytesIO(b"deadbee")

        # The requested end of the range ("5") is not honoured: the whole buffer is streamed.
        from_start = get_stream_bytes_response(source, FakeRequest("bytes = 0 - 5"), content_type)
        assert isinstance(from_start, StreamingHttpResponse)
        streamed = list(from_start.streaming_content)
        assert streamed == [b"deadbee"]
        assert from_start.headers.get("Content-Type") == content_type

        # An end beyond the buffer is clamped, so only the tail is streamed.
        from_middle = get_stream_bytes_response(source, FakeRequest("bytes = 4 - 999"), content_type)
        assert isinstance(from_middle, StreamingHttpResponse)
        streamed = list(from_middle.streaming_content)
        assert streamed == [b"bee"]

    def test_range_match(self):
        """Without a Range header the whole buffer is returned as a FileResponse."""
        content_type = "application/octet-stream"
        empty_request = FakeRequest()

        result = get_stream_bytes_response(BytesIO(), empty_request, content_type)
        assert isinstance(result, FileResponse)
        assert result.headers.get("Content-Type") == content_type
37 changes: 37 additions & 0 deletions notalib/file_iterator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os
from io import BytesIO
from typing import Optional, Generator


def file_iterator(
    buffer: BytesIO,
    chunk_size: int = 8192,
    offset: int = 0,
    length: Optional[int] = None,
) -> Generator[bytes, None, None]:
    """
    Iterates over byte buffer and yields chunks of specified size.

    The buffer is repositioned to `offset` when iteration starts and is read lazily,
    one chunk per iteration step. Iteration stops at the end of the buffer or after
    `length` bytes have been yielded, whichever comes first.

    Args:
        buffer: A buffer the data from which you want to split into chunks.
        chunk_size: The maximum size of a single chunk returned during iteration. Must be positive.
        offset: Offset relative to the beginning of a buffer (the size of the content that will not be yielded).
        length: The length of a buffer content that needs to be yielded.
            None means "up to the end of the buffer"; zero or a negative value yields nothing.

    Returns:
        Generator of buffer content.

    Raises:
        ValueError: If `chunk_size` is not positive (raised when iteration starts).
    """
    if chunk_size <= 0:
        # read(0) would silently yield nothing and read(<0) would ignore chunking altogether.
        raise ValueError("chunk_size must be a positive integer")

    buffer.seek(offset, os.SEEK_SET)
    remaining = length

    # `remaining is None` means there is no limit; otherwise stop as soon as the quota is used up.
    # A negative quota never enters the loop, so it is never passed to read() as "read everything".
    while remaining is None or remaining > 0:
        bytes_length = chunk_size if remaining is None else min(remaining, chunk_size)
        data = buffer.read(bytes_length)

        if not data:
            # The buffer ended before the requested length was reached.
            break

        if remaining is not None:
            remaining -= len(data)

        yield data
33 changes: 33 additions & 0 deletions notalib/file_iterator_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from notalib.file_iterator import file_iterator

from io import BytesIO

import pytest


class TestFileIterator:
    """Covers chunking, offset and length handling of file_iterator."""

    def test_with_empty_buffer(self):
        """An empty buffer produces a generator that is exhausted immediately."""
        chunks = file_iterator(BytesIO())

        with pytest.raises(StopIteration):
            next(chunks)

    def test_chunk_size(self):
        """The chunk size controls how the content is split."""
        source = BytesIO(b"deadbee")

        byte_by_byte = list(file_iterator(source, chunk_size=1))
        assert byte_by_byte == [b"d", b"e", b"a", b"d", b"b", b"e", b"e"]

        single_chunk = list(file_iterator(source, chunk_size=7))
        assert single_chunk == [b"deadbee"]

    def test_offset(self):
        """The offset skips the leading part of the buffer."""
        source = BytesIO(b"deadbee")

        default_offset = list(file_iterator(source, chunk_size=7))
        assert default_offset == [b"deadbee"]

        zero_offset = list(file_iterator(source, chunk_size=7, offset=0))
        assert zero_offset == [b"deadbee"]

        shifted = list(file_iterator(source, chunk_size=7, offset=4))
        assert shifted == [b"bee"]

    def test_length(self):
        """The length limits how much content is yielded."""
        source = BytesIO(b"deadbee")

        unlimited = list(file_iterator(source, chunk_size=7))
        assert unlimited == [b"deadbee"]

        full_length = list(file_iterator(source, chunk_size=7, length=7))
        assert full_length == [b"deadbee"]

        truncated = list(file_iterator(source, chunk_size=7, length=4))
        assert truncated == [b"dead"]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "notalib"
version = "2.3.0"
version = "2.4.0-rc0"
description = "A collection of utility functions & classes"
authors = ["m1kc (Max Musatov) <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit fb41f64

Please sign in to comment.