Fix upload with multiple blocks to allow more than 60mb
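
Before this change each file was hashed and uploaded as a single block, which failed once a file exceeded what the storm block endpoint accepts (around 60 MB, per the commit title). The commit instead splits every file into 2 MiB blocks (`MAX_BLOCK_SIZE`), prepares one `content_length`/`content_md5_hex` descriptor per block, uploads each block to its own presigned URL, and finalizes with the full list of block ids per file. A rough illustration of the resulting block counts (the helper name below is invented for illustration, not part of the patch):

```python
import math
import os

MAX_BLOCK_SIZE = 2 * 1024 * 1024  # 2 MiB block size introduced by this commit

def block_count(path: str) -> int:
    """Illustrative only: how many 2 MiB blocks a file is split into."""
    return math.ceil(os.path.getsize(path) / MAX_BLOCK_SIZE)

# A 100 MiB file -> ceil(104857600 / 2097152) = 50 blocks, each hashed and
# PUT to its own presigned URL instead of one oversized upload.
```
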
sharevb committed Oct 25, 2023
1 parent 7112c1f commit a5aa555
Showing 1 changed file with 50 additions and 49 deletions.
99 changes: 50 additions & 49 deletions transferwee.py
@@ -38,7 +38,7 @@
will be shared via emails or link.
"""

from typing import Any, List, Optional, Union
from typing import Any, List, Optional, Union, Dict
import binascii
import functools
import hashlib
@@ -61,7 +61,7 @@

WETRANSFER_EXPIRE_IN = 604800
WETRANSFER_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"

MAX_BLOCK_SIZE = 2 * 1024 * 1024

logger = logging.getLogger(__name__)

@@ -354,24 +354,22 @@ def _md5(file: str) -> str:
h.update(chunk)
return h.hexdigest()


def _storm_prepare_item(file: str) -> dict[str, Union[int, str]]:
def _storm_prepare_item(file: str) -> Dict[str, List[Union[int, str]]]:
"""Given a file, prepare the block for blocks dictionary.
Return a dictionary with "content_length" and "content_md5_hex" keys.
"""
filesize = os.path.getsize(file)

return {"content_length": filesize, "content_md5_hex": _md5(file)}

with open(file, "rb") as f:
return [{"content_length": len(chunk), "content_md5_hex": hashlib.md5(chunk).hexdigest()} for chunk in iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")]

def _storm_prepare(authorization: str, filenames: List[str]) -> dict[Any, Any]:
"""Given an Authorization token and filenames prepare for block uploads.
Return the parsed JSON response.
"""
files_bids = [_storm_prepare_item(f) for f in filenames]
j = {
"blocks": [_storm_prepare_item(f) for f in filenames],
"blocks": [i for sublist in files_bids for i in sublist],
}
requests.options(
_storm_urls(authorization)["WETRANSFER_STORM_BLOCK"],
@@ -390,43 +388,36 @@ def _storm_prepare(authorization: str, filenames: List[str]) -> dict[Any, Any]:
"User-Agent": WETRANSFER_USER_AGENT,
},
)
return r.json()
return { "files_bids": files_bids, "blocks":r.json() }


def _storm_finalize_item(
file: str, block_id: str
) -> dict[str, Union[List[str], str]]:
file: str, block_ids: List[str]
) -> Dict[str, Union[List[str], str]]:
"""Given a file and block_id prepare the item block dictionary.
Return a dictionary with "block_ids", "item_type" and "path" keys.
XXX: Is it possible to actually have more than one block?
XXX: If yes this - and probably other parts of the code involved with
XXX: blocks - needs to be instructed to handle them instead of
XXX: assuming that one file is associated with one block.
"""
filename = os.path.basename(file)

return {
"block_ids": [
block_id,
],
"block_ids": block_ids,
"item_type": "file",
"path": filename,
}


def _storm_finalize(
authorization: str, filenames: List[str], block_ids: List[str]
) -> dict[Any, Any]:
authorization: str, filenames: List[str], block_ids: List[List[str]]
) -> Dict[Any, Any]:
"""Given an Authorization token, filenames and block ids finalize upload.
Return the parsed JSON response.
"""
j = {
"items": [
_storm_finalize_item(f, bid)
for f, bid in zip(filenames, block_ids)
_storm_finalize_item(f, bids)
for f, bids in zip(filenames, block_ids)
],
}
requests.options(
@@ -464,32 +455,33 @@ def _storm_finalize(
return r.json()
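
In `_storm_finalize` (and `_storm_finalize_item` above it), `block_ids` is now a list of lists, one inner list per file, zipped with the filenames to build the `items` payload. A sketch of the resulting request body with made-up names and ids:

```python
import os
from typing import List

filenames = ["/tmp/a.bin", "/tmp/b.bin"]
block_ids: List[List[str]] = [["a-1"], ["b-1", "b-2", "b-3"]]  # one inner list per file

body = {
    "items": [
        {"block_ids": bids, "item_type": "file", "path": os.path.basename(f)}
        for f, bids in zip(filenames, block_ids)
    ],
}
assert body["items"][1]["block_ids"] == ["b-1", "b-2", "b-3"]
```
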


def _storm_upload(url: str, file: str) -> None:
def _storm_upload(urls: List[str], file: str) -> None:
"""Given an url and file upload it.
Does not return anything.
"""
requests.options(
url,
headers={
"Origin": "https://wetransfer.com",
"Access-Control-Request-Method": "PUT",
"User-Agent": WETRANSFER_USER_AGENT,
},
)
with open(file, "rb") as f:
requests.put(
url,
data=f,
headers={
"Origin": "https://wetransfer.com",
"Content-MD5": binascii.b2a_base64(
binascii.unhexlify(_md5(file)), newline=False
),
"X-Uploader": "storm",
"User-Agent": WETRANSFER_USER_AGENT,
},
)
for url, chunk in zip(urls,[chunk for chunk in iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")]):
requests.options(
url,
headers={
"Origin": "https://wetransfer.com",
"Access-Control-Request-Method": "PUT",
"User-Agent": WETRANSFER_USER_AGENT,
},
)
requests.put(
url,
data=chunk,
headers={
"Origin": "https://wetransfer.com",
"Content-MD5": binascii.b2a_base64(
binascii.unhexlify(hashlib.md5(chunk).hexdigest()), newline=False
),
"X-Uploader": "storm",
"User-Agent": WETRANSFER_USER_AGENT,
},
)
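
`_storm_upload` now takes the whole list of presigned URLs for one file and pairs them with that file's chunks, sending a per-chunk base64 `Content-MD5` (the `unhexlify`/`b2a_base64` combination is equivalent to base64-encoding the raw digest). A pared-down sketch: the OPTIONS preflight and the `Origin`/`X-Uploader`/`User-Agent` headers are omitted here, and chunks are read lazily rather than collected into a list as the committed code does:

```python
import binascii
import functools
import hashlib
from typing import Iterator, List

import requests

MAX_BLOCK_SIZE = 2 * 1024 * 1024

def iter_chunks(path: str) -> Iterator[bytes]:
    """Yield a file's 2 MiB chunks one at a time."""
    with open(path, "rb") as f:
        yield from iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")

def put_chunks(urls: List[str], path: str) -> None:
    """PUT the n-th chunk to the n-th presigned URL with its own Content-MD5."""
    for url, chunk in zip(urls, iter_chunks(path)):
        # base64 of the raw MD5 digest == b2a_base64(unhexlify(hexdigest()))
        content_md5 = binascii.b2a_base64(hashlib.md5(chunk).digest(), newline=False)
        requests.put(url, data=chunk, headers={"Content-MD5": content_md5})
```
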


def _finalize_upload(
@@ -583,15 +575,24 @@ def upload(
logger.debug(f"Doing preflight storm")
_storm_preflight(transfer["storm_upload_token"], files)
logger.debug(f"Preparing storm block upload")
blocks = _storm_prepare(transfer["storm_upload_token"], files)
for f, b in zip(files, blocks["data"]["blocks"]):
prepare_data = _storm_prepare(transfer["storm_upload_token"], files)
blocks = prepare_data["blocks"]["data"]["blocks"]
start_block_index = 0
file_index = 0
file_bids = []
for f in files:
logger.debug(f"Uploading file {f}")
_storm_upload(b["presigned_put_url"], f)
file_chunks_count = len(prepare_data["files_bids"][file_index])
file_blocks = blocks[start_block_index:start_block_index+file_chunks_count]
file_bids.append(file_blocks)
_storm_upload([b["presigned_put_url"] for b in file_blocks], f)
start_block_index += file_chunks_count
file_index += 1
logger.debug(f"Finalizing storm batch upload")
_storm_finalize(
transfer["storm_upload_token"],
files,
[b["block_id"] for b in blocks["data"]["blocks"]],
[[b["block_id"] for b in f] for f in file_bids],
)
logger.debug(f"Finalizing upload with transfer id {transfer['id']}")
shortened_url = _finalize_upload(transfer["id"], s)["shortened_url"]
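
The reworked `upload()` loop is bookkeeping: the prepare response returns one flat `blocks` list covering all files, and the per-file chunk counts recorded in `files_bids` are what let it slice that list back into per-file groups (for the presigned URLs) and later into per-file block-id lists (for finalize). A minimal sketch of that slicing with invented data; the helper name is illustrative only:

```python
from typing import Dict, List

def group_blocks(flat_blocks: List[Dict], files_bids: List[List[Dict]]) -> List[List[Dict]]:
    """Split the flat server block list into per-file groups by chunk count."""
    grouped, start = [], 0
    for per_file in files_bids:
        count = len(per_file)
        grouped.append(flat_blocks[start:start + count])
        start += count
    return grouped

# Two files: the first was split into one chunk, the second into two.
files_bids = [[{"content_length": 10}], [{"content_length": 20}, {"content_length": 30}]]
blocks = [{"block_id": "a-1", "presigned_put_url": "https://example.invalid/a-1"},
          {"block_id": "b-1", "presigned_put_url": "https://example.invalid/b-1"},
          {"block_id": "b-2", "presigned_put_url": "https://example.invalid/b-2"}]

grouped = group_blocks(blocks, files_bids)
assert [[b["block_id"] for b in g] for g in grouped] == [["a-1"], ["b-1", "b-2"]]
```
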
