From a5aa55544fb5ddf20dd114c7a240ee6b8f044786 Mon Sep 17 00:00:00 2001
From: sharevb
Date: Wed, 25 Oct 2023 09:06:53 +0200
Subject: [PATCH] Fix upload with multiple blocks to allow more than 60 MB

Split each file into MAX_BLOCK_SIZE (2 MiB) blocks, request one presigned
PUT URL per block, upload the blocks individually, and finalize the
transfer with per-file lists of block ids instead of assuming one block
per file.

Fixes #64
---
 transferwee.py | 109 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 55 insertions(+), 54 deletions(-)

diff --git a/transferwee.py b/transferwee.py
index 8815e93..38097bb 100755
--- a/transferwee.py
+++ b/transferwee.py
@@ -38,7 +38,7 @@
 will be shared via emails or link.
 """
 
-from typing import Any, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 import binascii
 import functools
 import hashlib
@@ -61,7 +61,7 @@
 WETRANSFER_EXPIRE_IN = 604800
 WETRANSFER_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"
 
-
+MAX_BLOCK_SIZE = 2 * 1024 * 1024
 
 logger = logging.getLogger(__name__)
 
@@ -354,24 +354,22 @@ def _md5(file: str) -> str:
         h.update(chunk)
     return h.hexdigest()
 
-
-def _storm_prepare_item(file: str) -> dict[str, Union[int, str]]:
-    """Given a file, prepare the block for blocks dictionary.
+def _storm_prepare_item(file: str) -> List[Dict[str, Union[int, str]]]:
+    """Given a file, prepare its blocks for the blocks dictionary.
 
-    Return a dictionary with "content_length" and "content_md5_hex" keys.
+    Return a list of dicts with "content_length" and "content_md5_hex" keys.
     """
-    filesize = os.path.getsize(file)
-
-    return {"content_length": filesize, "content_md5_hex": _md5(file)}
-
+    with open(file, "rb") as f:
+        return [{"content_length": len(chunk), "content_md5_hex": hashlib.md5(chunk).hexdigest()} for chunk in iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")]
 
 def _storm_prepare(authorization: str, filenames: List[str]) -> dict[Any, Any]:
     """Given an Authorization token and filenames prepare for block uploads.
 
-    Return the parsed JSON response.
+    Return a dict with "files_bids" and "blocks" (the parsed JSON response).
     """
+    files_bids = [_storm_prepare_item(f) for f in filenames]
     j = {
-        "blocks": [_storm_prepare_item(f) for f in filenames],
+        "blocks": [i for sublist in files_bids for i in sublist],
     }
     requests.options(
         _storm_urls(authorization)["WETRANSFER_STORM_BLOCK"],
@@ -390,43 +388,36 @@ def _storm_prepare(authorization: str, filenames: List[str]) -> dict[Any, Any]:
             "User-Agent": WETRANSFER_USER_AGENT,
         },
     )
-    return r.json()
+    return {"files_bids": files_bids, "blocks": r.json()}
 
 
 def _storm_finalize_item(
-    file: str, block_id: str
-) -> dict[str, Union[List[str], str]]:
-    """Given a file and block_id prepare the item block dictionary.
+    file: str, block_ids: List[str]
+) -> Dict[str, Union[List[str], str]]:
+    """Given a file and its block_ids prepare the item block dictionary.
 
     Return a dictionary with "block_ids", "item_type" and "path" keys.
-
-    XXX: Is it possible to actually have more than one block?
-    XXX: If yes this - and probably other parts of the code involved with
-    XXX: blocks - needs to be instructed to handle them instead of
-    XXX: assuming that one file is associated with one block.
     """
     filename = os.path.basename(file)
 
     return {
-        "block_ids": [
-            block_id,
-        ],
+        "block_ids": block_ids,
         "item_type": "file",
         "path": filename,
     }
 
 
 def _storm_finalize(
-    authorization: str, filenames: List[str], block_ids: List[str]
-) -> dict[Any, Any]:
+    authorization: str, filenames: List[str], block_ids: List[List[str]]
+) -> Dict[Any, Any]:
     """Given an Authorization token, filenames and block ids finalize upload.
 
     Return the parsed JSON response.
""" j = { "items": [ - _storm_finalize_item(f, bid) - for f, bid in zip(filenames, block_ids) + _storm_finalize_item(f, bids) + for f, bids in zip(filenames, block_ids) ], } requests.options( @@ -464,32 +455,33 @@ def _storm_finalize( return r.json() -def _storm_upload(url: str, file: str) -> None: +def _storm_upload(urls: List[str], file: str) -> None: """Given an url and file upload it. Does not return anything. """ - requests.options( - url, - headers={ - "Origin": "https://wetransfer.com", - "Access-Control-Request-Method": "PUT", - "User-Agent": WETRANSFER_USER_AGENT, - }, - ) with open(file, "rb") as f: - requests.put( - url, - data=f, - headers={ - "Origin": "https://wetransfer.com", - "Content-MD5": binascii.b2a_base64( - binascii.unhexlify(_md5(file)), newline=False - ), - "X-Uploader": "storm", - "User-Agent": WETRANSFER_USER_AGENT, - }, - ) + for url, chunk in zip(urls,[chunk for chunk in iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")]): + requests.options( + url, + headers={ + "Origin": "https://wetransfer.com", + "Access-Control-Request-Method": "PUT", + "User-Agent": WETRANSFER_USER_AGENT, + }, + ) + requests.put( + url, + data=chunk, + headers={ + "Origin": "https://wetransfer.com", + "Content-MD5": binascii.b2a_base64( + binascii.unhexlify(hashlib.md5(chunk).hexdigest()), newline=False + ), + "X-Uploader": "storm", + "User-Agent": WETRANSFER_USER_AGENT, + }, + ) def _finalize_upload( @@ -583,15 +575,24 @@ def upload( logger.debug(f"Doing preflight storm") _storm_preflight(transfer["storm_upload_token"], files) logger.debug(f"Preparing storm block upload") - blocks = _storm_prepare(transfer["storm_upload_token"], files) - for f, b in zip(files, blocks["data"]["blocks"]): + prepare_data = _storm_prepare(transfer["storm_upload_token"], files) + blocks = prepare_data["blocks"]["data"]["blocks"] + start_block_index = 0 + file_index = 0 + file_bids = [] + for f in files: logger.debug(f"Uploading file {f}") - _storm_upload(b["presigned_put_url"], f) + file_chunks_count = len(prepare_data["files_bids"][file_index]) + file_blocks = blocks[start_block_index:start_block_index+file_chunks_count] + file_bids.append(file_blocks) + _storm_upload([b["presigned_put_url"] for b in file_blocks], f) + start_block_index += file_chunks_count + file_index += 1 logger.debug(f"Finalizing storm batch upload") _storm_finalize( transfer["storm_upload_token"], files, - [b["block_id"] for b in blocks["data"]["blocks"]], + [[b["block_id"] for b in f] for f in file_bids], ) logger.debug(f"Finalizing upload with transfer id {transfer['id']}") shortened_url = _finalize_upload(transfer["id"], s)["shortened_url"]