From a5aa55544fb5ddf20dd114c7a240ee6b8f044786 Mon Sep 17 00:00:00 2001
From: sharevb
Date: Wed, 25 Oct 2023 09:06:53 +0200
Subject: [PATCH] Fix upload with multiple blocks to allow more than 60 MB

Split each file into MAX_BLOCK_SIZE (2 MiB) blocks, request one presigned
PUT URL per block, upload the blocks individually, and finalize the
transfer with per-file lists of block ids instead of assuming one block
per file.

Fixes #64
---
 transferwee.py | 109 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 55 insertions(+), 54 deletions(-)

diff --git a/transferwee.py b/transferwee.py
index 8815e93..38097bb 100755
--- a/transferwee.py
+++ b/transferwee.py
@@ -38,7 +38,7 @@
 will be shared via emails or link.
 """
 
-from typing import Any, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 import binascii
 import functools
 import hashlib
@@ -61,7 +61,7 @@
 WETRANSFER_EXPIRE_IN = 604800
 WETRANSFER_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"
 
-
+MAX_BLOCK_SIZE = 2 * 1024 * 1024
 
 logger = logging.getLogger(__name__)
 
@@ -354,24 +354,22 @@ def _md5(file: str) -> str:
         h.update(chunk)
     return h.hexdigest()
 
-
-def _storm_prepare_item(file: str) -> dict[str, Union[int, str]]:
-    """Given a file, prepare the block for blocks dictionary.
+def _storm_prepare_item(file: str) -> List[Dict[str, Union[int, str]]]:
+    """Given a file, prepare its blocks for the blocks dictionary.
 
-    Return a dictionary with "content_length" and "content_md5_hex" keys.
+    Return a list of dicts with "content_length" and "content_md5_hex" keys.
     """
-    filesize = os.path.getsize(file)
-
-    return {"content_length": filesize, "content_md5_hex": _md5(file)}
-
+    with open(file, "rb") as f:
+        return [{"content_length": len(chunk), "content_md5_hex": hashlib.md5(chunk).hexdigest()} for chunk in iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")]
 
 def _storm_prepare(authorization: str, filenames: List[str]) -> dict[Any, Any]:
     """Given an Authorization token and filenames prepare for block uploads.
 
-    Return the parsed JSON response.
+    Return a dict with "files_bids" and "blocks" (the parsed JSON response).
     """
+    files_bids = [_storm_prepare_item(f) for f in filenames]
     j = {
-        "blocks": [_storm_prepare_item(f) for f in filenames],
+        "blocks": [i for sublist in files_bids for i in sublist],
     }
     requests.options(
         _storm_urls(authorization)["WETRANSFER_STORM_BLOCK"],
@@ -390,43 +388,36 @@ def _storm_prepare(authorization: str, filenames: List[str]) -> dict[Any, Any]:
             "User-Agent": WETRANSFER_USER_AGENT,
         },
     )
-    return r.json()
+    return {"files_bids": files_bids, "blocks": r.json()}
 
 
 def _storm_finalize_item(
-    file: str, block_id: str
-) -> dict[str, Union[List[str], str]]:
-    """Given a file and block_id prepare the item block dictionary.
+    file: str, block_ids: List[str]
+) -> Dict[str, Union[List[str], str]]:
+    """Given a file and its block_ids prepare the item block dictionary.
 
     Return a dictionary with "block_ids", "item_type" and "path" keys.
-
-    XXX: Is it possible to actually have more than one block?
-    XXX: If yes this - and probably other parts of the code involved with
-    XXX: blocks - needs to be instructed to handle them instead of
-    XXX: assuming that one file is associated with one block.
     """
     filename = os.path.basename(file)
 
     return {
-        "block_ids": [
-            block_id,
-        ],
+        "block_ids": block_ids,
         "item_type": "file",
         "path": filename,
     }
 
 
 def _storm_finalize(
-    authorization: str, filenames: List[str], block_ids: List[str]
-) -> dict[Any, Any]:
+    authorization: str, filenames: List[str], block_ids: List[List[str]]
+) -> Dict[Any, Any]:
     """Given an Authorization token, filenames and block ids finalize upload.
 
     Return the parsed JSON response.
""" j = { "items": [ - _storm_finalize_item(f, bid) - for f, bid in zip(filenames, block_ids) + _storm_finalize_item(f, bids) + for f, bids in zip(filenames, block_ids) ], } requests.options( @@ -464,32 +455,33 @@ def _storm_finalize( return r.json() -def _storm_upload(url: str, file: str) -> None: +def _storm_upload(urls: List[str], file: str) -> None: """Given an url and file upload it. Does not return anything. """ - requests.options( - url, - headers={ - "Origin": "https://wetransfer.com", - "Access-Control-Request-Method": "PUT", - "User-Agent": WETRANSFER_USER_AGENT, - }, - ) with open(file, "rb") as f: - requests.put( - url, - data=f, - headers={ - "Origin": "https://wetransfer.com", - "Content-MD5": binascii.b2a_base64( - binascii.unhexlify(_md5(file)), newline=False - ), - "X-Uploader": "storm", - "User-Agent": WETRANSFER_USER_AGENT, - }, - ) + for url, chunk in zip(urls,[chunk for chunk in iter(functools.partial(f.read, MAX_BLOCK_SIZE), b"")]): + requests.options( + url, + headers={ + "Origin": "https://wetransfer.com", + "Access-Control-Request-Method": "PUT", + "User-Agent": WETRANSFER_USER_AGENT, + }, + ) + requests.put( + url, + data=chunk, + headers={ + "Origin": "https://wetransfer.com", + "Content-MD5": binascii.b2a_base64( + binascii.unhexlify(hashlib.md5(chunk).hexdigest()), newline=False + ), + "X-Uploader": "storm", + "User-Agent": WETRANSFER_USER_AGENT, + }, + ) def _finalize_upload( @@ -583,15 +575,24 @@ def upload( logger.debug(f"Doing preflight storm") _storm_preflight(transfer["storm_upload_token"], files) logger.debug(f"Preparing storm block upload") - blocks = _storm_prepare(transfer["storm_upload_token"], files) - for f, b in zip(files, blocks["data"]["blocks"]): + prepare_data = _storm_prepare(transfer["storm_upload_token"], files) + blocks = prepare_data["blocks"]["data"]["blocks"] + start_block_index = 0 + file_index = 0 + file_bids = [] + for f in files: logger.debug(f"Uploading file {f}") - _storm_upload(b["presigned_put_url"], f) + file_chunks_count = len(prepare_data["files_bids"][file_index]) + file_blocks = blocks[start_block_index:start_block_index+file_chunks_count] + file_bids.append(file_blocks) + _storm_upload([b["presigned_put_url"] for b in file_blocks], f) + start_block_index += file_chunks_count + file_index += 1 logger.debug(f"Finalizing storm batch upload") _storm_finalize( transfer["storm_upload_token"], files, - [b["block_id"] for b in blocks["data"]["blocks"]], + [[b["block_id"] for b in f] for f in file_bids], ) logger.debug(f"Finalizing upload with transfer id {transfer['id']}") shortened_url = _finalize_upload(transfer["id"], s)["shortened_url"]