From fc396f7c61960f07d2721f5ff1ea00574d6b209b Mon Sep 17 00:00:00 2001
From: joostinyi <63941848+joostinyi@users.noreply.github.com>
Date: Mon, 10 Jun 2024 17:23:41 +0000
Subject: [PATCH] duplicate download util for shared template

---
 truss/templates/shared/lazy_data_resolver.py |  2 +-
 truss/templates/shared/util.py               | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/truss/templates/shared/lazy_data_resolver.py b/truss/templates/shared/lazy_data_resolver.py
index baaaf4e05..700028b96 100644
--- a/truss/templates/shared/lazy_data_resolver.py
+++ b/truss/templates/shared/lazy_data_resolver.py
@@ -4,7 +4,7 @@

 import pydantic
 import yaml
-from truss.util.download import download_from_url_using_requests
+from shared.util import download_from_url_using_requests

 LAZY_DATA_RESOLVER_PATH = Path("/bptr/bptr-manifest")

diff --git a/truss/templates/shared/util.py b/truss/templates/shared/util.py
index 6fc8f245d..302198c9d 100644
--- a/truss/templates/shared/util.py
+++ b/truss/templates/shared/util.py
@@ -1,10 +1,14 @@
 import multiprocessing
 import os
+import shutil
 import sys
+from pathlib import Path
 from typing import Callable, Dict, List, TypeVar

 import psutil
+import requests

+BLOB_DOWNLOAD_TIMEOUT_SECS = 600  # 10 minutes
 # number of seconds to wait for truss server child processes before sending kill signal
 CHILD_PROCESS_WAIT_TIMEOUT_SECONDS = 120

@@ -85,3 +89,24 @@ def kill_child_processes(parent_pid: int):

 def transform_keys(d: Dict[X, Z], fn: Callable[[X], Y]) -> Dict[Y, Z]:
     return {fn(key): value for key, value in d.items()}
+
+
+def download_from_url_using_requests(URL: str, download_to: Path):
+    """Stream the resource at ``URL`` into the file ``download_to``.
+
+    Raises ``requests.HTTPError`` on a 4xx/5xx response; the target file is
+    not opened in that case.
+    """
+    # Streaming download to keep memory usage low. The response is a context
+    # manager so the connection is released even if the copy fails.
+    with requests.get(
+        URL,
+        allow_redirects=True,
+        stream=True,
+        timeout=BLOB_DOWNLOAD_TIMEOUT_SECS,
+    ) as resp:
+        resp.raise_for_status()
+        # NOTE(review): resp.raw is copied as received; confirm blobs are
+        # never served with a Content-Encoding that would need decoding.
+        with download_to.open("wb") as file:
+            shutil.copyfileobj(resp.raw, file)