From a85979e47daae0c47f5909a8b148f8d98e5b4d51 Mon Sep 17 00:00:00 2001 From: "marius.baseten" Date: Wed, 12 Jun 2024 16:06:08 -0700 Subject: [PATCH] Show logs for docker containers if an exception was raised --- truss/tests/test_model_inference.py | 7 ++- .../test_testing_utilities_for_other_tests.py | 47 ++++++++++++++++++- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/truss/tests/test_model_inference.py b/truss/tests/test_model_inference.py index 586e257ac..217e2433d 100644 --- a/truss/tests/test_model_inference.py +++ b/truss/tests/test_model_inference.py @@ -15,7 +15,10 @@ from truss.local.local_config_handler import LocalConfigHandler from truss.model_inference import map_to_supported_python_version from truss.tests.helpers import create_truss -from truss.tests.test_testing_utilities_for_other_tests import ensure_kill_all +from truss.tests.test_testing_utilities_for_other_tests import ( + ensure_kill_all, + ensure_kill_all_and_show_logs_on_exception, +) from truss.truss_handle import TrussHandle logger = logging.getLogger(__name__) @@ -190,7 +193,7 @@ def test_requirements_file_truss(): @pytest.mark.integration @pytest.mark.parametrize("pydantic_major_version", ["1", "2"]) def test_requirements_pydantic(pydantic_major_version): - with ensure_kill_all(): + with ensure_kill_all_and_show_logs_on_exception(): truss_root = Path(__file__).parent.parent.parent.resolve() / "truss" truss_dir = truss_root / "test_data" / f"test_pyantic_v{pydantic_major_version}" diff --git a/truss/tests/test_testing_utilities_for_other_tests.py b/truss/tests/test_testing_utilities_for_other_tests.py index 8e856307f..041aa5768 100644 --- a/truss/tests/test_testing_utilities_for_other_tests.py +++ b/truss/tests/test_testing_utilities_for_other_tests.py @@ -1,7 +1,7 @@ # This file contains shared code to be used in other tests # TODO(pankaj): Using a tests file for shared code is not ideal, we should # move it to a regular file. This is a short term hack. 
import json
import shutil
import subprocess
import time


# TODO: gradually add this to more tests where applicable.
@contextmanager
def ensure_kill_all_and_show_logs_on_exception():
    """Run a test body, dump container logs on failure, then always clean up.

    The log-printing context manager is nested *inside* the ``try`` so that it
    exits (and prints logs) before ``kill_all_with_retries()`` runs in the
    ``finally`` clause. Cleanup (``kill_all_with_retries`` followed by
    ``ensure_free_disk_space``) happens whether or not the body raised.
    """
    try:
        with show_container_logs_if_raised():
            yield
    finally:
        kill_all_with_retries()
        ensure_free_disk_space()


def _human_readable_json_logs(raw_logs: str) -> str:
    """Render JSON-lines log output as one readable line per log entry.

    Each input line is expected to be a JSON object with the keys
    ``asctime``, ``levelname`` and ``message``. This function never raises on
    malformed content, because it is used while another exception is already
    propagating and must not mask it.

    Args:
        raw_logs: Log text, one entry per line.

    Returns:
        The formatted entries joined with newlines. Lines that are not valid
        JSON are reported as ``Failed to decode log line: <line>``; lines that
        are valid JSON but lack the expected structure are reported as
        ``Unexpected log line format: <line>``.
    """
    output = []
    for line in raw_logs.splitlines():
        try:
            log_entry = json.loads(line)
        except json.JSONDecodeError:
            output.append(f"Failed to decode log line: {line}")
            continue
        try:
            output.append(
                f"\t[{log_entry['asctime']}] "
                f"{log_entry['levelname']}: {log_entry['message']}"
            )
        except (KeyError, TypeError):
            # KeyError: a JSON object without the expected fields.
            # TypeError: valid JSON that is not an object (number, list, ...).
            output.append(f"Unexpected log line format: {line}")
    return "\n".join(output)


def _print_logs_of_new_containers(initial_ids) -> None:
    """Print logs of containers whose id is not in ``initial_ids``.

    Strictly best-effort: any failure while listing containers or fetching
    their logs is printed instead of raised, so the caller can safely re-raise
    the exception it is currently handling.
    """
    try:
        containers = get_containers({TRUSS: True})
    except Exception as error:
        print(f"Could not list containers to show their logs: {error!r}")
        return
    for container in containers:
        if container.id in initial_ids:
            continue
        try:
            print(f"Logs for container {container.name} ({container.id}):")
            print(_human_readable_json_logs(container.logs()))
        except Exception as error:
            # Keep going: the other containers may still have useful logs.
            print(f"Could not show container logs: {error!r}")


@contextmanager
def show_container_logs_if_raised():
    """Print logs of containers started inside the ``with`` body if it raises.

    The ids of the containers returned by ``get_containers({TRUSS: True})``
    are recorded on entry. If the body raises an ``Exception``, the logs of
    every container that is not in that initial set are printed and the
    original exception is re-raised unchanged. Nothing is printed when the
    body completes normally.
    """
    initial_ids = {c.id for c in get_containers({TRUSS: True})}
    try:
        yield
    except Exception:
        print("An exception was raised, showing logs of all containers.")
        _print_logs_of_new_containers(initial_ids)
        # Bare raise re-raises the body's exception; the helper above never
        # raises, so the original failure is what the test runner reports.
        raise