From 631d428708839aa067c27be352374ce8ed0bb974 Mon Sep 17 00:00:00 2001 From: Lars Michelsen Date: Thu, 30 Jan 2025 20:02:49 +0100 Subject: [PATCH] Improve job-scheduler start and stop logging Change-Id: I23268da616a39cd0e7566ead54781280e2d16a14 --- cmk/gui/job_scheduler/_background_jobs/_server.py | 10 ++++++++-- cmk/gui/job_scheduler/_scheduler.py | 4 ++++ cmk/gui/job_scheduler/main.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/cmk/gui/job_scheduler/_background_jobs/_server.py b/cmk/gui/job_scheduler/_background_jobs/_server.py index 1e58817ecb9..034b9723a15 100644 --- a/cmk/gui/job_scheduler/_background_jobs/_server.py +++ b/cmk/gui/job_scheduler/_background_jobs/_server.py @@ -3,14 +3,20 @@ # This file is part of Checkmk (https://checkmk.com). It is subject to the terms and # conditions defined in the file COPYING, which is part of this source code package. +from logging import Logger + import gunicorn.app.base # type: ignore[import-untyped] from fastapi import FastAPI from ._config import ServerConfig -def run_server(config: ServerConfig, app: FastAPI) -> None: - _ApplicationServer(app, config).run() +def run_server(config: ServerConfig, app: FastAPI, logger: Logger) -> None: + logger.info("Starting background job server") + try: + _ApplicationServer(app, config).run() + finally: + logger.info("Stopped background job server") class _ApplicationServer(gunicorn.app.base.BaseApplication): # type: ignore[misc] # pylint: disable=abstract-method diff --git a/cmk/gui/job_scheduler/_scheduler.py b/cmk/gui/job_scheduler/_scheduler.py index d21e6f6c08e..d025514c94f 100644 --- a/cmk/gui/job_scheduler/_scheduler.py +++ b/cmk/gui/job_scheduler/_scheduler.py @@ -26,6 +26,7 @@ def run_scheduler_threaded( crash_report_callback: Callable[[Exception], str], stop_event: threading.Event ) -> threading.Thread: + logger.info("Starting scheduler thread") t = threading.Thread( target=_run_scheduler, args=(crash_report_callback, stop_event), @@ -39,6 +40,7 @@ def _run_scheduler( crash_report_callback: Callable[[Exception], str], stop_event: threading.Event ) -> None: job_threads: dict[str, threading.Thread] = {} + logger.info("Started scheduler") while not stop_event.is_set(): try: cycle_start = time.time() @@ -59,7 +61,9 @@ def _run_scheduler( # in case there were some locks left over store.release_all_locks() + logger.info("Waiting for jobs to finish") _wait_for_job_threads(job_threads) + logger.info("Stopped scheduler") def _load_last_job_runs() -> dict[str, datetime.datetime]: diff --git a/cmk/gui/job_scheduler/main.py b/cmk/gui/job_scheduler/main.py index 8825c9aaae2..7f9e2bc6d15 100644 --- a/cmk/gui/job_scheduler/main.py +++ b/cmk/gui/job_scheduler/main.py @@ -71,11 +71,13 @@ def main(crash_report_callback: Callable[[Exception], str]) -> int: _setup_console_logging() + # This is only an intermediate handler until gunicorn run_server sets its own handler signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit(0)) daemonize() _setup_file_logging(log_path / "ui-job-scheduler.log") + logger.info("--- Starting ui-job-scheduler (Checkmk %s) ---", cmk_version.__version__) with pid_file_lock(_pid_file(omd_root)): init_span_processor( @@ -113,10 +115,17 @@ def main(crash_report_callback: Callable[[Exception], str]) -> int: registered_jobs=dict(job_registry.items()), executor=ThreadedJobExecutor(logger), ), + logger, ) + except SystemExit as exc: + logger.info("Process terminated (Exit code: %d)", exc.code) + raise finally: + logger.info("Stopping application") stop_event.set() scheduler_thread.join() + except SystemExit: + raise except Exception as exc: crash_msg = crash_report_callback(exc) logger.error("Unhandled exception (Crash ID: %s)", crash_msg, exc_info=True) @@ -136,5 +145,6 @@ def _setup_file_logging(log_file: Path) -> None: logging.Formatter("%(asctime)s [%(levelno)s] [%(process)d/%(threadName)s] %(message)s") ) logger = logging.getLogger() + del logger.handlers[:] # Remove all previously existing handlers logger.addHandler(handler) logger.setLevel(logging.INFO)