From 13e0e3e64e5974c4352ceb8addd6a350edd731cb Mon Sep 17 00:00:00 2001 From: Oliver Sanders Date: Wed, 19 Feb 2025 15:41:45 +0000 Subject: [PATCH] host-select: fix compatibility with force-condemned hosts --- changes.d/6623.fix.md | 4 ++ cylc/flow/cfgspec/globalcfg.py | 46 +++++++++++++++++-- cylc/flow/host_select.py | 11 +++-- .../43-auto-restart-force-override-normal.t | 5 +- 4 files changed, 57 insertions(+), 9 deletions(-) create mode 100644 changes.d/6623.fix.md diff --git a/changes.d/6623.fix.md b/changes.d/6623.fix.md new file mode 100644 index 00000000000..9e8898d7788 --- /dev/null +++ b/changes.d/6623.fix.md @@ -0,0 +1,4 @@ +Auto restart: The "force condemn" option (that tells workflows running on a +server to shut down as opposed to migrate) hasn't worked with the host-selection +mechanism since Cylc 8.0.0. This has now been fixed and the "force condemn" +option has been restored in the documentation. diff --git a/cylc/flow/cfgspec/globalcfg.py b/cylc/flow/cfgspec/globalcfg.py index 7ca7372e442..286cba46876 100644 --- a/cylc/flow/cfgspec/globalcfg.py +++ b/cylc/flow/cfgspec/globalcfg.py @@ -826,16 +826,52 @@ def default_for( range. ''') Conf('condemned', VDR.V_ABSOLUTE_HOST_LIST, desc=f''' - These hosts will not be used to run jobs. + List run hosts that workflows should *not* run on. - If workflows are already running on - condemned hosts, Cylc will shut them down and - restart them on different hosts. + These hosts will be subtracted from the + `available <global.cylc[scheduler][run hosts]available>` hosts + preventing new workflows from starting on the "condemned" host. + + Any workflows running on these hosts will either migrate + to another host, or shut down according to + :py:mod:`the configuration <cylc.flow.main_loop.auto_restart>`. + + This feature requires ``auto restart`` to be listed + in `global.cylc[scheduler][main loop]plugins`. + + For more information, see the + :py:mod:`auto restart <cylc.flow.main_loop.auto_restart>` + plugin. + + .. rubric:: Example: + + .. 
code-block:: cylc + + [scheduler] + [[main loop]] + # activate the "auto restart" plugin + plugins = auto restart + [[run hosts]] + # there are three hosts in the "pool" + available = host1, host2, host3 + + # however two have been taken out: + # * workflows running on "host1" will attempt to + # restart on "host3" + # * workflows running on "host2" will shut down + condemned = host1, host2! .. seealso:: + :py:mod:`cylc.flow.main_loop.auto_restart` :ref:`auto-stop-restart` + .. versionchanged:: 8.4.2 + + The force-condemn ("!") option caused issues at workflow + startup for Cylc versions between 8.0.0 and 8.4.1 + inclusive. + .. versionchanged:: 8.0.0 {REPLACES}``[suite servers]condemned hosts``. @@ -1336,7 +1372,7 @@ def default_for( The means by which task progress messages are reported back to the running workflow. - ..rubric:: Options: + .. rubric:: Options: zmq Direct client-server TCP communication via network ports diff --git a/cylc/flow/host_select.py b/cylc/flow/host_select.py index 69e32c68a71..cf940864b90 100644 --- a/cylc/flow/host_select.py +++ b/cylc/flow/host_select.py @@ -128,6 +128,13 @@ def select_workflow_host(cached=True): # be returned with the up-to-date configuration. global_config = glbl_cfg(cached=cached) + # condemned hosts may be suffixed with an "!" 
to activate "force mode" + blacklist = [] + for host in global_config.get(['scheduler', 'run hosts', 'condemned'], []): + if host.endswith('!'): + host = host[:-1] + blacklist.append(host) + return select_host( # list of workflow hosts global_config.get([ @@ -138,9 +145,7 @@ def select_workflow_host(cached=True): 'scheduler', 'run hosts', 'ranking' ]), # list of condemned hosts - blacklist=global_config.get( - ['scheduler', 'run hosts', 'condemned'] - ), + blacklist=blacklist, blacklist_name='condemned host' ) diff --git a/tests/functional/restart/43-auto-restart-force-override-normal.t b/tests/functional/restart/43-auto-restart-force-override-normal.t index b61d08c68cb..6aebc2a70a8 100644 --- a/tests/functional/restart/43-auto-restart-force-override-normal.t +++ b/tests/functional/restart/43-auto-restart-force-override-normal.t @@ -50,7 +50,10 @@ create_test_global_config '' " ${BASE_GLOBAL_CONFIG} [scheduler] [[run hosts]] - available = ${CYLC_TEST_HOST_1} + available = ${CYLC_TEST_HOST_1}, ${CYLC_TEST_HOST_2} + # ensure the workflow can start if a host is force-condemned + # see #6623 + condemned = ${CYLC_TEST_HOST_2}! " set_test_number 8