From 3f6f2ea1847c391a0dd0db95c1746025a92904b8 Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Tue, 12 Nov 2024 22:46:46 +0800 Subject: [PATCH 01/14] LIU-420: Prototype support for remote client configuration (Incomplete) - Basic use of SLURM 'template' script - Skeleton use of .ini file for environment variables - Added unittests to help prototyping --- daliuge-engine/dlg/deploy/configs/__init__.py | 10 ++ daliuge-engine/dlg/deploy/configs/default.ini | 12 ++ .../dlg/deploy/configs/default_slurm.template | 11 ++ daliuge-engine/dlg/deploy/configs/setonix.ini | 11 ++ .../dlg/deploy/configs/setonix_slurm.template | 11 ++ daliuge-engine/dlg/deploy/create_dlg_job.py | 36 ++++- daliuge-engine/dlg/deploy/deployment_utils.py | 3 + daliuge-engine/dlg/deploy/slurm_client.py | 134 ++++++++++++++---- .../test/deploy/example_template.slurm | 12 ++ daliuge-engine/test/deploy/setonix.ini | 11 ++ daliuge-engine/test/deploy/slurm_script.sh | 13 ++ .../test/deploy/slurm_script_from_template.sh | 14 ++ .../test/deploy/test_slurm_client.py | 84 +++++++++++ 13 files changed, 335 insertions(+), 27 deletions(-) create mode 100644 daliuge-engine/dlg/deploy/configs/default.ini create mode 100644 daliuge-engine/dlg/deploy/configs/default_slurm.template create mode 100644 daliuge-engine/dlg/deploy/configs/setonix.ini create mode 100644 daliuge-engine/dlg/deploy/configs/setonix_slurm.template create mode 100644 daliuge-engine/test/deploy/example_template.slurm create mode 100644 daliuge-engine/test/deploy/setonix.ini create mode 100644 daliuge-engine/test/deploy/slurm_script.sh create mode 100644 daliuge-engine/test/deploy/slurm_script_from_template.sh create mode 100644 daliuge-engine/test/deploy/test_slurm_client.py diff --git a/daliuge-engine/dlg/deploy/configs/__init__.py b/daliuge-engine/dlg/deploy/configs/__init__.py index 0429e6466..36ccc0159 100644 --- a/daliuge-engine/dlg/deploy/configs/__init__.py +++ b/daliuge-engine/dlg/deploy/configs/__init__.py @@ -52,6 +52,16 @@ $EXEC_PREFIX 
$PY_BIN -m dlg.deploy.start_dlg_cluster --log_dir $LOG_DIR $GRAPH_PAR $PROXY_PAR $GRAPH_VIS_PAR $LOGV_PAR $ZERORUN_PAR $MAXTHREADS_PAR $SNC_PAR $NUM_ISLANDS_PAR $ALL_NICS $CHECK_WITH_SESSION --ssid $SESSION_ID """ + +__slurm_shebang = "#!/bin/bash --login" + +dlg_exec_str = ( + "$EXEC_PREFIX $PY_BIN -m dlg.deploy.start_dlg_cluster" + " --log_dir $LOG_DIR $GRAPH_PAR $PROXY_PAR $GRAPH_VIS_PAR $LOGV_PAR $ZERORUN_PAR" + " $MAXTHREADS_PAR $SNC_PAR $NUM_ISLANDS_PAR $ALL_NICS $CHECK_WITH_SESSION" + " --ssid $SESSION_ID" +) + init_tpl = string.Template(__sub_tpl_str) diff --git a/daliuge-engine/dlg/deploy/configs/default.ini b/daliuge-engine/dlg/deploy/configs/default.ini new file mode 100644 index 000000000..e71016657 --- /dev/null +++ b/daliuge-engine/dlg/deploy/configs/default.ini @@ -0,0 +1,12 @@ +[ENVIRONMENT] +USER = +ACCOUNT = +LOGIN_NODE = +HOME_DIR = "" # This will default to the +DLG_ROOT = ${HOME_DIR}/dlg +LOG_DIR = ${DLG_ROOT}/log +MODULES = "" +VENV = ${DLG_ROOT}/venv +DEFAULT_MON_HOST = "dlg-mon.icrar.org" +DEFAULT_MON_PORT = 8898 +EXEC_PREFIX = "srun -l" \ No newline at end of file diff --git a/daliuge-engine/dlg/deploy/configs/default_slurm.template b/daliuge-engine/dlg/deploy/configs/default_slurm.template new file mode 100644 index 000000000..fd807313d --- /dev/null +++ b/daliuge-engine/dlg/deploy/configs/default_slurm.template @@ -0,0 +1,11 @@ +#!/bin/bash --login + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=2 +#SBATCH --job-name=DALiuGE-$SESSION_ID +#SBATCH --time=30 +#SBATCH --error=err-%j.log +export DLG_ROOT=$DLG_ROOT + +VENV=$DLG_ROOT/venv \ No newline at end of file diff --git a/daliuge-engine/dlg/deploy/configs/setonix.ini b/daliuge-engine/dlg/deploy/configs/setonix.ini new file mode 100644 index 000000000..f8eb369b2 --- /dev/null +++ b/daliuge-engine/dlg/deploy/configs/setonix.ini @@ -0,0 +1,11 @@ +[ENVIRONMENT] +ACCOUNT = pawsey0411 +LOGIN_NODE = setonix.pawsey.org.au +DEFAULT_MON_HOST = dlg-mon.icrar.org 
+DEFAULT_MON_PORT = 8898 +EXEC_PREFIX = srun -l +HOME_DIR = /scratch/${ACCOUNT} +DLG_ROOT = ${HOME_DIR}/${USER}/dlg +LOG_DIR = ${DLG_ROOT}/log +MODULES = "" +VENV = f"source /software/projects/{ACCOUNT}/venv/bin/activate" diff --git a/daliuge-engine/dlg/deploy/configs/setonix_slurm.template b/daliuge-engine/dlg/deploy/configs/setonix_slurm.template new file mode 100644 index 000000000..67dcdaeff --- /dev/null +++ b/daliuge-engine/dlg/deploy/configs/setonix_slurm.template @@ -0,0 +1,11 @@ +#!/bin/bash --login + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=2 +#SBATCH --job-name=DALiuGE-$SESSION_ID +#SBATCH --time=00:30:00 +#SBATCH --error=err-%j.log + +export DLG_ROOT=$DLG_ROOT +source /software/projects/pawsey0411/venv/bin/activate \ No newline at end of file diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 76753d4d5..553369dca 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -37,6 +37,8 @@ import time import os +from pathlib import Path + from dlg.deploy.configs import ( ConfigFactory, ) # get all available configurations @@ -403,6 +405,16 @@ def check_log_dir(self, log_dir): return True return False +def load_client_config(cfg_path: Path): + """ + """ + from configparser import ConfigParser, ExtendedInterpolation + parser = ConfigParser() #interpolation=ExtendedInterpolation()) + parser.read(cfg_path) + print(dict(parser["ENVIRONMENT"])) + + + def main(): parser = optparse.OptionParser( @@ -623,7 +635,7 @@ def main(): "--configs", dest="configs", action="store_true", - help="Display the available configurations and exit", + help="Display the available configurations and exit", default=False, ) parser.add_option( @@ -635,6 +647,22 @@ def main(): help="Remote username, if different from local", default=None, ) + parser.add_option( + "--config_file", + dest="config_file", + type="string", + action="store", + help="Use 
INI configuration file.", + default=None + ) + parser.add_option( + "--slurm_template", + dest="slurm_template", + type="string", + action="store", + help="Use SLURM template file for job submission", + default=None + ) (opts, _) = parser.parse_args(sys.argv) if opts.configs: @@ -715,6 +743,10 @@ def main(): else: pgt_file = path_to_graph_file + # if opts.config_file: + # load_client_config(Path(opts.config_file)) + # sys.exit(0) + client = SlurmClient( dlg_root=opts.dlg_root, log_root=opts.log_root, @@ -734,6 +766,8 @@ def main(): submit=opts.submit, remote=opts.remote, username=opts.username, + config=opts.config, + slurm_template=opts.slurm_template ) client._visualise_graph = opts.visualise_graph client.submit_job() diff --git a/daliuge-engine/dlg/deploy/deployment_utils.py b/daliuge-engine/dlg/deploy/deployment_utils.py index 1ee31038a..8ddac9139 100644 --- a/daliuge-engine/dlg/deploy/deployment_utils.py +++ b/daliuge-engine/dlg/deploy/deployment_utils.py @@ -122,6 +122,9 @@ def find_numislands(physical_graph_template_file): init already. 
TODO: We will probably need to do the same with job duration and CPU number """ + if not physical_graph_template_file: + return None, None, physical_graph_template_file + with open(physical_graph_template_file, "r") as f: pgt_data = json.load(f, strict=False) try: diff --git a/daliuge-engine/dlg/deploy/slurm_client.py b/daliuge-engine/dlg/deploy/slurm_client.py index 08e34f402..3d6ce133f 100644 --- a/daliuge-engine/dlg/deploy/slurm_client.py +++ b/daliuge-engine/dlg/deploy/slurm_client.py @@ -32,7 +32,7 @@ from dlg import remote from dlg.runtime import __git_version__ as git_commit -from dlg.deploy.configs import ConfigFactory, init_tpl +from dlg.deploy.configs import ConfigFactory, init_tpl, dlg_exec_str from dlg.deploy.configs import DEFAULT_MON_PORT, DEFAULT_MON_HOST from dlg.deploy.deployment_utils import find_numislands, label_job_dur from paramiko.ssh_exception import SSHException @@ -78,23 +78,52 @@ def __init__( remote=True, pip_name=None, username=None, + config=None, + slurm_template=None, + suffix=None ): - self._config = ConfigFactory.create_config(facility=facility, user=username) - self.host = self._config.getpar("host") if host is None else host - self._acc = self._config.getpar("account") if (acc is None) else acc - self._user = self._config.getpar("user") if (username is None) else username - self.dlg_root = self._config.getpar("dlg_root") if not dlg_root else dlg_root - self._log_root = ( - self._config.getpar("log_root") if (log_root is None) else log_root - ) - self.modules = self._config.getpar("modules") - self.venv = self._config.getpar("venv") - self.exec_prefix = self._config.getpar("exec_prefix") - if num_nodes is None: + + ## TODO + ## Here, we want to separate out the following + ## Config derived from CONFIG Factory - we replace with ini file + ## Config derived from CLI, intended for replacement in the SLURM job script + ## - We want to replace these directives with the SLURM template + ## Config derived from CLI that is used in the 
final script call + ## Any leftover config - we keep as normal + + if config: + # Do the config from the config file + self.host = config['host'] + else: + # Setup SLURM environment variables using config + self._config = ConfigFactory.create_config(facility=facility, user=username) + self.host = self._config.getpar("host") if host is None else host + self._acc = self._config.getpar("account") if (acc is None) else acc + # self._user = self._config.getpar("user") if (username is None) else username + + # environment & sbatch + self.dlg_root = self._config.getpar("dlg_root") if not dlg_root else dlg_root + self.modules = self._config.getpar("modules") + self.venv = self._config.getpar("venv") + self.exec_prefix = self._config.getpar("exec_prefix") + + # sbatch + if slurm_template: + self._slurm_template = slurm_template self._num_nodes = 1 + self._job_dur = 1 else: - self._num_nodes = num_nodes - self._job_dur = job_dur + if num_nodes is None: + self._num_nodes = 1 + else: + self._num_nodes = num_nodes + self._job_dur = job_dur + + # self._log_root = ( + # self._config.getpar("log_root") if (log_root is None) else log_root + # ) + # + # start_dlg_cluster arguments self._logical_graph = logical_graph self._physical_graph_template_file = physical_graph_template_file self._visualise_graph = False @@ -113,14 +142,24 @@ def __init__( self._all_nics = all_nics self._check_with_session = check_with_session self._submit = submit - self._remote = remote - self._dtstr = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # .%f + self._suffix = self.create_session_suffix(suffix) ni, nn, self._pip_name = find_numislands(self._physical_graph_template_file) if isinstance(ni, int) and ni >= self._num_islands: self._num_islands = ni if nn and nn >= self._num_nodes: self._num_nodes = nn + + # used for remote login/directory management. 
+ self._remote = remote self.username = username + + + def create_session_suffix(self, suffix=None): + if not suffix: + datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") + else: + return suffix + def get_session_dirname(self): """ @@ -131,7 +170,23 @@ def get_session_dirname(self): # dtstr = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # .%f graph_name = self._pip_name.split("_")[0] # use only the part of the graph name graph_name = graph_name.rsplit(".pgt.graph")[0] - return "{0}_{1}".format(graph_name, self._dtstr) + return "{0}_{1}".format(graph_name, self._suffix) + + def process_config(config_file): + """ + Use configparser to process INI file + + :returns: dict, config information + """ + from configparser import ConfigParser + parser = ConfigParser() #interpolation=ExtendedInterpolation()) + parser.read(config_file) + return (dict(parser["ENVIRONMENT"])) + + def apply_slurm_template(self,template_str, session_id, dlg_root): + import string + intermed_slurm = string.Template(template_str) + return intermed_slurm.safe_substitute(session_id=session_id, dlg_root=dlg_root) def create_job_desc(self, physical_graph_file): """ @@ -140,12 +195,26 @@ def create_job_desc(self, physical_graph_file): session_dir = "{0}/workspace/{1}".format( self.dlg_root, self.get_session_dirname() ) + pardict = dict() + pardict["SESSION_ID"] = os.path.split(session_dir)[-1] + pardict["MODULES"] = self.modules + pardict["DLG_ROOT"] = self.dlg_root + pardict["EXEC_PREFIX"] = self.exec_prefix + slurm_str = dlg_exec_str + if self._slurm_template: + intermed_slurm = self.apply_slurm_template( + self._slurm_template, + pardict["SESSION_ID"], + pardict["DLG_ROOT"] + ) + slurm_str = intermed_slurm + "\n\n" + dlg_exec_str + else: + pardict["NUM_NODES"] = str(self._num_nodes) + pardict["JOB_DURATION"] = label_job_dur(self._job_dur) + pardict["VENV"] = self.venv - pardict["NUM_NODES"] = str(self._num_nodes) pardict["PIP_NAME"] = self._pip_name - pardict["SESSION_ID"] = 
os.path.split(session_dir)[-1] - pardict["JOB_DURATION"] = label_job_dur(self._job_dur) pardict["ACCOUNT"] = self._acc pardict["PY_BIN"] = "python3" if pardict["VENV"] else sys.executable pardict["LOG_DIR"] = session_dir @@ -173,11 +242,11 @@ def create_job_desc(self, physical_graph_file): pardict["CHECK_WITH_SESSION"] = ( "--check_with_session" if self._check_with_session else "" ) - pardict["MODULES"] = self.modules - pardict["DLG_ROOT"] = self.dlg_root - pardict["EXEC_PREFIX"] = self.exec_prefix - - job_desc = init_tpl.safe_substitute(pardict) + if self._slurm_template: + import string + job_desc = string.Template(slurm_str).safe_substitute(pardict) + else: + job_desc = init_tpl.safe_substitute(pardict) return job_desc def mk_session_dir(self, dlg_root: str = ""): @@ -272,3 +341,16 @@ def submit_job(self): else: print(f"Created job submission script {job_file_name}") return jobId + + +# class ConfigManager: +# """ +# """ + +# def process_config(): +# pass + +# def create_slurm_script(): +# """ +# """ +# job_desc = init_tpl.safe_substitute(pardict) diff --git a/daliuge-engine/test/deploy/example_template.slurm b/daliuge-engine/test/deploy/example_template.slurm new file mode 100644 index 000000000..5b9a8e68e --- /dev/null +++ b/daliuge-engine/test/deploy/example_template.slurm @@ -0,0 +1,12 @@ +#!/bin/bash --login + +#SBATCH --nodes=16 +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=4 +#SBATCH --mem=0 +#SBATCH --job-name=DALiuGE-$SESSION_ID +#SBATCH --time=00:60:00 +#SBATCH --error=err-%j.log +export DLG_ROOT=$DLG_ROOT + +VENV=$DLG_ROOT/venv \ No newline at end of file diff --git a/daliuge-engine/test/deploy/setonix.ini b/daliuge-engine/test/deploy/setonix.ini new file mode 100644 index 000000000..f8eb369b2 --- /dev/null +++ b/daliuge-engine/test/deploy/setonix.ini @@ -0,0 +1,11 @@ +[ENVIRONMENT] +ACCOUNT = pawsey0411 +LOGIN_NODE = setonix.pawsey.org.au +DEFAULT_MON_HOST = dlg-mon.icrar.org +DEFAULT_MON_PORT = 8898 +EXEC_PREFIX = srun -l +HOME_DIR = 
/scratch/${ACCOUNT} +DLG_ROOT = ${HOME_DIR}/${USER}/dlg +LOG_DIR = ${DLG_ROOT}/log +MODULES = "" +VENV = f"source /software/projects/{ACCOUNT}/venv/bin/activate" diff --git a/daliuge-engine/test/deploy/slurm_script.sh b/daliuge-engine/test/deploy/slurm_script.sh new file mode 100644 index 000000000..e342cc6bb --- /dev/null +++ b/daliuge-engine/test/deploy/slurm_script.sh @@ -0,0 +1,13 @@ +#!/bin/bash --login + +#SBATCH --nodes=6 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=2 +#SBATCH --job-name=DALiuGE-EAGLE_TestSession +#SBATCH --time=00:60:00 +#SBATCH --error=err-%j.log + +export DLG_ROOT=/scratch/pawsey0411/$USER/dlg +source /software/projects/pawsey0411/venv/bin/activate + +srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/$USER/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid EAGLE_TestSession \ No newline at end of file diff --git a/daliuge-engine/test/deploy/slurm_script_from_template.sh b/daliuge-engine/test/deploy/slurm_script_from_template.sh new file mode 100644 index 000000000..3364c83db --- /dev/null +++ b/daliuge-engine/test/deploy/slurm_script_from_template.sh @@ -0,0 +1,14 @@ +#!/bin/bash --login + +#SBATCH --nodes=16 +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=4 +#SBATCH --mem=0 +#SBATCH --job-name=DALiuGE-EAGLE_TestSession +#SBATCH --time=00:60:00 +#SBATCH --error=err-%j.log + +export DLG_ROOT=/scratch/pawsey0411/$USER/dlg +source /software/projects/pawsey0411/venv/bin/activate + +srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/$USER/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 
--num_islands 1 --ssid EAGLE_TestSession \ No newline at end of file diff --git a/daliuge-engine/test/deploy/test_slurm_client.py b/daliuge-engine/test/deploy/test_slurm_client.py new file mode 100644 index 000000000..917767c67 --- /dev/null +++ b/daliuge-engine/test/deploy/test_slurm_client.py @@ -0,0 +1,84 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2024 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +import unittest +from pathlib import Path + +try: + from importlib.resources import files +except ModuleNotFoundError: + from importlib_resources import files # type: ignore +import dlg.deploy.configs as configs +import daliuge_tests.engine.graphs as test_graphs + +from dlg.deploy.slurm_client import SlurmClient +import json + +class TestSlurmClient(unittest.TestCase): + + def test_client_with_cli(self): + # Use special graph that also contains file name. 
See 'create_dlg_job.py' + pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" + + client = SlurmClient( + facility="setonix", + num_nodes=6, + job_dur=60, + physical_graph_template_file=str(pg), + suffix="TestSession" + ) + job_desc = client.create_job_desc(pg) + curr_file = Path(__file__) + compare_script = curr_file.parent / "sample.sh" + with compare_script.open() as fp: + script = fp.read() + self.assertEqual(script, job_desc) + + def test_client_with_configfile(self): + """ + Using the INI file, test: + - That we produce the same as the CLI with the same parameters + - That we can use the INI file to produce alternative parameters + """ + client = SlurmClient() + + job_desc = client.create_job_desc() + + + def test_client_with_slurm_template(self): + pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" + with ("exampe_template.slurm").open('r') as fp: + slurm_template = fp.read() + client = SlurmClient( + facility="setonix", + physical_graph_template_file=str(pg), + suffix="TestSession", + slurm_template=slurm_template + ) + job_desc = client.create_job_desc(pg) + curr_file = Path(__file__) + compare_script = curr_file.parent / "slurm_script_from_template.sh" + with open('output.sh', 'w') as fp: + fp.write(job_desc) + with compare_script.open() as fp: + script = fp.read() + self.assertEqual(script, job_desc) From 1a324097652bcc99dfd52bbb3312c87c8fe23c59 Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Wed, 13 Nov 2024 14:10:26 +0800 Subject: [PATCH 02/14] LIU-420: Finalise prototype config behaviour - Tests pass for config and slurm template - Tested on Setonix --- daliuge-engine/dlg/deploy/configs/default.ini | 9 +-- .../{default_slurm.template => default.slurm} | 0 daliuge-engine/dlg/deploy/configs/setonix.ini | 10 +-- .../{setonix_slurm.template => setonix.slurm} | 4 +- daliuge-engine/dlg/deploy/create_dlg_job.py | 78 ++++++++++++------- daliuge-engine/dlg/deploy/slurm_client.py | 64 ++++++++------- .../test/deploy/example_template.slurm | 6 
+- daliuge-engine/test/deploy/setonix.ini | 12 +-- daliuge-engine/test/deploy/slurm_script.sh | 6 +- .../test/deploy/slurm_script_from_template.sh | 6 +- .../test/deploy/test_slurm_client.py | 40 ++++++++-- 11 files changed, 141 insertions(+), 94 deletions(-) rename daliuge-engine/dlg/deploy/configs/{default_slurm.template => default.slurm} (100%) rename daliuge-engine/dlg/deploy/configs/{setonix_slurm.template => setonix.slurm} (84%) diff --git a/daliuge-engine/dlg/deploy/configs/default.ini b/daliuge-engine/dlg/deploy/configs/default.ini index e71016657..fbc6b4c0c 100644 --- a/daliuge-engine/dlg/deploy/configs/default.ini +++ b/daliuge-engine/dlg/deploy/configs/default.ini @@ -1,12 +1,11 @@ +; This will be replaced by the above definition when processed [ENVIRONMENT] USER = ACCOUNT = LOGIN_NODE = -HOME_DIR = "" # This will default to the -DLG_ROOT = ${HOME_DIR}/dlg +HOME_DIR = +DLG_ROOT = ${HOME_DIR}/dlg LOG_DIR = ${DLG_ROOT}/log -MODULES = "" +MODULES = VENV = ${DLG_ROOT}/venv -DEFAULT_MON_HOST = "dlg-mon.icrar.org" -DEFAULT_MON_PORT = 8898 EXEC_PREFIX = "srun -l" \ No newline at end of file diff --git a/daliuge-engine/dlg/deploy/configs/default_slurm.template b/daliuge-engine/dlg/deploy/configs/default.slurm similarity index 100% rename from daliuge-engine/dlg/deploy/configs/default_slurm.template rename to daliuge-engine/dlg/deploy/configs/default.slurm diff --git a/daliuge-engine/dlg/deploy/configs/setonix.ini b/daliuge-engine/dlg/deploy/configs/setonix.ini index f8eb369b2..a374aad15 100644 --- a/daliuge-engine/dlg/deploy/configs/setonix.ini +++ b/daliuge-engine/dlg/deploy/configs/setonix.ini @@ -1,11 +1,11 @@ +; This will be replaced by the above definition when processed [ENVIRONMENT] +USER = rbunney ACCOUNT = pawsey0411 LOGIN_NODE = setonix.pawsey.org.au -DEFAULT_MON_HOST = dlg-mon.icrar.org -DEFAULT_MON_PORT = 8898 -EXEC_PREFIX = srun -l HOME_DIR = /scratch/${ACCOUNT} DLG_ROOT = ${HOME_DIR}/${USER}/dlg LOG_DIR = ${DLG_ROOT}/log -MODULES = "" -VENV = 
f"source /software/projects/{ACCOUNT}/venv/bin/activate" +MODULES = +VENV = source /software/projects/${ACCOUNT}/venv/bin/activate +EXEC_PREFIX = srun -l \ No newline at end of file diff --git a/daliuge-engine/dlg/deploy/configs/setonix_slurm.template b/daliuge-engine/dlg/deploy/configs/setonix.slurm similarity index 84% rename from daliuge-engine/dlg/deploy/configs/setonix_slurm.template rename to daliuge-engine/dlg/deploy/configs/setonix.slurm index 67dcdaeff..cd0020b98 100644 --- a/daliuge-engine/dlg/deploy/configs/setonix_slurm.template +++ b/daliuge-engine/dlg/deploy/configs/setonix.slurm @@ -1,10 +1,10 @@ #!/bin/bash --login -#SBATCH --nodes=1 +#SBATCH --nodes=2 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=2 #SBATCH --job-name=DALiuGE-$SESSION_ID -#SBATCH --time=00:30:00 +#SBATCH --time=00:45:00 #SBATCH --error=err-%j.log export DLG_ROOT=$DLG_ROOT diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 553369dca..5114988ac 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -405,15 +405,52 @@ def check_log_dir(self, log_dir): return True return False -def load_client_config(cfg_path: Path): +def process_config(config_file: str): """ - """ - from configparser import ConfigParser, ExtendedInterpolation - parser = ConfigParser() #interpolation=ExtendedInterpolation()) - parser.read(cfg_path) - print(dict(parser["ENVIRONMENT"])) + Use configparser to process INI file + + Current functionality: + - Returns remote environment config (e.g. DLG_ROOT, HOME etc.) 
+ Future Functionality: + - Graph translation parameters + - Engine parameters + :returns: dict, config information + """ + from configparser import ConfigParser, ExtendedInterpolation + parser = ConfigParser(interpolation=ExtendedInterpolation()) + parser.read(config_file) + return (dict(parser["ENVIRONMENT"])) + +def process_slurm_template(template_file: str): + template = Path(template_file) + with template.open('r') as fp: + return fp.read() + +def create_experiment_group(parser: optparse.OptionParser): + from optparse import OptionGroup + group=OptionGroup(parser, "Experimental Options", + "Caution: These are not properly tested and likely to" + "be rough around the edges.") + + group.add_option( + "--config_file", + dest="config_file", + type="string", + action="store", + help="Use INI configuration file.", + default=None + ) + group.add_option( + "--slurm_template", + dest="slurm_template", + type="string", + action="store", + help="Use SLURM template file for job submission. WARNING: Using this command will over-write other job-parameters passed here.", + default=None + ) + return group def main(): @@ -647,22 +684,8 @@ def main(): help="Remote username, if different from local", default=None, ) - parser.add_option( - "--config_file", - dest="config_file", - type="string", - action="store", - help="Use INI configuration file.", - default=None - ) - parser.add_option( - "--slurm_template", - dest="slurm_template", - type="string", - action="store", - help="Use SLURM template file for job submission", - default=None - ) + + parser.add_option_group(create_experiment_group(parser)) (opts, _) = parser.parse_args(sys.argv) if opts.configs: @@ -743,9 +766,9 @@ def main(): else: pgt_file = path_to_graph_file - # if opts.config_file: - # load_client_config(Path(opts.config_file)) - # sys.exit(0) + config = process_config(opts.config_file) if opts.config_file else None + template = process_slurm_template( + opts.slurm_template) if opts.slurm_template else None client 
= SlurmClient( dlg_root=opts.dlg_root, @@ -766,9 +789,10 @@ def main(): submit=opts.submit, remote=opts.remote, username=opts.username, - config=opts.config, - slurm_template=opts.slurm_template + config=config, + slurm_template=template ) + client._visualise_graph = opts.visualise_graph client.submit_job() else: diff --git a/daliuge-engine/dlg/deploy/slurm_client.py b/daliuge-engine/dlg/deploy/slurm_client.py index 3d6ce133f..f29c57158 100644 --- a/daliuge-engine/dlg/deploy/slurm_client.py +++ b/daliuge-engine/dlg/deploy/slurm_client.py @@ -93,26 +93,36 @@ def __init__( if config: # Do the config from the config file - self.host = config['host'] + self.host = config['login_node'] + self._acc = config['account'] # superceded by slurm_template if present + self.dlg_root = config['dlg_root'] + self.modules = config['modules'] + self.venv = config['venv'] # superceded by slurm_template if present + self.exec_prefix = config["exec_prefix"] + self.username = config['user'] if 'user' in config else sys.exit(1) + if not self.username: + print("Username not configured in INI file.") + sys.exit(1) else: # Setup SLURM environment variables using config - self._config = ConfigFactory.create_config(facility=facility, user=username) - self.host = self._config.getpar("host") if host is None else host - self._acc = self._config.getpar("account") if (acc is None) else acc - # self._user = self._config.getpar("user") if (username is None) else username + config = ConfigFactory.create_config(facility=facility, user=username) + self.host = config.getpar("host") if host is None else host + self._acc = config.getpar("account") if (acc is None) else acc + # self._user = config.getpar("user") if (username is None) else username # environment & sbatch - self.dlg_root = self._config.getpar("dlg_root") if not dlg_root else dlg_root - self.modules = self._config.getpar("modules") - self.venv = self._config.getpar("venv") - self.exec_prefix = self._config.getpar("exec_prefix") - + 
self.dlg_root = config.getpar("dlg_root") if not dlg_root else dlg_root + self.modules = config.getpar("modules") + self.venv = config.getpar("venv") + self.exec_prefix = config.getpar("exec_prefix") + self.username = username # sbatch if slurm_template: self._slurm_template = slurm_template - self._num_nodes = 1 - self._job_dur = 1 + self._num_nodes = 1 # placeholder + self._job_dur = 1 # placeholder else: + self._slurm_template = None if num_nodes is None: self._num_nodes = 1 else: @@ -151,39 +161,24 @@ def __init__( # used for remote login/directory management. self._remote = remote - self.username = username def create_session_suffix(self, suffix=None): if not suffix: - datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") + return datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") else: return suffix - def get_session_dirname(self): """ (pipeline name_)[Nnum_of_daliuge_nodes]_[time_stamp] """ - # Moved setting of dtstr to init - # to ensure it doesn't change for this instance of SlurmClient() - # dtstr = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # .%f graph_name = self._pip_name.split("_")[0] # use only the part of the graph name graph_name = graph_name.rsplit(".pgt.graph")[0] return "{0}_{1}".format(graph_name, self._suffix) - def process_config(config_file): - """ - Use configparser to process INI file - :returns: dict, config information - """ - from configparser import ConfigParser - parser = ConfigParser() #interpolation=ExtendedInterpolation()) - parser.read(config_file) - return (dict(parser["ENVIRONMENT"])) - - def apply_slurm_template(self,template_str, session_id, dlg_root): + def apply_slurm_template(self, template_str, session_id, dlg_root): import string intermed_slurm = string.Template(template_str) return intermed_slurm.safe_substitute(session_id=session_id, dlg_root=dlg_root) @@ -208,6 +203,7 @@ def create_job_desc(self, physical_graph_file): pardict["SESSION_ID"], pardict["DLG_ROOT"] ) + print("Creating job description") 
slurm_str = intermed_slurm + "\n\n" + dlg_exec_str else: pardict["NUM_NODES"] = str(self._num_nodes) @@ -254,6 +250,7 @@ def mk_session_dir(self, dlg_root: str = ""): Create the session directory. If dlg_root is provided it is used, else env var DLG_ROOT is used. """ + if dlg_root: # has always preference self.dlg_root = dlg_root if self._remote and not self.dlg_root: @@ -278,11 +275,10 @@ def mk_session_dir(self, dlg_root: str = ""): ) try: remote.execRemote(self.host, command, username=self.username) - except (TypeError, SSHException): + except (TypeError, SSHException) as e: print( - f"ERROR: Unable to create {session_dir} on {self.username}@{self.host}" + f"ERROR: Unable to create {session_dir} on {self.username}@{self.host}, {str(e)}" ) - sys.exit() return session_dir @@ -309,6 +305,8 @@ def submit_job(self): job_file_name = "{0}/jobsub.sh".format(session_dir) job_desc = self.create_job_desc(physical_graph_file_name) + print(job_desc) + # sys.exit() if self._remote: print(f"Creating SLURM script remotely: {job_file_name}") tjob = tempfile.mktemp() @@ -333,7 +331,7 @@ def submit_job(self): ) if exitStatus != 0: print( - f"Job submission unsuccessful: {exitStatus.decode()}, {stderr.decode()}" + f"Job submission unsuccessful: {exitStatus}, {stderr.decode()}" ) else: jobId = stdout.decode() diff --git a/daliuge-engine/test/deploy/example_template.slurm b/daliuge-engine/test/deploy/example_template.slurm index 5b9a8e68e..c72e5ef18 100644 --- a/daliuge-engine/test/deploy/example_template.slurm +++ b/daliuge-engine/test/deploy/example_template.slurm @@ -5,8 +5,8 @@ #SBATCH --cpus-per-task=4 #SBATCH --mem=0 #SBATCH --job-name=DALiuGE-$SESSION_ID -#SBATCH --time=00:60:00 +#SBATCH --time=00:45:00 #SBATCH --error=err-%j.log -export DLG_ROOT=$DLG_ROOT -VENV=$DLG_ROOT/venv \ No newline at end of file +export DLG_ROOT=$DLG_ROOT +source /software/projects/pawsey0411/venv/bin/activate \ No newline at end of file diff --git a/daliuge-engine/test/deploy/setonix.ini 
b/daliuge-engine/test/deploy/setonix.ini index f8eb369b2..0df0b87a3 100644 --- a/daliuge-engine/test/deploy/setonix.ini +++ b/daliuge-engine/test/deploy/setonix.ini @@ -1,11 +1,11 @@ -[ENVIRONMENT] + [ENVIRONMENT] ACCOUNT = pawsey0411 +USER = test LOGIN_NODE = setonix.pawsey.org.au -DEFAULT_MON_HOST = dlg-mon.icrar.org -DEFAULT_MON_PORT = 8898 -EXEC_PREFIX = srun -l +XEC_PREFIX = srun -l HOME_DIR = /scratch/${ACCOUNT} DLG_ROOT = ${HOME_DIR}/${USER}/dlg LOG_DIR = ${DLG_ROOT}/log -MODULES = "" -VENV = f"source /software/projects/{ACCOUNT}/venv/bin/activate" +MODULES = +VENV = source /software/projects/${ACCOUNT}/venv/bin/activate +EXEC_PREFIX = srun -l \ No newline at end of file diff --git a/daliuge-engine/test/deploy/slurm_script.sh b/daliuge-engine/test/deploy/slurm_script.sh index e342cc6bb..28bc23ccb 100644 --- a/daliuge-engine/test/deploy/slurm_script.sh +++ b/daliuge-engine/test/deploy/slurm_script.sh @@ -4,10 +4,10 @@ #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=2 #SBATCH --job-name=DALiuGE-EAGLE_TestSession -#SBATCH --time=00:60:00 +#SBATCH --time=00:45:00 #SBATCH --error=err-%j.log -export DLG_ROOT=/scratch/pawsey0411/$USER/dlg +export DLG_ROOT=/scratch/pawsey0411/test/dlg source /software/projects/pawsey0411/venv/bin/activate -srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/$USER/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid EAGLE_TestSession \ No newline at end of file +srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/test/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid 
EAGLE_TestSession diff --git a/daliuge-engine/test/deploy/slurm_script_from_template.sh b/daliuge-engine/test/deploy/slurm_script_from_template.sh index 3364c83db..6c1740050 100644 --- a/daliuge-engine/test/deploy/slurm_script_from_template.sh +++ b/daliuge-engine/test/deploy/slurm_script_from_template.sh @@ -5,10 +5,10 @@ #SBATCH --cpus-per-task=4 #SBATCH --mem=0 #SBATCH --job-name=DALiuGE-EAGLE_TestSession -#SBATCH --time=00:60:00 +#SBATCH --time=00:45:00 #SBATCH --error=err-%j.log -export DLG_ROOT=/scratch/pawsey0411/$USER/dlg +export DLG_ROOT=/scratch/pawsey0411/test/dlg source /software/projects/pawsey0411/venv/bin/activate -srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/$USER/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid EAGLE_TestSession \ No newline at end of file +srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/test/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid EAGLE_TestSession \ No newline at end of file diff --git a/daliuge-engine/test/deploy/test_slurm_client.py b/daliuge-engine/test/deploy/test_slurm_client.py index 917767c67..e7ebefd0c 100644 --- a/daliuge-engine/test/deploy/test_slurm_client.py +++ b/daliuge-engine/test/deploy/test_slurm_client.py @@ -42,13 +42,14 @@ def test_client_with_cli(self): client = SlurmClient( facility="setonix", num_nodes=6, - job_dur=60, + job_dur=45, physical_graph_template_file=str(pg), - suffix="TestSession" + suffix="TestSession", + username="test" ) job_desc = client.create_job_desc(pg) curr_file = Path(__file__) - compare_script = curr_file.parent 
/ "sample.sh" + compare_script = curr_file.parent / "slurm_script.sh" with compare_script.open() as fp: script = fp.read() self.assertEqual(script, job_desc) @@ -59,20 +60,45 @@ def test_client_with_configfile(self): - That we produce the same as the CLI with the same parameters - That we can use the INI file to produce alternative parameters """ - client = SlurmClient() + from dlg.deploy.create_dlg_job import process_config + pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" + cfg_file = Path(__file__).parent / "setonix.ini" + cfg = process_config(cfg_file) + client = SlurmClient( + facility="setonix", + num_nodes=6, + job_dur=45, + physical_graph_template_file=str(pg), + suffix="TestSession", + config=cfg, + username='test' + ) - job_desc = client.create_job_desc() + job_desc = client.create_job_desc(pg) + curr_file = Path(__file__) + compare_script = curr_file.parent / "slurm_script.sh" + with open('slurm_script.sh', 'w') as fp: + fp.write(job_desc) + with compare_script.open() as fp: + script = fp.read() + self.assertEqual(script, job_desc) def test_client_with_slurm_template(self): + """ + Use 'slurm_script_from_template.sh as a comparison file to demonstrate + how the template approach gives us more options. + """ pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" - with ("exampe_template.slurm").open('r') as fp: + template = Path(__file__).parent / "example_template.slurm" + with template.open() as fp: slurm_template = fp.read() client = SlurmClient( facility="setonix", physical_graph_template_file=str(pg), suffix="TestSession", - slurm_template=slurm_template + slurm_template=slurm_template, + username='test' ) job_desc = client.create_job_desc(pg) curr_file = Path(__file__) From ba9620e2da8d16dd74079533e864c520af22d42b Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Thu, 14 Nov 2024 16:51:39 +0800 Subject: [PATCH 03/14] LIU-420: Initial documentation additions. 
--- .../overview.rst} | 28 ++-- docs/deployment/slurm_deployment.rst | 132 ++++++++++++++++++ docs/index.rst | 2 +- 3 files changed, 152 insertions(+), 10 deletions(-) rename docs/{deployment.rst => deployment/overview.rst} (94%) create mode 100644 docs/deployment/slurm_deployment.rst diff --git a/docs/deployment.rst b/docs/deployment/overview.rst similarity index 94% rename from docs/deployment.rst rename to docs/deployment/overview.rst index 11e4c0a68..e8a9b56e8 100644 --- a/docs/deployment.rst +++ b/docs/deployment/overview.rst @@ -9,11 +9,11 @@ As mentioned above, |daliuge| has been developed to enable processing of data fr .. _dataflow.fig.funcs: -.. figure:: images/dfms_func_as_graphs.jpg +.. figure:: ../images/dfms_func_as_graphs.jpg Graph-based Functions of the |daliuge| Prototype -The :doc:`architecture/graphs` section describes the implementation details for each function. +The :doc:`../architecture/graphs` section describes the implementation details for each function. Here we briefly discuss how they work together to fullfill the SKA requirements. * First of all, the *Logical Graph Template* (topleft in @@ -39,8 +39,8 @@ Here we briefly discuss how they work together to fullfill the SKA requirements. * Before an observation starts, the |daliuge| engine de-serializes a physical graph JSON string and turns all the nodes into Drop objects and then deploys all the Drops onto the allocated resources as per the location information stated in the physical graph. The deployment process is - facilitated through :doc:`architecture/managers`, which are daemon processes managing the deployment of Drops - onto the designated resources. Note that the :doc:`architecture/managers` do _not_ control the Drops or the execution, but they do monitor the state of them during the execution. + facilitated through :doc:`../architecture/managers`, which are daemon processes managing the deployment of Drops + onto the designated resources. 
Note that the :doc:`../architecture/managers` do _not_ control the Drops or the execution, but they do monitor the state of them during the execution. * Once an observation starts, the graph :ref:`graph.execution` cascades down the graph edges through either data Drops that triggers its next consumers or application Drops that produces its next outputs. When all Drops are in the **COMPLETED** state, some data Drops @@ -61,8 +61,15 @@ The translator is able to determine which of the following options is available Deployment in HPC Centers ~~~~~~~~~~~~~~~~~~~~~~~~~ +For current deployment in HPC systems that do not support OOD, please refer to :ref:`slurm_deployment`. + When trying to deploy |daliuge| inside a HPC centre the basic concept as described above does not apply, since in general it is not possible to have the managers running on nodes in a daemon-like way. Typically a user has to submit a job into a batch queue system like SLURM or Torque and that is pretty much all that can be done by a normal user. In order to address this use case, the |daliuge| code base contains example code (daliuge-engine/dlg/deploy/pawsey/start_dfms_cluster.py) which essentially allows to submit not just the workflow, but also the |daliuge| engine as a job. The first thing that job is then doing is to start the managers and then submit the graph. It also allows to start a proxy server, which provides access to the managers' web interfaces via an external machine in order to be able to monitor the running graph. The best way to get access to the |daliuge| code base is to ask the support team to create a load module specifically for |daliuge|. If that is not possible, then users can just load an appropriate Python version (3.7 or 3.8) and install |daliuge| locally. In many cases it is not possible to run docker containers on HPC infrastructure. +.. 
toctree:: + :maxdepth: 1 + + slurm_deployment + Deployment with OpenOnDemand ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -73,7 +80,7 @@ Importantly, the physical graph deployment is triggered by the user's browser di .. _deployment.fig.ood: -.. figure:: images/deploy_ood.jpeg +.. figure:: ../images/deploy_ood.jpeg Sequence diagram of graph deployment in OOD envrionment. @@ -92,7 +99,7 @@ The server deployment option assumes the machine hosting the translator can comm .. _deployment.fig.server: -.. figure:: images/deploy_server.jpeg +.. figure:: ../images/deploy_server.jpeg Sequence diagram of direct graph deployment. @@ -109,7 +116,7 @@ locally, make sure that your host descriptions in EAGLE and the translator are ' .. _deployment.fig.browser: -.. figure:: images/deploy_browser.jpeg +.. figure:: ../images/deploy_browser.jpeg Sequence diagram of restful graph deployment. @@ -128,10 +135,12 @@ The user will need to monitor the k8s environment directly. .. _deployment.fig.helm: -.. figure:: images/deploy_helm.jpeg +.. figure:: ../images/deploy_helm.jpeg Sequence diagram of graph deployment in helm environment. + + Component Deployment ==================== @@ -159,4 +168,5 @@ In order to be able to use Python components, it must be possible for the engine docker exec -ti daliuge-engine bash -c "pip install --prefix=\$DLG_ROOT/code dlg_example_cmpts" -Please note that the '\' character is required for this to work correctly. In the case of running |daliuge| in docker containers $DLG_ROOT is mounted from the host and thus also the subdirectory code is visible directly on the host. In a typical HPC deployment scenario that directory will be on the user's home directory, or a shared volume, visible to all compute nodes. \ No newline at end of file +Please note that the '\' character is required for this to work correctly. In the case of running |daliuge| in docker containers $DLG_ROOT is mounted from the host and thus also the subdirectory code is visible directly on the host. 
In a typical HPC deployment scenario that directory will be on the user's home directory, or a shared volume, visible to all compute nodes. + diff --git a/docs/deployment/slurm_deployment.rst b/docs/deployment/slurm_deployment.rst new file mode 100644 index 000000000..7f5e613ed --- /dev/null +++ b/docs/deployment/slurm_deployment.rst @@ -0,0 +1,132 @@ +.. _slurm_deployment: + +Slurm Deployment +===================================== + +Usage and options +----------------- + +- Non-OOD support requires the use of the create_dlg_job.py script. + +Script has two configuration approaches: + +- Command line interface (CLI) +- Configuration files: + - Environment INI [Experimental] + - Slurm template [Experimental] + +Command-line Interface (CLI) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The CLI allows the user to submit a remote SLURM job from their local machine, which will +spin up the requested number of DALiuGE Island and Node Managers and run the graph. + +The minimal requirements for submitting a job via the command-line are: + +- The facility (e.g. Setonix, Hyades, Galaxy) +- The graph (either logical or physical, but not both). +- Specifying if remote or local submission +- The remote user account + +All other options have defaults provided. Thus the most basic job submission will look like:: + + python create_dlg_job.py -a 1 -f setonix -L /path/to/graph/ArrayLoop.graph -U user_name + +However, the defaults for jobs submissions will lead to limited use of the available resources (i.e. number of nodes provisioned) and won't account for specific job durations. DALiuGE Translator options are also available, so it is possible to specify what partitioning algorithm is preferred. 
A more complete job submission, that takes advantage of the SLURM and environment options, will look something like:: + + python create_dlg_job.py -a 1 -n 32 -s 1 -t 60 -A pso -u -f setonix -L/path/to/graph/ArrayLoop.graph -v 4 --remote --submit -U user_name + +This performs the following: + +- Submits and runs a remote job to Pawsey's Setonix (`-f setonix`) machine +- Uses 1 data island manager (-s 1) and requests 32 nodes (-n 32) for a job duration of 60 minutes (-t) +- Translates the Logical Graph (-L) using the PSO algorithm (-A PSO). + +Environment INI +~~~~~~~~~~~~~~~~~~~~~ +TBC + +SLURM Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +TBC + +Complete command-line options +----------------------------- + +Help output:: + + create_dlg_job.py -a [1|2] -f [options] + + create_dlg_job.py -h for further help + + Options: + -h, --help show this help message and exit + -a ACTION, --action=ACTION + 1 - create/submit job, 2 - analyse log + -l LOG_ROOT, --log-root=LOG_ROOT + The root directory of the log file + -d LOG_DIR, --log-dir=LOG_DIR + The directory of the log file for parsing + -L LOGICAL_GRAPH, --logical-graph=LOGICAL_GRAPH + The filename of the logical graph to deploy + -A ALGORITHM, --algorithm=ALGORITHM + The algorithm to be used for the translation + -O ALGORITHM_PARAMS, --algorithm-parameters=ALGORITHM_PARAMS + Parameters for the translation algorithm + -P PHYSICAL_GRAPH, --physical-graph=PHYSICAL_GRAPH + The filename of the physical graph (template) to + deploy + -t JOB_DUR, --job-dur=JOB_DUR + job duration in minutes + -n NUM_NODES, --num_nodes=NUM_NODES + number of compute nodes requested + -i, --visualise_graph + Whether to visualise graph (poll status) + -p, --run_proxy Whether to attach proxy server for real-time + monitoring + -m MON_HOST, --monitor_host=MON_HOST + Monitor host IP (optional) + -o MON_PORT, --monitor_port=MON_PORT + The port to bind DALiuGE monitor + -v VERBOSE_LEVEL, --verbose-level=VERBOSE_LEVEL + Verbosity level (1-3) of the DIM/NM 
logging + -c CSV_OUTPUT, --csvoutput=CSV_OUTPUT + CSV output file to keep the log analysis result + -z, --zerorun Generate a physical graph that takes no time to run + -y, --sleepncopy Whether include COPY in the default Component drop + -T MAX_THREADS, --max-threads=MAX_THREADS + Max thread pool size used for executing drops. 0 + (default) means no pool. + -s NUM_ISLANDS, --num_islands=NUM_ISLANDS + The number of Data Islands + -u, --all_nics Listen on all NICs for a node manager + -S, --check_with_session + Check for node managers' availability by + creating/destroy a session + -f FACILITY, --facility=FACILITY + The facility for which to create a submission job + Valid options: ['galaxy_mwa', 'galaxy_askap', + 'magnus', 'galaxy', 'setonix', 'shao', 'hyades', + 'ood', 'ood_cloud'] + --submit If set to False, the job is not submitted, but the + script is generated + --remote If set to True, the job is submitted/created for a + remote submission + -D DLG_ROOT, --dlg_root=DLG_ROOT + Overwrite the DLG_ROOT directory provided by the + config + -C, --configs Display the available configurations and exit + -U USERNAME, --username=USERNAME + Remote username, if different from local + + Experimental Options: + Caution: These are not properly tested and likely tobe rough around + the edges. + + --config_file=CONFIG_FILE + Use INI configuration file. + --slurm_template=SLURM_TEMPLATE + Use SLURM template file for job submission. WARNING: + Using this command will over-write other job- + parameters passed here. + diff --git a/docs/index.rst b/docs/index.rst index 8542953a2..e9a284e55 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,7 +26,7 @@ and is performed by the `DIA team Date: Fri, 15 Nov 2024 13:11:29 +0800 Subject: [PATCH 04/14] LIU-420: More doc updates. 
--- daliuge-engine/dlg/deploy/create_dlg_job.py | 6 ++++ daliuge-engine/test/deploy/setonix.ini | 1 - docs/deployment/slurm_deployment.rst | 39 ++++++++++++++++++--- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 5114988ac..42343681c 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -452,6 +452,12 @@ def create_experiment_group(parser: optparse.OptionParser): ) return group +def create_job_group(): + pass + +def create_graph_group(): + pass + def main(): parser = optparse.OptionParser( diff --git a/daliuge-engine/test/deploy/setonix.ini b/daliuge-engine/test/deploy/setonix.ini index 0df0b87a3..67f86a849 100644 --- a/daliuge-engine/test/deploy/setonix.ini +++ b/daliuge-engine/test/deploy/setonix.ini @@ -2,7 +2,6 @@ ACCOUNT = pawsey0411 USER = test LOGIN_NODE = setonix.pawsey.org.au -XEC_PREFIX = srun -l HOME_DIR = /scratch/${ACCOUNT} DLG_ROOT = ${HOME_DIR}/${USER}/dlg LOG_DIR = ${DLG_ROOT}/log diff --git a/docs/deployment/slurm_deployment.rst b/docs/deployment/slurm_deployment.rst index 7f5e613ed..4b569e468 100644 --- a/docs/deployment/slurm_deployment.rst +++ b/docs/deployment/slurm_deployment.rst @@ -12,7 +12,7 @@ Script has two configuration approaches: - Command line interface (CLI) - Configuration files: - - Environment INI [Experimental] + - Facility INI [Experimental] - Slurm template [Experimental] Command-line Interface (CLI) @@ -42,13 +42,44 @@ This performs the following: - Uses 1 data island manager (-s 1) and requests 32 nodes (-n 32) for a job duration of 60 minutes (-t) - Translates the Logical Graph (-L) using the PSO algorithm (-A PSO). -Environment INI +Facility INI ~~~~~~~~~~~~~~~~~~~~~ -TBC +Currently, deploying onto a HPC facility requires using the facilities DALiuGE already supports, or adding a brand new class entry to the deploy/config/__init__.py file. 
+To make deployment more flexible and easier to expand to feasibly any facility, we have added (experimental) support for using an INI configuration file for facility deployment parameters. + +The following configuration is an example deployment that contains all variables necessary to deploy onto a remote system:: + + [ENVIRONMENT] + ACCOUNT = pawsey0411 + USER = test + LOGIN_NODE = setonix.pawsey.org.au + HOME_DIR = /scratch/${ACCOUNT} + DLG_ROOT = ${HOME_DIR}/${USER}/dlg + LOG_DIR = ${DLG_ROOT}/log + MODULES = + VENV = source /software/projects/${ACCOUNT}/venv/bin/activate + EXEC_PREFIX = srun -l + +A user can create and reference their own .ini file using these parameters, and run with the --config_file option:: + + python create_dlg_job.py -a 1 -n 1 -s 1 -u -f setonix -L ~/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/dropmake/logical_graphs/ArrayLoop.graph -v 5 --remote --submit -U rbunney --config_file example_config.ini SLURM Template ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -TBC + +A basic example that replicates the current SLURM script that is created by :code:`create_dlg_job.py`.
:: + + #!/bin/bash --login + + #SBATCH --nodes=2 + #SBATCH --ntasks-per-node=1 + #SBATCH --cpus-per-task=2 + #SBATCH --job-name=DALiuGE-$SESSION_ID + #SBATCH --time=00:45:00 + #SBATCH --error=err-%j.log + + export DLG_ROOT=$DLG_ROOT + source /software/projects/pawsey0411/venv/bin/activate Complete command-line options ----------------------------- From ddb0d7bced60d82f116b33310be72914d6ea4b4a Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Fri, 15 Nov 2024 16:47:53 +0800 Subject: [PATCH 05/14] LIU-420: Finalise documentation for experimental features --- docs/deployment/slurm_deployment.rst | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/deployment/slurm_deployment.rst b/docs/deployment/slurm_deployment.rst index 4b569e468..8402a03df 100644 --- a/docs/deployment/slurm_deployment.rst +++ b/docs/deployment/slurm_deployment.rst @@ -66,20 +66,29 @@ A user can create and reference their own .ini file using these parameters, and SLURM Template ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There are significantly more SLURM options than are practical as CLI options. The SLURM template is an experimental feature that allows you to specify additional SBATCH options that are not currently supported in the CLI. The template will be prefixed to the final SLURM script that runs the DALiuGE job on the remote system. -A basic example that replicates the current SLURM script that is created by :code:`create_dlg_job.py`. 
:: +A basic example that replicates the current SLURM script that is created by :code:`create_dlg_job.py` is available in dlg/deploy/config/default.slurm :: #!/bin/bash --login #SBATCH --nodes=2 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=2 - #SBATCH --job-name=DALiuGE-$SESSION_ID + #SBATCH --job-name=DALiuGE-$SESSION_ID # NECESSARY, DO NOT REMOVE #SBATCH --time=00:45:00 #SBATCH --error=err-%j.log - export DLG_ROOT=$DLG_ROOT + export DLG_ROOT=$DLG_ROOT # DO NOT CHANGE - use .INI file or CLI source /software/projects/pawsey0411/venv/bin/activate + # Keep an empty line in the file + +.. note:: + Settings defined in the SLURM template will over-write anything passed via the CLI _and_ the .INI. For example, the `source` for a virtualenv declared in the .slurm file will overwrite the VENV environment variable in the .INI file. This may change in the future depending on the extent of the features we add. + +Running with a SLURM template is similar to the .ini method:: + + python create_dlg_job.py -a 1 -n 1 -s 1 -u -f setonix -L ~/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/dropmake/logical_graphs/ArrayLoop.graph -v 5 --remote --submit -U rbunney --config_file example_config.ini --slurm_template example.slurm Complete command-line options ----------------------------- From 9998d50f65054cc3fbd892e7935aa466385d756d Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Fri, 15 Nov 2024 17:02:02 +0800 Subject: [PATCH 06/14] LIU-420: Restrict client tests to full installation --- daliuge-engine/test/deploy/test_slurm_client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/daliuge-engine/test/deploy/test_slurm_client.py b/daliuge-engine/test/deploy/test_slurm_client.py index e7ebefd0c..7d5aba1e2 100644 --- a/daliuge-engine/test/deploy/test_slurm_client.py +++ b/daliuge-engine/test/deploy/test_slurm_client.py @@ -21,7 +21,10 @@ # import unittest +import pytest from pathlib import Path +# Note this test will only run with a full installation of DALiuGE.
+pexpect = pytest.importorskip("dlg.dropmake.pg_generator") try: from importlib.resources import files @@ -30,6 +33,7 @@ import dlg.deploy.configs as configs import daliuge_tests.engine.graphs as test_graphs + from dlg.deploy.slurm_client import SlurmClient import json From 24f82eb1d28894e23a14ca742e04fae71ec98032 Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Thu, 21 Nov 2024 23:15:41 +0800 Subject: [PATCH 07/14] LIU-420: Action sourcery code review comments. --- daliuge-engine/dlg/deploy/create_dlg_job.py | 6 ++ daliuge-engine/dlg/deploy/slurm_client.py | 102 ++++++++++-------- .../test/deploy/test_slurm_client.py | 8 +- 3 files changed, 66 insertions(+), 50 deletions(-) diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 42343681c..25725bd02 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -453,9 +453,15 @@ def create_experiment_group(parser: optparse.OptionParser): return group def create_job_group(): + """ + TODO: LIU-424 + """ pass def create_graph_group(): + """ + TODO: LIU-424 + """ pass diff --git a/daliuge-engine/dlg/deploy/slurm_client.py b/daliuge-engine/dlg/deploy/slurm_client.py index f29c57158..806848d60 100644 --- a/daliuge-engine/dlg/deploy/slurm_client.py +++ b/daliuge-engine/dlg/deploy/slurm_client.py @@ -29,6 +29,7 @@ import subprocess import shutil import tempfile +import string from dlg import remote from dlg.runtime import __git_version__ as git_commit @@ -101,8 +102,7 @@ def __init__( self.exec_prefix = config["exec_prefix"] self.username = config['user'] if 'user' in config else sys.exit(1) if not self.username: - print("Username not configured in INI file.") - sys.exit(1) + print("Username not configured in INI file, using local username...") else: # Setup SLURM environment variables using config config = ConfigFactory.create_config(facility=facility, user=username) @@ -164,6 +164,13 @@ def __init__( def 
create_session_suffix(self, suffix=None): + """ + Create a suffix to identify the session. If no suffix is specified, use the + current datetime setting. + + :param: suffix, used to specify a non-datetime suffix. + :return: the final suffix + """ if not suffix: return datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S") else: @@ -179,35 +186,28 @@ def get_session_dirname(self): def apply_slurm_template(self, template_str, session_id, dlg_root): - import string + """ + Given a string from a template file, use a string.Template object to perform + safe substitution on the string and replace $VALUES with the correct value + specified. + """ intermed_slurm = string.Template(template_str) - return intermed_slurm.safe_substitute(session_id=session_id, dlg_root=dlg_root) + ims = intermed_slurm.safe_substitute(session_id=session_id, dlg_root=dlg_root) + print("Creating job description") + return ims + "\n\n" + dlg_exec_str - def create_job_desc(self, physical_graph_file): - """ - Creates the slurm script from a physical graph + def create_paramater_mapping(self, session_dir, physical_graph_file): """ - session_dir = "{0}/workspace/{1}".format( - self.dlg_root, self.get_session_dirname() - ) - - pardict = dict() + Map the runtime or configured parameters to the session environment and SLURM + script parameters, in anticipation of using substitution.
+ """ + pardict = {} pardict["SESSION_ID"] = os.path.split(session_dir)[-1] pardict["MODULES"] = self.modules pardict["DLG_ROOT"] = self.dlg_root pardict["EXEC_PREFIX"] = self.exec_prefix - slurm_str = dlg_exec_str - if self._slurm_template: - intermed_slurm = self.apply_slurm_template( - self._slurm_template, - pardict["SESSION_ID"], - pardict["DLG_ROOT"] - ) - print("Creating job description") - slurm_str = intermed_slurm + "\n\n" + dlg_exec_str - else: - pardict["NUM_NODES"] = str(self._num_nodes) - pardict["JOB_DURATION"] = label_job_dur(self._job_dur) + pardict["NUM_NODES"] = str(self._num_nodes) + pardict["JOB_DURATION"] = label_job_dur(self._job_dur) pardict["VENV"] = self.venv pardict["PIP_NAME"] = self._pip_name @@ -238,12 +238,31 @@ def create_job_desc(self, physical_graph_file): pardict["CHECK_WITH_SESSION"] = ( "--check_with_session" if self._check_with_session else "" ) + return pardict + + def create_job_desc(self, physical_graph_file): + """ + Creates the slurm script from a physical graph + + This uses string.Template to apply substitutions that are linked to the + parameters defined at runtime. These parameters map to $VALUEs in a pre-defined + execution command that contains the necessary parameters to run DALiuGE through + SLURM. 
+ """ + + session_dir = "{0}/workspace/{1}".format( + self.dlg_root, self.get_session_dirname() + ) + pardict = self.create_paramater_mapping(session_dir, physical_graph_file) + if self._slurm_template: - import string - job_desc = string.Template(slurm_str).safe_substitute(pardict) - else: - job_desc = init_tpl.safe_substitute(pardict) - return job_desc + slurm_str = self.apply_slurm_template(self._slurm_template, + pardict['SESSION_ID'], + pardict['DLG_ROOT']) + return string.Template(slurm_str).safe_substitute(pardict) + + return init_tpl.safe_substitute(pardict) + def mk_session_dir(self, dlg_root: str = ""): """ @@ -279,15 +298,24 @@ def mk_session_dir(self, dlg_root: str = ""): print( f"ERROR: Unable to create {session_dir} on {self.username}@{self.host}, {str(e)}" ) + return None return session_dir def submit_job(self): """ Submits the slurm script to the requested facility + + :returns: jobId, the id of the SLURM job create on the facility. + None if a remote directory could not be created or if an error occurs + during connection. 
""" jobId = None session_dir = self.mk_session_dir() + if not session_dir: + print("No session_dir created.") + return jobId + physical_graph_file_name = "{0}/{1}".format(session_dir, self._pip_name) if self._physical_graph_template_file: if self._remote: @@ -305,8 +333,7 @@ def submit_job(self): job_file_name = "{0}/jobsub.sh".format(session_dir) job_desc = self.create_job_desc(physical_graph_file_name) - print(job_desc) - # sys.exit() + if self._remote: print(f"Creating SLURM script remotely: {job_file_name}") tjob = tempfile.mktemp() @@ -339,16 +366,3 @@ def submit_job(self): else: print(f"Created job submission script {job_file_name}") return jobId - - -# class ConfigManager: -# """ -# """ - -# def process_config(): -# pass - -# def create_slurm_script(): -# """ -# """ -# job_desc = init_tpl.safe_substitute(pardict) diff --git a/daliuge-engine/test/deploy/test_slurm_client.py b/daliuge-engine/test/deploy/test_slurm_client.py index 7d5aba1e2..a44048cd4 100644 --- a/daliuge-engine/test/deploy/test_slurm_client.py +++ b/daliuge-engine/test/deploy/test_slurm_client.py @@ -26,14 +26,15 @@ # Note this test will only run with a full installation of DALiuGE. 
pexpect = pytest.importorskip("dlg.dropmake.pg_generator") + try: from importlib.resources import files except ModuleNotFoundError: from importlib_resources import files # type: ignore import dlg.deploy.configs as configs +from dlg.deploy.create_dlg_job import process_config import daliuge_tests.engine.graphs as test_graphs - from dlg.deploy.slurm_client import SlurmClient import json @@ -64,7 +65,6 @@ def test_client_with_configfile(self): - That we produce the same as the CLI with the same parameters - That we can use the INI file to produce alternative parameters """ - from dlg.deploy.create_dlg_job import process_config pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" cfg_file = Path(__file__).parent / "setonix.ini" cfg = process_config(cfg_file) @@ -81,8 +81,6 @@ def test_client_with_configfile(self): job_desc = client.create_job_desc(pg) curr_file = Path(__file__) compare_script = curr_file.parent / "slurm_script.sh" - with open('slurm_script.sh', 'w') as fp: - fp.write(job_desc) with compare_script.open() as fp: script = fp.read() self.assertEqual(script, job_desc) @@ -107,8 +105,6 @@ def test_client_with_slurm_template(self): job_desc = client.create_job_desc(pg) curr_file = Path(__file__) compare_script = curr_file.parent / "slurm_script_from_template.sh" - with open('output.sh', 'w') as fp: - fp.write(job_desc) with compare_script.open() as fp: script = fp.read() self.assertEqual(script, job_desc) From 7de45810fcb65fd59f6e0e8985cfba11c31f235c Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Mon, 25 Nov 2024 15:28:17 +0800 Subject: [PATCH 08/14] LIU-420: Fix unittest errors by using correct session_dir - Previous approach ended up using local graph as input to the script; this isn't how client.submit_job() works; it uses the remote path where the physical graph is expected --- daliuge-engine/dlg/deploy/slurm_client.py | 9 ++++++--- daliuge-engine/test/deploy/slurm_script.sh | 4 ++-- .../test/deploy/slurm_script_from_template.sh | 4 ++--
.../test/deploy/test_slurm_client.py | 19 ++++++++++++++----- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/daliuge-engine/dlg/deploy/slurm_client.py b/daliuge-engine/dlg/deploy/slurm_client.py index 806848d60..f841e5208 100644 --- a/daliuge-engine/dlg/deploy/slurm_client.py +++ b/daliuge-engine/dlg/deploy/slurm_client.py @@ -263,6 +263,11 @@ def create_job_desc(self, physical_graph_file): return init_tpl.safe_substitute(pardict) + @property + def session_dir(self): + return "{0}/workspace/{1}".format( + self.dlg_root, self.get_session_dirname() + ) def mk_session_dir(self, dlg_root: str = ""): """ @@ -282,9 +287,7 @@ def mk_session_dir(self, dlg_root: str = ""): dlg_root = os.environ["DLG_ROOT"] else: dlg_root = f"{os.environ['HOME']}.dlg" - session_dir = "{0}/workspace/{1}".format( - self.dlg_root, self.get_session_dirname() - ) + session_dir = self.session_dir if not self._remote and not os.path.exists(session_dir): os.makedirs(session_dir) if self._remote: diff --git a/daliuge-engine/test/deploy/slurm_script.sh b/daliuge-engine/test/deploy/slurm_script.sh index 28bc23ccb..48db24b27 100644 --- a/daliuge-engine/test/deploy/slurm_script.sh +++ b/daliuge-engine/test/deploy/slurm_script.sh @@ -3,11 +3,11 @@ #SBATCH --nodes=6 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=2 -#SBATCH --job-name=DALiuGE-EAGLE_TestSession +#SBATCH --job-name=DALiuGE-SLURM_TestSession #SBATCH --time=00:45:00 #SBATCH --error=err-%j.log export DLG_ROOT=/scratch/pawsey0411/test/dlg source /software/projects/pawsey0411/venv/bin/activate -srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/test/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid EAGLE_TestSession +srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir 
/scratch/pawsey0411/test/dlg/workspace/SLURM_TestSession --physical-graph "/scratch/pawsey0411/test/dlg/workspace/SLURM_TestSession/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid SLURM_TestSession diff --git a/daliuge-engine/test/deploy/slurm_script_from_template.sh b/daliuge-engine/test/deploy/slurm_script_from_template.sh index 6c1740050..80c6d6040 100644 --- a/daliuge-engine/test/deploy/slurm_script_from_template.sh +++ b/daliuge-engine/test/deploy/slurm_script_from_template.sh @@ -4,11 +4,11 @@ #SBATCH --ntasks-per-node=2 #SBATCH --cpus-per-task=4 #SBATCH --mem=0 -#SBATCH --job-name=DALiuGE-EAGLE_TestSession +#SBATCH --job-name=DALiuGE-SLURM_TestSession #SBATCH --time=00:45:00 #SBATCH --error=err-%j.log export DLG_ROOT=/scratch/pawsey0411/test/dlg source /software/projects/pawsey0411/venv/bin/activate -srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/test/dlg/workspace//home/00087932/github/EAGLE_TestSession --physical-graph "/home/00087932/github/EAGLE_test_repo/eagle_test_graphs/daliuge_tests/engine/graphs/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid EAGLE_TestSession \ No newline at end of file +srun -l python3 -m dlg.deploy.start_dlg_cluster --log_dir /scratch/pawsey0411/test/dlg/workspace/SLURM_TestSession --physical-graph "/scratch/pawsey0411/test/dlg/workspace/SLURM_TestSession/SLURM_HelloWorld_simplePG.graph" --verbose-level 1 --max-threads 0 --app 0 --num_islands 1 --ssid SLURM_TestSession \ No newline at end of file diff --git a/daliuge-engine/test/deploy/test_slurm_client.py b/daliuge-engine/test/deploy/test_slurm_client.py index a44048cd4..87c68db47 100644 --- a/daliuge-engine/test/deploy/test_slurm_client.py +++ b/daliuge-engine/test/deploy/test_slurm_client.py @@ -43,16 +43,18 @@ class TestSlurmClient(unittest.TestCase): def test_client_with_cli(self): # Use special graph that also contains file name. 
See 'create_dlg_job.py' pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" - + client = SlurmClient( facility="setonix", num_nodes=6, job_dur=45, - physical_graph_template_file=str(pg), + physical_graph_template_file=pg, suffix="TestSession", username="test" ) - job_desc = client.create_job_desc(pg) + session_dir = client.session_dir + physical_graph_file_name = "{0}/{1}".format(session_dir, client._pip_name) + job_desc = client.create_job_desc(physical_graph_file_name) curr_file = Path(__file__) compare_script = curr_file.parent / "slurm_script.sh" with compare_script.open() as fp: @@ -77,8 +79,10 @@ def test_client_with_configfile(self): config=cfg, username='test' ) + session_dir = client.session_dir + physical_graph_file_name = "{0}/{1}".format(session_dir, client._pip_name) + job_desc = client.create_job_desc(physical_graph_file_name) - job_desc = client.create_job_desc(pg) curr_file = Path(__file__) compare_script = curr_file.parent / "slurm_script.sh" with compare_script.open() as fp: @@ -102,9 +106,14 @@ def test_client_with_slurm_template(self): slurm_template=slurm_template, username='test' ) - job_desc = client.create_job_desc(pg) + session_dir = client.session_dir + physical_graph_file_name = "{0}/{1}".format(session_dir, client._pip_name) + job_desc = client.create_job_desc(physical_graph_file_name) + curr_file = Path(__file__) compare_script = curr_file.parent / "slurm_script_from_template.sh" with compare_script.open() as fp: script = fp.read() + print(job_desc) + print(script) self.assertEqual(script, job_desc) From 0de27c1e159539132329d60f123ca33e45392098 Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Mon, 25 Nov 2024 15:52:34 +0800 Subject: [PATCH 09/14] LIU-420: Set maxDiff to None --- daliuge-engine/test/deploy/test_slurm_client.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/daliuge-engine/test/deploy/test_slurm_client.py b/daliuge-engine/test/deploy/test_slurm_client.py index 87c68db47..118032ead 
100644 --- a/daliuge-engine/test/deploy/test_slurm_client.py +++ b/daliuge-engine/test/deploy/test_slurm_client.py @@ -40,15 +40,18 @@ class TestSlurmClient(unittest.TestCase): - def test_client_with_cli(self): + def setUp(self) -> None: + super().setUp() + self.maxDiff = None # Use special graph that also contains file name. See 'create_dlg_job.py' - pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" + self.pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" + def test_client_with_cli(self): client = SlurmClient( facility="setonix", num_nodes=6, job_dur=45, - physical_graph_template_file=pg, + physical_graph_template_file=self.pg, suffix="TestSession", username="test" ) @@ -67,14 +70,13 @@ def test_client_with_configfile(self): - That we produce the same as the CLI with the same parameters - That we can use the INI file to produce alternative parameters """ - pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" cfg_file = Path(__file__).parent / "setonix.ini" cfg = process_config(cfg_file) client = SlurmClient( facility="setonix", num_nodes=6, job_dur=45, - physical_graph_template_file=str(pg), + physical_graph_template_file=self.pg, suffix="TestSession", config=cfg, username='test' @@ -95,13 +97,12 @@ def test_client_with_slurm_template(self): Use 'slurm_script_from_template.sh as a comparison file to demonstrate how the template approach gives us more options. 
""" - pg = files(test_graphs) / "SLURM_HelloWorld_simplePG.graph" template = Path(__file__).parent / "example_template.slurm" with template.open() as fp: slurm_template = fp.read() client = SlurmClient( facility="setonix", - physical_graph_template_file=str(pg), + physical_graph_template_file=self.pg, suffix="TestSession", slurm_template=slurm_template, username='test' From e9b54ea112f10944277dd3055140d8aee97c3e4f Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Wed, 29 Jan 2025 16:10:27 +0800 Subject: [PATCH 10/14] LIU-420: Add create_dlg_job.py to `dlg create` command line argument - This also adds the config_manager.py, which setups local user configs based on templates in dlg/eploy/configs. --- .../dlg/deploy/configs/config_manager.py | 180 ++++++++++++++++++ daliuge-engine/dlg/deploy/create_dlg_job.py | 73 +++++-- daliuge-engine/dlg/runtime/tool_commands.py | 5 + daliuge-engine/setup.py | 1 + 4 files changed, 239 insertions(+), 20 deletions(-) create mode 100644 daliuge-engine/dlg/deploy/configs/config_manager.py diff --git a/daliuge-engine/dlg/deploy/configs/config_manager.py b/daliuge-engine/dlg/deploy/configs/config_manager.py new file mode 100644 index 000000000..cb29e9aac --- /dev/null +++ b/daliuge-engine/dlg/deploy/configs/config_manager.py @@ -0,0 +1,180 @@ +# +# ICRAR - International Centre for Radio Astronomy Research +# (c) UWA - The University of Western Australia, 2025 +# Copyright by UWA (in the framework of the ICRAR) +# All rights reserved +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# +import shutil +import textwrap + +from enum import Enum, auto +from dataclasses import dataclass +from importlib.resources import files, as_file +from pathlib import Path + +import dlg.deploy.configs as default_configs + +USER_CONFIG_DIR = Path.home() / ".config" +DLG_CONFIG_DIR = USER_CONFIG_DIR / "dlg" + +class ConfigType(Enum): + ENV = "ini" + SLURM = "slurm" + + +class ConfigDirState(Enum): + NEW = auto() + EXISTS = auto() + NOT_CREATED = auto() + + +@dataclass +class ConfigManager: + """ + Utility class to manager job submission configuration files + """ + + facilities: list[str] + prompt_setup = True + + def setup_user(self): + """ + Called when passing the "--setup" command of the "dlg remote" option. 
+ """ + dir_state = self.create_dlg_config_directory() + if dir_state == ConfigDirState.NEW: + self.copy_defaults_to_user_config_directory() + self.print_available_config() + elif dir_state == ConfigDirState.EXISTS: + print(f"{DLG_CONFIG_DIR} already exists.") + else: + print(f"{DLG_CONFIG_DIR} was not created.") + + def prompt_user(self): + """ + Create prompt and validate it against expected output + """ + + while True: + ui = input("Do you want to create a $HOME/.config/dlg directory" + " to store your custom configuration files and scripts?") + if ui.lower() == "y" or ui.lower() == "n": + return ui.lower() == "y" + else: + print("Please selected from the options (Y/N)") + + def create_dlg_config_directory(self): + """ + Establish the user $HOME/.config/dlg/ directory + """ + if DLG_CONFIG_DIR.exists(): + return ConfigDirState.EXISTS + else: + if self.prompt_user(): + try: + Path.mkdir(DLG_CONFIG_DIR) + return ConfigDirState.NEW + except Exception as e: + raise (e) + else: + return ConfigDirState.NOT_CREATED + + def copy_defaults_to_user_config_directory(self): + """ + Move configuration files from dlg/deploy/configs to DLG_CONFIG_DIR + """ + cfg_path = files(default_configs) + + configs = [p for p in cfg_path.iterdir() if + (".ini" in p.name) or (".slurm" in p.name)] + + if DLG_CONFIG_DIR.exists(): + for cfg in configs: + if cfg.exists(): + shutil.copy(cfg, DLG_CONFIG_DIR) + else: + print("Unable to copy %s, does not exist", cfg) + else: + print("Unable to copy to %s, does is not available", DLG_CONFIG_DIR) + + def get_user_configs(self) -> dict[Path]: + """ + Returns dictionary of filetypes to split out? 
+ """ + configs = {} + if not DLG_CONFIG_DIR.exists(): + return configs + filetypes = ["ini", "slurm"] + + for ftype in filetypes: + configs[ftype] = [cfg for cfg in DLG_CONFIG_DIR.iterdir() if + ftype in cfg.name] + return configs + + def print_available_config(self): + """ + Present the options available for deployment + """ + print("User Configs (~/.config/dlg)") + print("----------------------------") + user_configs = self.get_user_configs() + if user_configs: + print("Environments (--config_file):") + for config in user_configs["ini"]: + print(textwrap.indent(config.name, "\t")) + print("Slurm scripts (--slurm_template):") + for config in user_configs["slurm"]: + print(textwrap.indent(config.name, "\t")) + else: + print(textwrap.indent("N/a: User-specific directory is not setup.", "\t")) + + print("\nDALiuGE Defaults (-f/--facility):") + print("-----------------------------------") + for f in self.facilities: + print(textwrap.indent(f, "\t")) + + def load_user_config(self, config_type: ConfigType, config_choice: str) -> Path: + """ + Resolve the config type and choice to a config file and return full path. + + Path resolution occurs in the following hierarchy: + - If config choice is absolute path, return the absolute path + - If config choice is not an absolute path, but the path exists (i.e. 
can be + found) return the path + - If the config choice is relative, and doesn't exist, check if it exists in + DLG_CONFIG_DIR + - If it does exist, construct the path + - If it doesn't exist, return None + """ + if not DLG_CONFIG_DIR.exists() and self.prompt_setup: + print("NOTE: No user configs exists; consider dlg create --setup.") + self.prompt_setup = False + + choice_path = Path(config_choice) + if choice_path.is_absolute() and p.exists(): + return choice_path + elif choice_path.exists(): + return choice_path.absolute() + else: + user_configs = self.get_user_configs() + + options = user_configs.get(config_type.value, []) + for o in options: + if config_choice == o.name: + return o + return None diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 25725bd02..77586f427 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -52,6 +52,9 @@ init_pgt_unroll_repro_data, init_pgt_partition_repro_data, ) + +from dlg.deploy.configs.config_manager import ConfigManager, ConfigType + from dlg.dropmake import pg_generator FACILITIES = ConfigFactory.available() @@ -430,26 +433,34 @@ def process_slurm_template(template_file: str): def create_experiment_group(parser: optparse.OptionParser): from optparse import OptionGroup - group=OptionGroup(parser, "Experimental Options", + group=OptionGroup(parser, "Experimental Options", "Caution: These are not properly tested and likely to" "be rough around the edges.") group.add_option( "--config_file", - dest="config_file", - type="string", - action="store", + dest="config_file", + type="string", + action="store", help="Use INI configuration file.", default=None ) group.add_option( - "--slurm_template", + "--slurm_template", dest="slurm_template", - type="string", - action="store", - help="Use SLURM template file for job submission. 
WARNING: Using this command will over-write other job-parameters passed here.", + type="string", + action="store", + help="Use SLURM template file for job submission. WARNING: Using this command will over-write other job-parameters passed here.", default=None ) + group.add_option( + "--setup", + dest="setup", + action="store_true", + help="Setup local '$HOME/.config/dlg' directory to store custom environment config and slurm scripts", + default=False + ) + return group def create_job_group(): @@ -465,16 +476,17 @@ def create_graph_group(): pass -def main(): +def run(_, args=None): parser = optparse.OptionParser( - usage="\n%prog -a [1|2] -f [options]\n\n%prog -h for further help" + usage="\n%prog --action [submit|analyse] -f [options]\n\n%prog -h for further help" ) parser.add_option( "-a", "--action", action="store", - type="int", + type="choice", + choices=["submit", "analyse"], dest="action", help="1 - create/submit job, 2 - analyse log", default=None, @@ -698,17 +710,25 @@ def main(): ) parser.add_option_group(create_experiment_group(parser)) - (opts, _) = parser.parse_args(sys.argv) + + cfg_manager = ConfigManager(FACILITIES) + + if opts.setup: + cfg_manager.setup_user() + sys.exit(0) + if opts.configs: - print(f"Available facilities: {FACILITIES}") + print(f"Available facilities:\n") + cfg_manager.print_available_config() sys.exit(1) if not (opts.action and opts.facility): parser.error("Missing required parameters!") + parser.print_help() if opts.facility not in FACILITIES: parser.error(f"Unknown facility provided. 
Please choose from {FACILITIES}") - if opts.action == 2: + if opts.action == "analyse": if opts.log_dir is None: # you can specify: # either a single directory @@ -736,7 +756,7 @@ def main(): else: log_parser = LogParser(opts.log_dir) log_parser.parse(out_csv=opts.csv_output) - elif opts.action == 1: + elif opts.action == "submit": path_to_graph_file = None if opts.logical_graph and opts.physical_graph: parser.error( @@ -778,9 +798,22 @@ def main(): else: pgt_file = path_to_graph_file - config = process_config(opts.config_file) if opts.config_file else None - template = process_slurm_template( - opts.slurm_template) if opts.slurm_template else None + if opts.config_file: + config_path = cfg_manager.load_user_config(ConfigType.ENV, opts.config_file) + if not config_path: + print("Provided --config_file option that does not exist!") + sys.exit(1) + config = process_config(config_path) if config_path else None + else: + config = None + if opts.slurm_template: + template_path = cfg_manager.load_user_config(ConfigType.SLURM, opts.slurm_template) + if not template_path: + print("Provided --slurm_template option that does not exist!") + sys.exit(1) + template = process_slurm_template(template_path) if template_path else None + else: + template = None client = SlurmClient( dlg_root=opts.dlg_root, @@ -804,7 +837,7 @@ def main(): config=config, slurm_template=template ) - + client._visualise_graph = opts.visualise_graph client.submit_job() else: @@ -813,4 +846,4 @@ def main(): if __name__ == "__main__": - main() + run() diff --git a/daliuge-engine/dlg/runtime/tool_commands.py b/daliuge-engine/dlg/runtime/tool_commands.py index 9f913858d..7ba6791ec 100644 --- a/daliuge-engine/dlg/runtime/tool_commands.py +++ b/daliuge-engine/dlg/runtime/tool_commands.py @@ -54,3 +54,8 @@ def register_commands(): "Print the directory where C header files can be found", include_dir, ) + tool.cmdwrap( + "create", + "Create a DALiuGE graph to a remote computing environment", + 
"dlg.deploy.create_dlg_job:run", + ) diff --git a/daliuge-engine/setup.py b/daliuge-engine/setup.py index 982accc61..4c2d3db8a 100644 --- a/daliuge-engine/setup.py +++ b/daliuge-engine/setup.py @@ -190,6 +190,7 @@ def run(self): packages=find_packages(exclude=("test", "test.*", "fabfile")), package_data={ "dlg.apps": ["dlg_app.h", "dlg_app2.h"], + "dlg.deploy.configs": ["*.ini", ".slurm"], "dlg.manager": [ "web/*.html", "web/static/css/*.css", From 8d4c6f5815775a2bb0a5094051d641095f4e36bc Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Wed, 29 Jan 2025 17:04:35 +0800 Subject: [PATCH 11/14] LIU-420: Add dlg config, which splits out config setup from dlg create --- .../dlg/deploy/configs/config_manager.py | 37 ++++++++++++++++++- daliuge-engine/dlg/deploy/create_dlg_job.py | 15 +------- daliuge-engine/dlg/deploy/slurm_client.py | 2 - daliuge-engine/dlg/runtime/tool_commands.py | 4 ++ 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/daliuge-engine/dlg/deploy/configs/config_manager.py b/daliuge-engine/dlg/deploy/configs/config_manager.py index cb29e9aac..85e8e2771 100644 --- a/daliuge-engine/dlg/deploy/configs/config_manager.py +++ b/daliuge-engine/dlg/deploy/configs/config_manager.py @@ -19,6 +19,8 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, # MA 02111-1307 USA # +import sys +import optparse import shutil import textwrap @@ -28,6 +30,7 @@ from pathlib import Path import dlg.deploy.configs as default_configs +from dlg.deploy.configs import ConfigFactory USER_CONFIG_DIR = Path.home() / ".config" DLG_CONFIG_DIR = USER_CONFIG_DIR / "dlg" @@ -162,7 +165,7 @@ def load_user_config(self, config_type: ConfigType, config_choice: str) -> Path: - If it doesn't exist, return None """ if not DLG_CONFIG_DIR.exists() and self.prompt_setup: - print("NOTE: No user configs exists; consider dlg create --setup.") + print("NOTE: No user configs exists; consider running 'dlg config --setup'.") self.prompt_setup = False choice_path = Path(config_choice) @@ 
-178,3 +181,35 @@ def load_user_config(self, config_type: ConfigType, config_choice: str) -> Path: if config_choice == o.name: return o return None + +def run(_, args): + + cfg_manager = ConfigManager(ConfigFactory.available()) + + parser = optparse.OptionParser() + parser.add_option( + "--setup", + dest="setup", + action="store_true", + help="Setup local '$HOME/.config/dlg' directory to store custom environment config and slurm scripts", + default=False + ) + parser.add_option( + "-l", "--list", + dest="list", + action="store_true", + help="List the available configuration for DALiuGE deployment." + ) + (opts, _) = parser.parse_args(sys.argv) + if opts.setup: + cfg_manager.setup_user() + sys.exit(0) + elif opts.list: + print(f"Available facilities:\n") + cfg_manager.print_available_config() + sys.exit(0) + else: + parser.print_help() + + + diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 77586f427..0df8ba73b 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -453,13 +453,6 @@ def create_experiment_group(parser: optparse.OptionParser): help="Use SLURM template file for job submission. 
WARNING: Using this command will over-write other job-parameters passed here.", default=None ) - group.add_option( - "--setup", - dest="setup", - action="store_true", - help="Setup local '$HOME/.config/dlg' directory to store custom environment config and slurm scripts", - default=False - ) return group @@ -714,14 +707,8 @@ def run(_, args=None): cfg_manager = ConfigManager(FACILITIES) - if opts.setup: - cfg_manager.setup_user() - sys.exit(0) - if opts.configs: - print(f"Available facilities:\n") - cfg_manager.print_available_config() - sys.exit(1) + print(f"Available facilities: {FACILITIES}") if not (opts.action and opts.facility): parser.error("Missing required parameters!") parser.print_help() diff --git a/daliuge-engine/dlg/deploy/slurm_client.py b/daliuge-engine/dlg/deploy/slurm_client.py index f841e5208..bd9e22e54 100644 --- a/daliuge-engine/dlg/deploy/slurm_client.py +++ b/daliuge-engine/dlg/deploy/slurm_client.py @@ -83,8 +83,6 @@ def __init__( slurm_template=None, suffix=None ): - - ## TODO ## Here, we want to separate out the following ## Config derived from CONFIG Factory - we replace with ini file ## Config derived from CLI, intended for replacement in the SLURM job script diff --git a/daliuge-engine/dlg/runtime/tool_commands.py b/daliuge-engine/dlg/runtime/tool_commands.py index 7ba6791ec..eaaa4002f 100644 --- a/daliuge-engine/dlg/runtime/tool_commands.py +++ b/daliuge-engine/dlg/runtime/tool_commands.py @@ -59,3 +59,7 @@ def register_commands(): "Create a DALiuGE graph to a remote computing environment", "dlg.deploy.create_dlg_job:run", ) + tool.cmdwrap("config", + "Manage dlg config environment", + "dlg.deploy.configs.config_manager:run") + From 69aec34ae5c59b6355c3ec63df14dd9eb59dfb8b Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Thu, 30 Jan 2025 14:50:21 +0800 Subject: [PATCH 12/14] LIU-420: Fix linter errors --- daliuge-engine/dlg/deploy/configs/config_manager.py | 2 +- daliuge-engine/dlg/deploy/create_dlg_job.py | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/daliuge-engine/dlg/deploy/configs/config_manager.py b/daliuge-engine/dlg/deploy/configs/config_manager.py index 85e8e2771..8c584f5fa 100644 --- a/daliuge-engine/dlg/deploy/configs/config_manager.py +++ b/daliuge-engine/dlg/deploy/configs/config_manager.py @@ -169,7 +169,7 @@ def load_user_config(self, config_type: ConfigType, config_choice: str) -> Path: self.prompt_setup = False choice_path = Path(config_choice) - if choice_path.is_absolute() and p.exists(): + if choice_path.is_absolute() and choice_path.exists(): return choice_path elif choice_path.exists(): return choice_path.absolute() diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 0df8ba73b..f982f744b 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -833,4 +833,4 @@ def run(_, args=None): if __name__ == "__main__": - run() + run(None, sys.argv[1:]) From 1782421a11c28d1fe588d46177918d56c8154d95 Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Thu, 30 Jan 2025 15:20:30 +0800 Subject: [PATCH 13/14] LIU-420: Fix failing tool tests. --- daliuge-engine/dlg/deploy/configs/config_manager.py | 2 +- daliuge-engine/test/test_tool.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/daliuge-engine/dlg/deploy/configs/config_manager.py b/daliuge-engine/dlg/deploy/configs/config_manager.py index 8c584f5fa..ebff4752e 100644 --- a/daliuge-engine/dlg/deploy/configs/config_manager.py +++ b/daliuge-engine/dlg/deploy/configs/config_manager.py @@ -75,7 +75,7 @@ def prompt_user(self): while True: ui = input("Do you want to create a $HOME/.config/dlg directory" - " to store your custom configuration files and scripts?") + " to store your custom configuration files and scripts (y/n)? 
") if ui.lower() == "y" or ui.lower() == "n": return ui.lower() == "y" else: diff --git a/daliuge-engine/test/test_tool.py b/daliuge-engine/test/test_tool.py index efb155a64..f2d4f7c69 100644 --- a/daliuge-engine/test/test_tool.py +++ b/daliuge-engine/test/test_tool.py @@ -19,6 +19,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, # MA 02111-1307 USA # +import pytest import subprocess import unittest @@ -26,6 +27,8 @@ from dlg.common import tool from dlg.testutils import ManagerStarter +# Note this test will only run with a full installation of DALiuGE. +pexpect = pytest.importorskip("dlg.dropmake") class TestTool(ManagerStarter, unittest.TestCase): def test_cmdhelp(self): From c987c0a4714450f1f010fb6523eb5220ae4271ba Mon Sep 17 00:00:00 2001 From: Ryan Bunney Date: Thu, 30 Jan 2025 15:39:09 +0800 Subject: [PATCH 14/14] LIU-420: Fix erroneous merge-conflict missing comma. --- daliuge-engine/dlg/deploy/create_dlg_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daliuge-engine/dlg/deploy/create_dlg_job.py b/daliuge-engine/dlg/deploy/create_dlg_job.py index 7aac3d6c8..c60a32ca1 100644 --- a/daliuge-engine/dlg/deploy/create_dlg_job.py +++ b/daliuge-engine/dlg/deploy/create_dlg_job.py @@ -829,7 +829,7 @@ def run(_, args=None): submit=opts.submit, remote=opts.remote, username=opts.username, - ssh_key=opts.ssh_key + ssh_key=opts.ssh_key, config=config, slurm_template=template )