From 358b2b90692fcceda9bb6983dc6d8eb4abb66058 Mon Sep 17 00:00:00 2001 From: Peter Scheibel Date: Mon, 30 Sep 2024 19:43:10 -0700 Subject: [PATCH] Fix Tioga system class (#382) * rocm_arch was not set by saxpy experiment or tioga system * dynamic system+experiment test should work now * style edit * generate saxpy experiment description for separate dry run in a distinct dir * remove now-unused variable --- .github/workflows/run.yml | 23 ++++++++++---------- lib/benchpark/system.py | 18 +++++++++++---- var/exp_repo/experiments/saxpy/experiment.py | 9 +++++++- var/sys_repo/systems/tioga/system.py | 3 +++ 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index e1e9ad678..b4216cfbf 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -184,18 +184,17 @@ jobs: --disable-logger \ workspace setup --dry-run -# TODO: mixing dynamic saxpy with dynamic Tioga fails -# - name: Dry run dynamic saxpy/rocm with dynamic Tioga -# run: | -# ./bin/benchpark system init --dest=tioga-system2 tioga rocm=551 compiler=cce ~gtl -# ./bin/benchpark experiment init --dest=saxpy-rocm saxpy programming_model=rocm -# ./bin/benchpark setup ./saxpy-rocm ./tioga-system2 workspace/ -# . workspace/setup.sh -# ramble \ -# --workspace-dir workspace/saxpy-rocm/Tioga-d34a754/workspace \ -# --disable-progress-bar \ -# --disable-logger \ -# workspace setup --dry-run + - name: Dry run dynamic saxpy/rocm with dynamic Tioga + run: | + ./bin/benchpark system init --dest=tioga-system2 tioga rocm=551 compiler=cce ~gtl + ./bin/benchpark experiment init --dest=saxpy-rocm2 saxpy programming_model=rocm + ./bin/benchpark setup ./saxpy-rocm2 ./tioga-system2 workspace/ + . workspace/setup.sh + ramble \ + --workspace-dir workspace/saxpy-rocm2/Tioga-d34a754/workspace \ + --disable-progress-bar \ + --disable-logger \ + workspace setup --dry-run - name: Dry run laghos/mpi-only on LLNL-Magma-Penguin-icelake-OmniPath with allocation modifier run: | diff --git a/lib/benchpark/system.py b/lib/benchpark/system.py index 8fb3e5abd..945b6e4f9 100644 --- a/lib/benchpark/system.py +++ b/lib/benchpark/system.py @@ -148,19 +148,29 @@ def compiler_description(self, output_dir): self._merge_config_files(compilers_schema.schema, selections, aux_compilers) + def system_specific_variables(self): + return {} + def variables_yaml(self): for attr in self.required: if not getattr(self, attr, None): raise ValueError(f"Missing required info: {attr}") optionals = list() - optionals_as_cfg = "" for opt in ["sys_gpus_per_node", "sys_mem_per_node", "queue"]: if getattr(self, opt, None): optionals.append(f"{opt}: {getattr(self, opt)}") + + system_specific = list() + for k, v in self.system_specific_variables().items(): + system_specific.append(f"{k}: {v}") + + extra_variables = optionals + system_specific indent = " " * 2 - if optionals: - optionals_as_cfg = f"\n{indent}".join(optionals) + extras_as_cfg = "" + if extra_variables: + extras_as_cfg = f"\n{indent}".join(extra_variables) + return f"""\ # SPDX-License-Identifier: Apache-2.0 @@ -168,7 +178,7 @@ def variables_yaml(self): timeout: "{self.timeout}" scheduler: "{self.scheduler}" sys_cores_per_node: "{self.sys_cores_per_node}" - {optionals_as_cfg} + {extras_as_cfg} max_request: "1000" # n_ranks/n_nodes cannot exceed this n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value diff --git a/var/exp_repo/experiments/saxpy/experiment.py b/var/exp_repo/experiments/saxpy/experiment.py index ba89144ec..e7702453f 100644 --- a/var/exp_repo/experiments/saxpy/experiment.py +++ b/var/exp_repo/experiments/saxpy/experiment.py @@ -41,13 +41,20 @@ def compute_applications_section(self): } the_experiment.update(matrix_cfg) + if self.spec.satisfies("programming_model=openmp"): + experiment_id = "saxpy_{n}_{n_nodes}_{omp_num_threads}" + elif self.spec.satisfies("programming_model=cuda") or self.spec.satisfies( + "programming_model=rocm" + ): + experiment_id = "saxpy_{n}" + return { "saxpy": { # ramble Application name "workloads": { # TODO replace with a hash once we have one? "problem": { "experiments": { - "saxpy_{n}_{n_nodes}_{omp_num_threads}": the_experiment, + experiment_id: the_experiment, } } } diff --git a/var/sys_repo/systems/tioga/system.py b/var/sys_repo/systems/tioga/system.py index 53c4380df..a485f9d71 100644 --- a/var/sys_repo/systems/tioga/system.py +++ b/var/sys_repo/systems/tioga/system.py @@ -89,6 +89,9 @@ def compiler_configs(self): return selections + def system_specific_variables(self): + return {"rocm_arch": "gfx90a"} + def sw_description(self): """This is somewhat vestigial: for the Tioga config that is committed to the repo, multiple instances of mpi/compilers are stored and