# helpers/helpers.yaml

# beNNch - Unified execution, collection, analysis and
# comparison of neural network simulation benchmarks.
# Copyright (C) 2021 Forschungszentrum Juelich GmbH, INM-6

# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

# SPDX-License-Identifier: GPL-3.0-or-later

# Fileset "files": copies the job-script template (<job_file>.in), which the
# substitutesets in this file read as their input.
fileset:
  name: files
  copy: "${job_file}.in"
# Placeholder substitutions that turn <job_file>.in into <job_file>.
substituteset:
    # Benchmark job: every "#...#" placeholder is filled from a parameter.
    - name: sub_bench_job
      iofile:
        in: $job_file.in
        out: $job_file
      sub:
        - source: "#PARTITION#"
          dest: $partition
        - source: "#NODES#"
          dest: $num_nodes
        - source: "#NTASKS#"
          dest: $num_tasks
        - source: "#NTASKS_PER_NODE#"
          dest: $tasks_per_node
        - source: "#TIME#"
          dest: $walltime
        - source: "#ERRPATH#"
          dest: $err_file
        - source: "#OUTPATH#"
          dest: $out_file
        - source: "#COMMANDS#"
          dest: $exec_bench
        - source: "#READY#"
          dest: $ready_file
        - source: "#JOB_NAME#"
          dest: $job_name
        - source: "#ACCOUNT#"
          dest: $account
    # Build job: node/task counts, time value and job name are fixed literals;
    # the remaining placeholders are filled from parameters.
    - name: sub_build_job
      iofile:
        in: $job_file.in
        out: $job_file
      sub:
        - source: "#PARTITION#"
          dest: $partition
        - source: "#NODES#"
          dest: 1
        - source: "#NTASKS#"
          dest: 1
        - source: "#NTASKS_PER_NODE#"
          dest: 1
        # Quoted so the colon-separated value stays a string.
        - source: "#TIME#"
          dest: "00:15:00"
        - source: "#ERRPATH#"
          dest: $err_file
        - source: "#OUTPATH#"
          dest: $out_file
        - source: "#COMMANDS#"
          dest: $exec_build
        - source: "#READY#"
          dest: $ready_file
        - source: "#JOB_NAME#"
          dest: Builder_build
        - source: "#ACCOUNT#"
          dest: $account

parameterset:
  # Submission settings and derived counts for benchmark jobs.
  - name: slurm_bench
    parameter:
      - name: submit_cmd
        _: sbatch
      - name: job_file
        _: job.slurm
      # num_vps = tasks_per_node * threads_per_task * num_nodes
      - name: num_vps
        type: int
        mode: python
        _: '$tasks_per_node * $threads_per_task * $num_nodes'
      # num_tasks = tasks_per_node * num_nodes
      - name: num_tasks
        type: int
        mode: python
        _: '$tasks_per_node * $num_nodes'
      # NOTE(review): the previous comment at this position read "next is alias
      # to make result table template work"; it is unclear which entry it
      # referred to - confirm whether it still applies.
      - name: ready_file
        _: ready
      - name: err_file
        _: stderr
      - name: out_file
        _: stdout
  # Submission settings for the build job: submit command, job file name and
  # the names of the ready/stderr/stdout files.
  - name: slurm_build
    parameter:
      - name: submit_cmd
        _: sbatch
      - name: job_file
        _: job.slurm
      - name: ready_file
        _: ready
      - name: err_file
        _: stderr
      - name: out_file
        _: stdout
  # Job name and shell commands of a benchmark run. exec_bench is what
  # sub_bench_job inserts for the "#COMMANDS#" placeholder.
  - name: run_benchmark
    parameter:
      - name: job_name
        type: string
        _: benchmark
      # The script: loads the simulator module, sets the OpenMP environment,
      # runs the benchmark via srun, runs the timer/CPU helper scripts, writes
      # job.json and packs a metadata archive named after a fresh UUID.
      - name: exec_bench
        # ';' is the value separator here, so the script must not contain ';'.
        separator: ";"
        # The suffixed module name is written as ${variant}_${suffix}: with
        # the unbraced form "$variant_$suffix" the template identifier would
        # be "variant_" (underscore included), which is not a parameter and
        # would be left for the shell to expand as an unset variable.
        _: |
          module use $buildermod
          if [ -z "$suffix" ]
          then
             module load $simulator/$version/$variant
          else
             module load $simulator/$version/${variant}_${suffix}
          fi
          export OMP_NUM_THREADS=$threads_per_task
          export OMP_DISPLAY_ENV=VERBOSE
          export OMP_DISPLAY_AFFINITY=TRUE
          export OMP_PROC_BIND=TRUE
          ${optional_run_command}
          srun --cpus-per-task=${threads_per_task} ${affinity} python ${run_file} ${run_args}
          srun -n 1 --nodes 1 python ${base_path}/helpers/collect_timer_data.py ${log_path}
          srun -n 1 --nodes 1 python ${base_path}/helpers/cpu_logging.py ${jube_wp_abspath}
          cd ${model_path}
          model_git_commit_hash=$(git rev-parse HEAD)
          cd -
          cat >job.json <<EOT
          {"num_nodes":"${num_nodes}",
          "tasks_per_node":"${tasks_per_node}",
          "threads_per_task":"${threads_per_task}",
          "affinity":"${affinity}",
          "simulator":"${simulator}",
          "simulator-version":"${version}",
          "simulator-variant":"${variant}",
          "simulator-suffix":"${suffix}",
          "model_name":"${model_name}",
          "model_git_commit_hash":"${model_git_commit_hash}",
          "network_state":"${network_state}",
          "record_spikes":"${record_spikes}",
          "scaling_type":"${scaling_type}"
          }
          EOT
          metadata_uuid=$(uuidgen)
          srun -n 1 --nodes 1 python ${base_path}/helpers/metadata_archive.py ${jube_wp_abspath}/${metadata_uuid}
          cp ${jube_wp_abspath}/stderr ${jube_wp_abspath}/${metadata_uuid}
          tar -czf ${metadata_uuid}.tgz -C ${jube_wp_abspath} ${metadata_uuid}
          rm -r ${jube_wp_abspath}/${metadata_uuid}

  # Job name and command of the build job. exec_build is what sub_build_job
  # inserts for the "#COMMANDS#" placeholder.
  - name: run_build
    parameter:
      # NOTE(review): sub_build_job replaces "#JOB_NAME#" with the literal
      # Builder_build rather than $job_name - confirm whether this value is
      # used anywhere.
      - name: job_name
        type: string
        _: benchmark
      - name: exec_build
        separator: ";"
        _: build -s $simulator $version $variant $suffix

  # Empty-string defaults for the two optional pieces of the benchmark script:
  # ${optional_run_command} and ${run_args}.
  - name: init_job_file_variables
    parameter:
      - name: optional_run_command
        _: ""
      - name: run_args
        type: string
        _: ""

# experiment configuration
  - name: scaling_experiment
    parameter:
      # Python expression: scale_N * num_nodes when scaling_type is "weak",
      # plain scale_N for any other scaling_type.
      - name: scale
        mode: python
        type: float
        _: '$scale_N*$num_nodes if "$scaling_type" == "weak" else $scale_N'
# Result patterns: each entry matches a label followed by $jube_pat_fp and is
# typed as float.
patternset:
    name: timer_pattern
    pattern:
       # Entries named wall_time_*.
       - name: wall_time_sim
         mode: pattern
         type: float
         _: time_simulate $jube_pat_fp
       - name: wall_time_create
         mode: pattern
         type: float
         _: time_construction_create $jube_pat_fp
       - name: wall_time_connect
         mode: pattern
         type: float
         _: time_construction_connect $jube_pat_fp
       - name: wall_time_phase_collocate
         mode: pattern
         type: float
         _: time_collocate_spike_data $jube_pat_fp
       - name: wall_time_phase_communicate
         mode: pattern
         type: float
         _: time_communicate_spike_data $jube_pat_fp
       - name: wall_time_phase_deliver
         mode: pattern
         type: float
         _: time_deliver_spike_data $jube_pat_fp
       - name: wall_time_phase_update
         mode: pattern
         type: float
         _: time_update $jube_pat_fp
       - name: wall_time_communicate_target_data
         mode: pattern
         type: float
         _: time_communicate_target_data $jube_pat_fp
       - name: wall_time_gather_spike_data
         mode: pattern
         type: float
         _: time_gather_spike_data $jube_pat_fp
       - name: wall_time_gather_target_data
         mode: pattern
         type: float
         _: time_gather_target_data $jube_pat_fp
       - name: wall_time_communicate_prepare
         mode: pattern
         type: float
         _: time_communicate_prepare $jube_pat_fp
       # Entries named py_time_* (all set dotall).
       - name: py_time_kernel_prepare
         mode: pattern
         dotall: True
         type: float
         _: py_time_kernel_prepare $jube_pat_fp
       - name: py_time_network_local
         mode: pattern
         dotall: True
         type: float
         _: py_time_network_local $jube_pat_fp
       - name: py_time_network_global
         mode: pattern
         dotall: True
         type: float
         _: py_time_network_global $jube_pat_fp
       - name: py_time_simulate
         mode: pattern
         dotall: True
         type: float
         _: py_time_simulate $jube_pat_fp
       - name: py_time_presimulate
         mode: pattern
         dotall: True
         type: float
         _: py_time_presimulate $jube_pat_fp
       - name: py_time_network_prepare
         mode: pattern
         dotall: True
         type: float
         _: py_time_network_prepare $jube_pat_fp
       - name: py_time_create
         mode: pattern
         dotall: True
         type: float
         _: py_time_create $jube_pat_fp
       - name: py_time_connect_area
         mode: pattern
         dotall: True
         type: float
         _: py_time_connect_area $jube_pat_fp
       - name: py_time_connect_cc
         mode: pattern
         dotall: True
         type: float
         _: py_time_connect_cc $jube_pat_fp
       - name: py_time_connect
         mode: pattern
         dotall: True
         type: float
         _: py_time_connect $jube_pat_fp
       # Memory entries and counters (all set dotall).
       - name: base_memory
         mode: pattern
         dotall: True
         type: float
         _: base_memory $jube_pat_fp
       - name: network_memory
         mode: pattern
         dotall: True
         type: float
         _: network_memory $jube_pat_fp
       - name: init_memory
         mode: pattern
         dotall: True
         type: float
         _: init_memory $jube_pat_fp
       - name: total_memory
         mode: pattern
         dotall: True
         type: float
         _: total_memory $jube_pat_fp
       - name: num_connections
         mode: pattern
         dotall: True
         type: float
         _: num_connections $jube_pat_fp
       - name: local_spike_counter
         mode: pattern
         dotall: True
         type: float
         _: local_spike_counter $jube_pat_fp
       - name: e_counter
         mode: pattern
         dotall: True
         type: float
         _: e_counter $jube_pat_fp