From adb415252844338e480b4779f1fcb81f3e1e5613 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 8 Oct 2024 10:46:35 +0100 Subject: [PATCH 01/28] DOCA workflow support --- .github/workflows/package-build-ofed.yml | 27 +++++-- etc/kayobe/ansible/build-ofed-rocky.yml | 34 +-------- etc/kayobe/ansible/push-ofed.yml | 72 ++++++++++++++----- etc/kayobe/dnf.yml | 32 ++++++++- .../environments/ci-aio/stackhpc-ci.yml | 2 + .../group_vars/{seed => ofed-builder}/lvm.yml | 3 +- .../group_vars/ofed-builder/stackhpc-repos | 3 + .../environments/ci-builder/inventory/hosts | 1 + .../environments/ci-builder/stackhpc-ci.yml | 7 ++ etc/kayobe/ofed.yml | 27 ++++--- etc/kayobe/pulp-repo-versions.yml | 10 ++- etc/kayobe/pulp.yml | 12 ++++ etc/kayobe/stackhpc.yml | 8 +++ 13 files changed, 169 insertions(+), 69 deletions(-) rename etc/kayobe/environments/ci-builder/inventory/group_vars/{seed => ofed-builder}/lvm.yml (96%) create mode 100644 etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos diff --git a/.github/workflows/package-build-ofed.yml b/.github/workflows/package-build-ofed.yml index 798e0c4bf..0137c4f19 100644 --- a/.github/workflows/package-build-ofed.yml +++ b/.github/workflows/package-build-ofed.yml @@ -1,5 +1,5 @@ --- -name: Build OFED packages +name: Build OFED kernel modules on: workflow_dispatch: inputs: @@ -23,7 +23,7 @@ env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} jobs: overcloud-ofed-packages: - name: Build OFED packages + name: Build OFED kernel modules if: github.repository == 'stackhpc/stackhpc-kayobe-config' runs-on: arc-skc-host-image-builder-runner permissions: {} @@ -48,6 +48,11 @@ jobs: BRANCH=$(awk -F'=' '/defaultbranch/ {print $2}' src/kayobe-config/.gitreview) echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," >> $GITHUB_OUTPUT + - name: Generate OFED tag + id: ofed_tag + run: | + echo "ofed_tag=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_OUTPUT + - name: Clone StackHPC Kayobe 
repository uses: actions/checkout@v4 with: @@ -86,6 +91,7 @@ jobs: id: image_tag run: | echo image_tag=$(grep stackhpc_rocky_9_overcloud_host_image_version: etc/kayobe/pulp-host-image-versions.yml | awk '{print $2}') >> $GITHUB_OUTPUT + working-directory: ${{ github.workspace }}/src/kayobe-config # Use the image override if set, otherwise use overcloud-os_distribution-os_release-tag - name: Output image name @@ -183,7 +189,9 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-builder && - kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml + kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml \ + -e seed_bootstrap_user="cloud-user" \ + -e controller_bootstrap_user="cloud-user" \ env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -191,7 +199,13 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-builder && - kayobe seed host configure --skip-tags network,docker + kayobe seed host configure \ + --skip-tags network,docker,docker-registry \ + -e seed_bootstrap_user="cloud-user" \ + -e controller_bootstrap_user="cloud-user" \ + -e dnf_install_doca=true \ + -e dnf_enable_doca_modules=false \ + -e stackhpc_repos_enabled=true env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -199,7 +213,7 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-builder && - kayobe seed host command run --become --command "dnf distro-sync --refresh" + kayobe seed host command run --become --command "dnf distro-sync --refresh --assumeyes" env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -240,7 +254,8 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-builder && - kayobe playbook run src/kayobe-config/etc/kayobe/ansible/push-ofed.yml + kayobe playbook run 
src/kayobe-config/etc/kayobe/ansible/push-ofed.yml \ + -e "ofed_tag=${{ steps.ofed_tag.outputs.ofed_tag }}" env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} diff --git a/etc/kayobe/ansible/build-ofed-rocky.yml b/etc/kayobe/ansible/build-ofed-rocky.yml index d7e925547..030c196dc 100644 --- a/etc/kayobe/ansible/build-ofed-rocky.yml +++ b/etc/kayobe/ansible/build-ofed-rocky.yml @@ -1,5 +1,5 @@ --- -- name: Build OFED packages +- name: Build OFED kernel modules become: true hosts: ofed-builder gather_facts: false @@ -22,7 +22,6 @@ - rpm-build - automake - patch - - kernel - kernel-devel - autoconf - pciutils @@ -37,37 +36,10 @@ - cmake-filesystem - libnl3-devel - python3-devel + - doca-extra state: latest update_cache: true - - name: Add DOCA host repository package - ansible.builtin.dnf: - name: "https://developer.nvidia.com/downloads/networking/secure/doca-sdk/DOCA_2.8/doca-host-2.8.0-204000_{{ stackhpc_pulp_doca_ofed_version }}_rhel9{{ stackhpc_pulp_repo_rocky_9_minor_version }}.x86_64.rpm" - disable_gpg_check: true - - - name: Install DOCA extra packages - ansible.builtin.dnf: - name: doca-extra - - - name: Create build directory - ansible.builtin.file: - path: /home/cloud-user/ofed - state: directory - mode: "0777" - - - name: Set build directory - ansible.builtin.replace: - path: /opt/mellanox/doca/tools/doca-kernel-support - regexp: TMP_DIR=\$1 - replace: TMP_DIR=/home/cloud-user/ofed - - name: Build OFED kernel modules ansible.builtin.shell: - cmd: | - /opt/mellanox/doca/tools/doca-kernel-support - - - name: Download OFED userspace packages - ansible.builtin.dnf: - name: doca-ofed-userspace - download_only: true - download_dir: /home/cloud-user/ofed + cmd: /opt/mellanox/doca/tools/doca-kernel-support diff --git a/etc/kayobe/ansible/push-ofed.yml b/etc/kayobe/ansible/push-ofed.yml index 3b1130c20..de9552270 100644 --- a/etc/kayobe/ansible/push-ofed.yml +++ b/etc/kayobe/ansible/push-ofed.yml @@ -1,45 +1,80 @@ --- - name: Push OFED packages hosts: 
ofed-builder + vars: + venv: "/opt/kayobe/venvs/kayobe" + doca_extract_path: "/home/stack/doca" tasks: + - name: Get OFED module repo variables + ansible.builtin.set_fact: + doca_modules_repo_name: "{{ stackhpc_pulp_rpm_repos | selectattr('name', 'search', 'OFED') | map(attribute='name') | join('') }}" + doca_modules_repo_base_path: "{{ stackhpc_pulp_rpm_repos | selectattr('name', 'search', 'OFED') | map(attribute='base_path') | join('') }}" + doca_modules_repo_distribution_name: "{{ stackhpc_pulp_rpm_repos | selectattr('name', 'search', 'OFED') | map(attribute='distribution_name') | join('') }}" + - name: Install python dependencies ansible.builtin.pip: name: pulp-cli - - name: Create Pulp repository for OFED + - name: Ensure Pulp configuration directory exists + ansible.builtin.file: + path: /home/stack/.config/pulp/ + state: directory + recurse: true + + - name: Setup Pulp credentials + ansible.builtin.blockinfile: + path: /home/stack/.config/pulp/cli.toml + create: true + block: | + [cli] + base_url = '{{ stackhpc_release_pulp_url }}' + verify_ssl = true + format = "json" + username = '{{ stackhpc_release_pulp_username }}' + password = '{{ stackhpc_release_pulp_password }}' + no_log: true + + - name: Find DOCA kernel repo package + ansible.builtin.shell: + cmd: 'find /tmp/DOCA.* -name doca-kernel-repo-*' + register: doca_kernel_repo + changed_when: false + + - name: Unpackage kernel repo + ansible.builtin.shell: + cmd: "rpm2cpio {{ doca_kernel_repo.stdout }} | cpio -idmv -D {{ doca_extract_path }}" + + - name: Find extracted kernel packages + ansible.builtin.shell: + cmd: 'find {{ doca_extract_path }}/usr/share/doca-host-*/Modules/$(uname -r)/*.rpm' + register: doca_kernel_packages + + - name: Create Pulp repository for DOCA kernel modules pulp.squeezer.rpm_repository: pulp_url: "{{ stackhpc_release_pulp_url }}" username: "{{ stackhpc_release_pulp_username }}" password: "{{ stackhpc_release_pulp_password }}" - name: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.name 
}}" + name: "{{ doca_modules_repo_name }}" state: present retries: "{{ pulp_timeout_retries | default(3) }}" - - name: Lookup Pulp RPMs on builder - ansible.builtin.find: - paths: /home/cloud-user/ofed - register: rpm_dir - - name: Upload OFED RPMs to Pulp ansible.builtin.shell: cmd: | - pulp \ - --base-url '{{ stackhpc_release_pulp_url }}' \ - --username '{{ stackhpc_release_pulp_username }}' \ - --password '{{ stackhpc_release_pulp_password }}' \ + {{ venv }}/bin/pulp \ rpm content \ --type package upload \ - --repository '{{ stackhpc_pulp_repo_doca_ofed_rhel9.name }}' \ - --file {{ item.path }} \ - with_items: "{{ rpm_dir.files }}" - no_log: true + --repository '{{ doca_modules_repo_name }}' \ + --file {{ item }} \ + --no-publish \ + with_items: "{{ doca_kernel_packages.stdout_lines }}" - name: Create Pulp publication for OFED pulp.squeezer.rpm_publication: pulp_url: "{{ stackhpc_release_pulp_url }}" username: "{{ stackhpc_release_pulp_username }}" password: "{{ stackhpc_release_pulp_password }}" - repository: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.name }}" + repository: "{{ doca_modules_repo_name }}" state: present register: publication @@ -48,7 +83,8 @@ pulp_url: "{{ stackhpc_release_pulp_url }}" username: "{{ stackhpc_release_pulp_username }}" password: "{{ stackhpc_release_pulp_password }}" - name: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.distribution_name }}" + name: "{{ doca_modules_repo_distribution_name + ofed_tag }}" publication: "{{ publication.publication.pulp_href }}" - base_path: "{{ stackhpc_pulp_repo_doca_ofed_rhel9.base_path }}" + content_guard: development + base_path: "{{ doca_modules_repo_base_path + ofed_tag }}" state: present diff --git a/etc/kayobe/dnf.yml b/etc/kayobe/dnf.yml index 916ffe5a1..3df5a938b 100644 --- a/etc/kayobe/dnf.yml +++ b/etc/kayobe/dnf.yml @@ -47,7 +47,28 @@ dnf_custom_repos: "{{ stackhpc_dnf_repos if stackhpc_repos_enabled | bool else [ # To use these repos, set stackhpc_repos_enabled to true. 
# This is done by default for hosts in the overcloud group via a group_vars # file. -stackhpc_dnf_repos: "{{ dnf_custom_repos_el9 | combine(dnf_custom_repos_rocky_9) | combine(dnf_custom_repos_elrepo_9 if dnf_install_elrepo_9 | bool else {}) }}" +stackhpc_dnf_repos: "{{ dnf_custom_repos_el9 | combine(dnf_custom_repos_rocky_9) | combine(dnf_custom_repos_elrepo_9 if dnf_install_elrepo_9 | bool else {}) | combine(dnf_custom_repos_doca if dnf_install_doca | bool else {}) }}" + +# DOCA repositories +dnf_custom_repos_doca: + doca: + baseurl: "{{ stackhpc_repo_rhel9_doca_url }}" + description: "DOCA Online Repo {{ stackhpc_pulp_doca_version }} - RHEL $releasever" + enabled: "{{ dnf_enable_doca | bool | default(false) }}" + priority: -1 + file: doca + gpgcheck: no + username: "{{ stackhpc_repo_mirror_username | default(omit, true) }}" + password: "{{ stackhpc_repo_mirror_password | default(omit, true) }}" + doca-modules: + baseurl: "{{ stackhpc_repo_rhel9_doca_modules_url }}" + description: "OFED Kernel modules for DOCA {{ stackhpc_pulp_doca_version }} - RHEL $releasever" + enabled: "{{ dnf_enable_doca_modules | bool | default(false) }}" + priority: -2 + file: doca + gpgcheck: no + username: "{{ stackhpc_repo_mirror_username | default(omit, true) }}" + password: "{{ stackhpc_repo_mirror_password | default(omit, true) }}" # Custom repositories shared between all RHEL 9 derivatives. dnf_custom_repos_el9: @@ -132,6 +153,12 @@ dnf_enable_epel: "{{ dnf_install_epel | bool }}" # Whether to enable the ELRepo repository. This affects RedHat-based, 9.x release systems only. dnf_enable_elrepo_9: "{{ dnf_install_elrepo_9 | bool }}" +# Whether to enable DOCA repositories. This affects RedHat-based systems only. +dnf_enable_doca: "{{ dnf_install_doca | bool }}" + +# Whether to enable the DOCA kernel module repository. This affects RedHat-based systems only. +dnf_enable_doca_modules: "{{ dnf_install_doca | bool }}" + # URL of EPEL GPG keys. 
dnf_epel_9_gpg_key_url: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9" @@ -152,6 +179,9 @@ dnf_enable_docker: true #URL of docker repo GPG key dnf_docker_gpg_key_url: "https://download.docker.com/linux/centos/gpg" +# Whether to create a repo file for DOCA. This affects RedHat-based +# systems only. Default value is 'false'. +#dnf_install_doca: ############################################################################### # DNF Automatic configuration. diff --git a/etc/kayobe/environments/ci-aio/stackhpc-ci.yml b/etc/kayobe/environments/ci-aio/stackhpc-ci.yml index 50af5d160..a3f2bbb14 100644 --- a/etc/kayobe/environments/ci-aio/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-aio/stackhpc-ci.yml @@ -50,6 +50,8 @@ stackhpc_repo_rocky_9_extras_version: "{{ stackhpc_pulp_repo_rocky_9_extras_vers stackhpc_repo_rocky_9_crb_version: "{{ stackhpc_pulp_repo_rocky_9_crb_version }}" stackhpc_repo_rocky_9_highavailability_version: "{{ stackhpc_pulp_repo_rocky_9_highavailability_version }}" stackhpc_repo_rocky_9_sig_security_common_version: "{{ stackhpc_pulp_repo_rocky_9_sig_security_common_version }}" +stackhpc_repo_rhel9_doca_version: "{{ stackhpc_pulp_repo_rhel9_doca_version }}" +stackhpc_repo_rhel9_doca_modules_version: "{{ stackhpc_pulp_repo_rhel9_doca_modules_version }}" # Rocky-and-CI-specific Pulp urls stackhpc_include_os_minor_version_in_repo_url: true diff --git a/etc/kayobe/environments/ci-builder/inventory/group_vars/seed/lvm.yml b/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/lvm.yml similarity index 96% rename from etc/kayobe/environments/ci-builder/inventory/group_vars/seed/lvm.yml rename to etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/lvm.yml index 2c9a52142..ab553dfd3 100644 --- a/etc/kayobe/environments/ci-builder/inventory/group_vars/seed/lvm.yml +++ b/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/lvm.yml @@ -1,4 +1,5 @@ --- + # List of extra LVs to include in the rootvg VG. 
stackhpc_lvm_group_rootvg_lvs_extra: - "{{ stackhpc_lvm_lv_docker }}" @@ -16,7 +17,7 @@ stackhpc_lvm_lv_root_size: 2g stackhpc_lvm_lv_tmp_size: 2g # StackHPC LVM lv_var LV size. -stackhpc_lvm_lv_var_size: 2g +stackhpc_lvm_lv_var_size: 3g # StackHPC LVM lv_var_tmp LV size. stackhpc_lvm_lv_var_tmp_size: 2g diff --git a/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos b/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos new file mode 100644 index 000000000..107f75c1c --- /dev/null +++ b/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos @@ -0,0 +1,3 @@ +--- + +stackhpc_repos_enabled: true diff --git a/etc/kayobe/environments/ci-builder/inventory/hosts b/etc/kayobe/environments/ci-builder/inventory/hosts index e2b89f71b..b50269868 100644 --- a/etc/kayobe/environments/ci-builder/inventory/hosts +++ b/etc/kayobe/environments/ci-builder/inventory/hosts @@ -2,6 +2,7 @@ [ofed-builder:children] seed +controllers # This is neccesary to run `overcloud deployment image build` [controllers] diff --git a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml index e3112029f..a73c27265 100644 --- a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml @@ -75,6 +75,7 @@ stackhpc_repo_rocky_9_extras_version: "{{ stackhpc_pulp_repo_rocky_9_extras_vers stackhpc_repo_rocky_9_crb_version: "{{ stackhpc_pulp_repo_rocky_9_crb_version }}" stackhpc_repo_rocky_9_highavailability_version: "{{ stackhpc_pulp_repo_rocky_9_highavailability_version }}" stackhpc_repo_rocky_9_sig_security_common_version: "{{ stackhpc_pulp_repo_rocky_9_sig_security_common_version }}" +stackhpc_repo_rhel9_doca_version: "{{ stackhpc_pulp_repo_rhel9_doca_version }}" # Rocky-and-CI-specific Pulp urls stackhpc_include_os_minor_version_in_repo_url: true @@ -107,3 +108,9 @@ ipa_build_dib_env_extra: # builder being a member 
of the 'overcloud' group for IPA builds. stackhpc_repos_enabled: false enable_docker_repo: true + +# Enable LVM for OFED builds +seed_lvm_groups: + - "{{ stackhpc_lvm_group_rootvg if dnf_install_doca | bool }}" +controller_lvm_groups: + - "{{ stackhpc_lvm_group_rootvg if dnf_install_doca | bool }}" diff --git a/etc/kayobe/ofed.yml b/etc/kayobe/ofed.yml index 696e3c93b..f18d0352b 100644 --- a/etc/kayobe/ofed.yml +++ b/etc/kayobe/ofed.yml @@ -1,12 +1,21 @@ --- -# DOCA OFED configuration +# DOCA OFED configuration. -# DOCA OFED version -stackhpc_pulp_doca_ofed_version: 24.07 +############################################################################### +# DOCA host version +stackhpc_pulp_doca_version: 2.9.1 -# DOCA OFED repositories -stackhpc_pulp_repo_doca_ofed_rhel9: - name: Mellanox Technologies doca_ofed {{ stackhpc_pulp_doca_ofed_version }} Rocky 9.{{ stackhpc_pulp_repo_rocky_9_minor_version }} - url: "{{ stackhpc_release_pulp_content_url }}/doca_ofed/{{ stackhpc_pulp_doca_ofed_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64" - distribution_name: "mlnx-ofed-{{ stackhpc_pulp_doca_ofed_version }}-rocky-9-{{ stackhpc_pulp_repo_rocky_9_minor_version }}" - base_path: "mlnx_ofed/{{ stackhpc_pulp_doca_ofed_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64" +############################################################################### +# Pulp configuration for DOCA OFED + +# Whether to sync OFED kernel module packages into the local Pulp service +stackhpc_pulp_sync_ofed_modules: false + +# DOCA Snapshot versions. 
The defaults use the appropriate version from +# pulp-repo-versions.yml +stackhpc_pulp_repo_rhel9_doca_version: "{{ lookup('vars', 'stackhpc_pulp_repo_rhel_9_{{ stackhpc_pulp_repo_rocky_9_minor_version }}_doca_version') }}" +stackhpc_pulp_repo_rhel9_doca_modules_version: "{{ lookup('vars', 'stackhpc_pulp_repo_rhel_9_{{ stackhpc_pulp_repo_rocky_9_minor_version }}_doca_modules_version') }}" + +############################################################################### +# Dummy variable to allow Ansible to accept this file. +workaround_ansible_issue_8743: yes diff --git a/etc/kayobe/pulp-repo-versions.yml b/etc/kayobe/pulp-repo-versions.yml index 672980bfb..ef0d2eac9 100644 --- a/etc/kayobe/pulp-repo-versions.yml +++ b/etc/kayobe/pulp-repo-versions.yml @@ -46,6 +46,10 @@ stackhpc_pulp_repo_rocky_9_5_crb_version: 20241217T005008 stackhpc_pulp_repo_rocky_9_5_extras_version: 20241216T004230 stackhpc_pulp_repo_rocky_9_5_highavailability_version: 20241202T003154 stackhpc_pulp_repo_rocky_9_sig_security_common_version: 20241127T003858 -stackhpc_pulp_repo_ubuntu_cloud_archive_version: 20241217T045049 -stackhpc_pulp_repo_ubuntu_jammy_security_version: 20241217T071258 -stackhpc_pulp_repo_ubuntu_jammy_version: 20241217T071258 +stackhpc_pulp_repo_ubuntu_cloud_archive_version: 20240911T041957 +stackhpc_pulp_repo_ubuntu_jammy_security_version: 20240924T064114 +stackhpc_pulp_repo_ubuntu_jammy_version: 20240924T064114 +stackhpc_pulp_repo_rhel_9_4_doca_version: 20241211T153620 +stackhpc_pulp_repo_rhel_9_4_doca_modules_version: 20241213T112245 +stackhpc_pulp_repo_rhel_9_5_doca_version: 20241211T171301 +stackhpc_pulp_repo_rhel_9_5_doca_modules_version: 20241213T112245 diff --git a/etc/kayobe/pulp.yml b/etc/kayobe/pulp.yml index e55b82bb3..e01994fbf 100644 --- a/etc/kayobe/pulp.yml +++ b/etc/kayobe/pulp.yml @@ -373,6 +373,18 @@ stackhpc_pulp_rpm_repos: base_path: "opensearch-dashboards/2.x/yum/" required: "{{ stackhpc_pulp_sync_for_local_container_build | bool and 
stackhpc_pulp_sync_el_9 | bool }}" + - name: DOCA Online Repo {{ stackhpc_pulp_doca_version }} - RHEL 9.{{ stackhpc_pulp_repo_rocky_9_minor_version }} + url: "{{ stackhpc_release_pulp_content_url }}/doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_pulp_repo_rhel9_doca_version }}" + distribution_name: "doca-{{ stackhpc_pulp_doca_version }}-rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}-" + base_path: "doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/" + required: "{{ stackhpc_pulp_sync_el_9 }}" + + - name: OFED Kernel modules for DOCA {{ stackhpc_pulp_doca_version }} - RHEL 9.{{ stackhpc_pulp_repo_rocky_9_minor_version }} + url: "{{ stackhpc_release_pulp_content_url }}/doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_pulp_repo_rhel9_doca_modules_version }}" + distribution_name: "doca-modules-{{ stackhpc_pulp_doca_version }}-rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}-" + base_path: "doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/" + required: "{{ stackhpc_pulp_sync_ofed_modules | bool and stackhpc_pulp_sync_el_9 | bool }}" + # RPM repositories stackhpc_pulp_repository_rpm_repos: >- {%- set rpm_repos = [] -%} diff --git a/etc/kayobe/stackhpc.yml b/etc/kayobe/stackhpc.yml index b1955c02b..db64a8b8c 100644 --- a/etc/kayobe/stackhpc.yml +++ b/etc/kayobe/stackhpc.yml @@ -56,6 +56,14 @@ stackhpc_repo_ceph_reef_debian_version: "{{ stackhpc_repo_distribution }}" ############################################################################### # RPMs +# DOCA +stackhpc_repo_rhel9_doca_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_repo_rhel9_doca_version }}" +stackhpc_repo_rhel9_doca_version: "{{ 
stackhpc_repo_distribution }}" + +# DOCA Modules +stackhpc_repo_rhel9_doca_modules_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_repo_rhel9_doca_modules_version }}" +stackhpc_repo_rhel9_doca_modules_version: "{{ stackhpc_repo_distribution }}" + # Grafana stackhpc_repo_grafana_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/grafana/oss/rpm/{{ stackhpc_repo_grafana_version }}" stackhpc_repo_grafana_version: "{{ stackhpc_repo_distribution }}" From e26298b4f8b912543a4bbe4f9ec1721dfd49e8c4 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Wed, 15 Jan 2025 15:01:48 +0000 Subject: [PATCH 02/28] Try use kernel meta package --- etc/kayobe/ansible/push-ofed.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/etc/kayobe/ansible/push-ofed.yml b/etc/kayobe/ansible/push-ofed.yml index de9552270..49e3f645a 100644 --- a/etc/kayobe/ansible/push-ofed.yml +++ b/etc/kayobe/ansible/push-ofed.yml @@ -40,15 +40,6 @@ register: doca_kernel_repo changed_when: false - - name: Unpackage kernel repo - ansible.builtin.shell: - cmd: "rpm2cpio {{ doca_kernel_repo.stdout }} | cpio -idmv -D {{ doca_extract_path }}" - - - name: Find extracted kernel packages - ansible.builtin.shell: - cmd: 'find {{ doca_extract_path }}/usr/share/doca-host-*/Modules/$(uname -r)/*.rpm' - register: doca_kernel_packages - - name: Create Pulp repository for DOCA kernel modules pulp.squeezer.rpm_repository: pulp_url: "{{ stackhpc_release_pulp_url }}" @@ -67,7 +58,7 @@ --repository '{{ doca_modules_repo_name }}' \ --file {{ item }} \ --no-publish \ - with_items: "{{ doca_kernel_packages.stdout_lines }}" + with_items: "{{ doca_kernel_repo.stdout_lines }}" - name: Create Pulp publication for OFED pulp.squeezer.rpm_publication: From 965048166d39d0e5e5b55e4fc535b56f7e82f672 Mon Sep 17 00:00:00 2001 From: Kayobe Automation Date: Tue, 21 Jan 2025 10:23:49 +0000 Subject: 
[PATCH 03/28] Add DOCA install playbook --- doc/source/contributor/ofed.rst | 61 ++++++++++++++++++----------- etc/kayobe/ansible/install-doca.yml | 28 +++++++++++++ etc/kayobe/dnf.yml | 4 +- etc/kayobe/pulp-repo-versions.yml | 2 +- 4 files changed, 69 insertions(+), 26 deletions(-) create mode 100644 etc/kayobe/ansible/install-doca.yml diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index e53b0f125..361f2e82f 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -4,19 +4,17 @@ OFED Warning: Experimental workflow subject to change -This section documents the workflow for building OFED packages for Release train integration. - -The workflow builds the OFED kernel modules against the latest available kernel in Release train -(as configured in SKC) and compiles them into RPM packages to be uploaded to Ark. Addtionally, -this workflow downloads the userspace OFED packages from the Nvidia repository and uploads these -to Ark. +The Nvidia DOCA framework is distributed as part of StackHPC Release Train for OFED driver support, +this repository is synced into Ark as part of the Release Train worfkflows, however to ensure +compatibility with Release Train packages, we are required to build OFED modules with support for +the latest Release Train kernel. Workflow ======== The workflow uses workflow_dispatch to manually request an OFED build, which will deploy a builder VM, apply kayobe config to the builder, upgrade the kernel, reboot, then run two Ansible playbooks -for building and uploading OFED to Ark. +for building and uploading OFED modules to Ark. Pre-requisites -------------- @@ -25,31 +23,48 @@ Before building OFED packages, the workflow will ensure that: * A full distro-sync has taken place, ensuring the kernel is upgraded. 
-* The bootloader has been configured to use the latest kernel +* The bootloader has been configured to use the latest kernel (reset-bls-entries.yml) * noexec is disabled in the temporary logical volume. build-ofed ---------- -Currently we only support building Rocky Linux 9 OFED packages. - -In order to setup OFED, we're required to build kernel modules for the OFED drivers as -the kernels we provide in release train are unsupported by OFED. To accomplish this we -will need to use the doca-kernel-support from the doca-extra repository. +Currently we only support building Rocky Linux 9 OFED kernel module packages. -We will need to instll dependencies in order to build the OFED kernel modules, and these -are installed at the beginning of the build playbook. We also install base and appstream -dependencies of userspace OFED packages here, this is intended to stop these dependencies -being pulled in later when we download the OFED packages from the doca-host repository. +The Build OFED module workflow will check that the filesystem is configured (noexec disabled) +to allow the DOCA build script to run. The workflow will also install any necessary dependencies +for the module build. -At the end of the playbook following the kernel module build, the OFED userspace packages -are downloaded from the upstream repository in order to upload these to Ark. +The build script will output a ``doca-kernel-repo`` RPM which contains all kernel modules built +as part of the workflow. When this RPM is installed, the repofile is created pointing to the +modules in `/usr/share/doca-host-<version>/Modules/<kernel>/` on the host. push-ofed --------- -As we're not syncing OFED from any upstream source, and are instead creating our own -repository of custom packages, we will be required to setup the Pulp distribution/publication -and upload the content directly to Ark. This playbook uses the Pulp CLI to upload the RPMs -to Ark.
+As mentioned above, the DOCA repository is synced into the `doca` repository in Ark. This workflow +will upload the ``doca-kernel-repo`` RPM to a separate repository named `doca-modules`. The version +for this repository is set in `pulp-repo-versions.yml` and is disabled for local pulp syncs by +default. + +Install process +=============== + +Pre-requisites +-------------- + +* Ensure the OFED hosts are upgraded with the latest packages in the point release. + +* The bootloader has been configured to use the latest kernel (reset-bls-entries.yml) + +install-doca +------------ + +A playbook is provided to install DOCA on hosts in the `mlnx` group. Ensure this group +is configured to include the hosts you wish to install DOCA on. To run the install +playbook: + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/install-doca.yml diff --git a/etc/kayobe/ansible/install-doca.yml b/etc/kayobe/ansible/install-doca.yml new file mode 100644 index 000000000..1a0fee8c3 --- /dev/null +++ b/etc/kayobe/ansible/install-doca.yml @@ -0,0 +1,28 @@ +--- +- name: Install DOCA + become: true + hosts: mlnx + gather_facts: true + tasks: + - name: Get running kernel + ansible.builtin.command: + cmd: "uname -r" + register: kernel + + - name: Install kernel repo + ansible.builtin.dnf: + name: doca-kernel-repo + state: latest + update_cache: true + + - name: Ensure correct priority for DOCA modules + ansible.builtin.lineinfile: + line: "priority=-2" + insertafter: EOF + path: "/etc/yum.repos.d/doca-kernel-{{ kernel.stdout }}.repo" + + - name: Install DOCA OFED + ansible.builtin.dnf: + name: doca-ofed + state: latest + update_cache: true diff --git a/etc/kayobe/dnf.yml b/etc/kayobe/dnf.yml index 3df5a938b..5cca933e4 100644 --- a/etc/kayobe/dnf.yml +++ b/etc/kayobe/dnf.yml @@ -62,9 +62,9 @@ dnf_custom_repos_doca: password: "{{ stackhpc_repo_mirror_password | default(omit, true) }}" doca-modules: baseurl: "{{ stackhpc_repo_rhel9_doca_modules_url }}" - description:
"OFED Kernel modules for DOCA {{ stackhpc_pulp_doca_version }} - RHEL $releasever" + description: "OFED Kernel module repository for DOCA {{ stackhpc_pulp_doca_version }} - RHEL $releasever" enabled: "{{ dnf_enable_doca_modules | bool | default(false) }}" - priority: -2 + priority: -1 file: doca gpgcheck: no username: "{{ stackhpc_repo_mirror_username | default(omit, true) }}" diff --git a/etc/kayobe/pulp-repo-versions.yml b/etc/kayobe/pulp-repo-versions.yml index ef0d2eac9..90186ec3e 100644 --- a/etc/kayobe/pulp-repo-versions.yml +++ b/etc/kayobe/pulp-repo-versions.yml @@ -52,4 +52,4 @@ stackhpc_pulp_repo_ubuntu_jammy_version: 20240924T064114 stackhpc_pulp_repo_rhel_9_4_doca_version: 20241211T153620 stackhpc_pulp_repo_rhel_9_4_doca_modules_version: 20241213T112245 stackhpc_pulp_repo_rhel_9_5_doca_version: 20241211T171301 -stackhpc_pulp_repo_rhel_9_5_doca_modules_version: 20241213T112245 +stackhpc_pulp_repo_rhel_9_5_doca_modules_version: 20250115T150314 From c2712611d27c080164d9db91d562984739d0ffba Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 21 Jan 2025 14:01:30 +0000 Subject: [PATCH 04/28] Release note --- doc/source/contributor/ofed.rst | 6 ++++++ .../notes/doca-workflow-017931dbfd580b41.yaml | 14 ++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 releasenotes/notes/doca-workflow-017931dbfd580b41.yaml diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 361f2e82f..2b814ee93 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -58,6 +58,12 @@ Pre-requisites * The bootloader has been configured to use the latest kernel (reset-bls-entries.yml) +* Ensure repositories have been templated by setting: + + .. 
code-block:: yaml + + dnf_install_doca: true + install-doca ------------ diff --git a/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml b/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml new file mode 100644 index 000000000..53bd560d1 --- /dev/null +++ b/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml @@ -0,0 +1,14 @@ +--- +features: + - | + Using DOCA LTS 2.9.1. + DOCA workflows updated to build kernel modules only, relying + on Release Train synchronisation of DOCA upstream for userspace + packages. + Improved documentation now including DOCA install + with playbook. + Local Pulp syncs for DOCA/DOCA kernel repo. +fixes: + - | + Miscellaneous issues with the existing workflow resolved in this + patchset. From 9986dfc3f36514023b1b0b339603c7d2e068ab5e Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 21 Jan 2025 14:23:25 +0000 Subject: [PATCH 05/28] Fix bad merge conflict --- etc/kayobe/pulp-repo-versions.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/etc/kayobe/pulp-repo-versions.yml b/etc/kayobe/pulp-repo-versions.yml index 90186ec3e..b9a27f494 100644 --- a/etc/kayobe/pulp-repo-versions.yml +++ b/etc/kayobe/pulp-repo-versions.yml @@ -46,9 +46,9 @@ stackhpc_pulp_repo_rocky_9_5_crb_version: 20241217T005008 stackhpc_pulp_repo_rocky_9_5_extras_version: 20241216T004230 stackhpc_pulp_repo_rocky_9_5_highavailability_version: 20241202T003154 stackhpc_pulp_repo_rocky_9_sig_security_common_version: 20241127T003858 -stackhpc_pulp_repo_ubuntu_cloud_archive_version: 20240911T041957 -stackhpc_pulp_repo_ubuntu_jammy_security_version: 20240924T064114 -stackhpc_pulp_repo_ubuntu_jammy_version: 20240924T064114 +stackhpc_pulp_repo_ubuntu_cloud_archive_version: 20241217T045049 +stackhpc_pulp_repo_ubuntu_jammy_security_version: 20241217T071258 +stackhpc_pulp_repo_ubuntu_jammy_version: 20241217T071258 stackhpc_pulp_repo_rhel_9_4_doca_version: 20241211T153620 stackhpc_pulp_repo_rhel_9_4_doca_modules_version: 20241213T112245 
stackhpc_pulp_repo_rhel_9_5_doca_version: 20241211T171301 From 2a93d4d2c06b645badbba1955b64d326231114d5 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 21 Jan 2025 14:39:40 +0000 Subject: [PATCH 06/28] Fix whitespace --- doc/source/contributor/ofed.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 2b814ee93..aa1126ee7 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -5,7 +5,7 @@ OFED Warning: Experimental workflow subject to change The Nvidia DOCA framework is distributed as part of StackHPC Release Train for OFED driver support, -this repository is synced into Ark as part of the Release Train worfkflows, however to ensure +this repository is synced into Ark as part of the Release Train worfkflows, however to ensure compatibility with Release Train packages, we are required to build OFED modules with support for the latest Release Train kernel. @@ -59,7 +59,7 @@ Pre-requisites * The bootloader has been configured to use the latest kernel (reset-bls-entries.yml) * Ensure repositories have been templated by setting: - + .. 
code-block:: yaml dnf_install_doca: true From fb7858351189bdbc5b5724a269225c8bdebdd6b3 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 21 Jan 2025 14:43:07 +0000 Subject: [PATCH 07/28] Use command instead --- etc/kayobe/ansible/build-ofed-rocky.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/kayobe/ansible/build-ofed-rocky.yml b/etc/kayobe/ansible/build-ofed-rocky.yml index 030c196dc..bc50c38a6 100644 --- a/etc/kayobe/ansible/build-ofed-rocky.yml +++ b/etc/kayobe/ansible/build-ofed-rocky.yml @@ -41,5 +41,5 @@ update_cache: true - name: Build OFED kernel modules - ansible.builtin.shell: + ansible.builtin.command: cmd: /opt/mellanox/doca/tools/doca-kernel-support From f83772fc13f6f89fd6d910e1884c3401eae02e2d Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 21 Jan 2025 14:57:33 +0000 Subject: [PATCH 08/28] Default to false --- etc/kayobe/dnf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/kayobe/dnf.yml b/etc/kayobe/dnf.yml index 5cca933e4..a49207839 100644 --- a/etc/kayobe/dnf.yml +++ b/etc/kayobe/dnf.yml @@ -181,7 +181,7 @@ dnf_docker_gpg_key_url: "https://download.docker.com/linux/centos/gpg" # Whether to create a repo file for DOCA. This affects RedHat-based # systems only. Default value is 'false'. -#dnf_install_doca: +dnf_install_doca: false ############################################################################### # DNF Automatic configuration. 
From d224b74d7b68889d29eddaaf11638eb2a7b79bb1 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Wed, 29 Jan 2025 11:32:04 +0000 Subject: [PATCH 09/28] Fix release note --- .../notes/doca-workflow-017931dbfd580b41.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml b/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml index 53bd560d1..8b023c4e9 100644 --- a/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml +++ b/releasenotes/notes/doca-workflow-017931dbfd580b41.yaml @@ -2,13 +2,15 @@ features: - | Using DOCA LTS 2.9.1. + - | DOCA workflows updated to build kernel modules only, relying on Release Train synchronisation of DOCA upstream for userspace packages. - Improved documentation now including DOCA install - with playbook. - Local Pulp syncs for DOCA/DOCA kernel repo. + - | + Improved documentation now including DOCA install playbook. + - | + Local Pulp syncs for DOCA/DOCA kernel module repository. fixes: - | - Miscellaneous issues with the existing workflow resolved in this - patchset. + Miscellaneous issues with the package-build-ofed workflow + are resolved in this patchset. 
From f66506c553a276c024817c7e48f766cd6b1d3a26 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Thu, 30 Jan 2025 11:16:36 +0000 Subject: [PATCH 10/28] Create DOCA builder env --- .github/workflows/package-build-ofed.yml | 27 ++++++++----------- .../group_vars/ofed-builder/stackhpc-repos | 3 --- .../environments/ci-builder/inventory/hosts | 4 --- .../environments/ci-builder/stackhpc-ci.yml | 6 ----- .../ci-doca-builder/.kayobe-environment | 4 +++ .../ci-doca-builder/controllers.yml | 19 +++++++++++++ .../inventory/group_vars/ofed-builder/lvm.yml | 0 .../ci-doca-builder/inventory/hosts | 3 +++ .../environments/ci-doca-builder/seed.yml | 19 +++++++++++++ .../ci-doca-builder/stackhpc-ci.yml | 8 ++++++ 10 files changed, 64 insertions(+), 29 deletions(-) delete mode 100644 etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos create mode 100644 etc/kayobe/environments/ci-doca-builder/.kayobe-environment create mode 100644 etc/kayobe/environments/ci-doca-builder/controllers.yml rename etc/kayobe/environments/{ci-builder => ci-doca-builder}/inventory/group_vars/ofed-builder/lvm.yml (100%) create mode 100644 etc/kayobe/environments/ci-doca-builder/inventory/hosts create mode 100644 etc/kayobe/environments/ci-doca-builder/seed.yml create mode 100644 etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml diff --git a/.github/workflows/package-build-ofed.yml b/.github/workflows/package-build-ofed.yml index 0137c4f19..28f58ce14 100644 --- a/.github/workflows/package-build-ofed.yml +++ b/.github/workflows/package-build-ofed.yml @@ -19,7 +19,7 @@ on: env: ANSIBLE_FORCE_COLOR: True - KAYOBE_ENVIRONMENT: ci-builder + KAYOBE_ENVIRONMENT: ci-doca-builder KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} jobs: overcloud-ofed-packages: @@ -182,13 +182,13 @@ jobs: - name: Bootstrap the control host run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env 
--environment ci-doca-builder && kayobe control host bootstrap - name: Run growroot playbook run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml \ -e seed_bootstrap_user="cloud-user" \ -e controller_bootstrap_user="cloud-user" \ @@ -198,21 +198,16 @@ jobs: - name: Configure the seed host (Builder VM) run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe seed host configure \ - --skip-tags network,docker,docker-registry \ - -e seed_bootstrap_user="cloud-user" \ - -e controller_bootstrap_user="cloud-user" \ - -e dnf_install_doca=true \ - -e dnf_enable_doca_modules=false \ - -e stackhpc_repos_enabled=true + --skip-tags network,docker,docker-registry env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} - name: Run a distro-sync run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe seed host command run --become --command "dnf distro-sync --refresh --assumeyes" env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -220,7 +215,7 @@ jobs: - name: Reset BLS entries on the seed host run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reset-bls-entries.yml \ -e "reset_bls_host=ofed-builder" env: @@ -229,7 +224,7 @@ jobs: - name: Disable noexec in /var/tmp run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment 
ci-doca-builder && kayobe seed host command run --become --command "sed -i 's/noexec,//g' /etc/fstab" env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -237,7 +232,7 @@ jobs: - name: Reboot to apply the kernel update run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/reboot.yml env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -245,7 +240,7 @@ jobs: - name: Run OFED builder playbook run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/build-ofed-rocky.yml env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} @@ -253,7 +248,7 @@ jobs: - name: Run OFED upload playbook run: | source venvs/kayobe/bin/activate && - source src/kayobe-config/kayobe-env --environment ci-builder && + source src/kayobe-config/kayobe-env --environment ci-doca-builder && kayobe playbook run src/kayobe-config/etc/kayobe/ansible/push-ofed.yml \ -e "ofed_tag=${{ steps.ofed_tag.outputs.ofed_tag }}" env: diff --git a/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos b/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos deleted file mode 100644 index 107f75c1c..000000000 --- a/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/stackhpc-repos +++ /dev/null @@ -1,3 +0,0 @@ ---- - -stackhpc_repos_enabled: true diff --git a/etc/kayobe/environments/ci-builder/inventory/hosts b/etc/kayobe/environments/ci-builder/inventory/hosts index b50269868..a272c6b7f 100644 --- a/etc/kayobe/environments/ci-builder/inventory/hosts +++ b/etc/kayobe/environments/ci-builder/inventory/hosts @@ -1,9 +1,5 @@ # A 'seed' host used for 
building images. -[ofed-builder:children] -seed -controllers - # This is neccesary to run `overcloud deployment image build` [controllers] builder diff --git a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml index a73c27265..3f6f3ee95 100644 --- a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml @@ -108,9 +108,3 @@ ipa_build_dib_env_extra: # builder being a member of the 'overcloud' group for IPA builds. stackhpc_repos_enabled: false enable_docker_repo: true - -# Enable LVM for OFED builds -seed_lvm_groups: - - "{{ stackhpc_lvm_group_rootvg if dnf_install_doca | bool }}" -controller_lvm_groups: - - "{{ stackhpc_lvm_group_rootvg if dnf_install_doca | bool }}" diff --git a/etc/kayobe/environments/ci-doca-builder/.kayobe-environment b/etc/kayobe/environments/ci-doca-builder/.kayobe-environment new file mode 100644 index 000000000..0cedd00c8 --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/.kayobe-environment @@ -0,0 +1,4 @@ +--- + +dependencies: + - ci-builder diff --git a/etc/kayobe/environments/ci-doca-builder/controllers.yml b/etc/kayobe/environments/ci-doca-builder/controllers.yml new file mode 100644 index 000000000..a08f007ab --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/controllers.yml @@ -0,0 +1,19 @@ +--- +############################################################################### +# Controller node configuration. + +# User with which to access the controllers via SSH during bootstrap, in order +# to setup the Kayobe user account. Default is {{ os_distribution }}. +controller_bootstrap_user: cloud-user + +############################################################################### +# Controller node LVM configuration. + +# List of controller volume groups. See mrlesmithjr.manage_lvm role for +# format. 
+controller_lvm_groups: + - "{{ stackhpc_lvm_group_rootvg }}" + +############################################################################### +# Dummy variable to allow Ansible to accept this file. +workaround_ansible_issue_8743: yes diff --git a/etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/lvm.yml b/etc/kayobe/environments/ci-doca-builder/inventory/group_vars/ofed-builder/lvm.yml similarity index 100% rename from etc/kayobe/environments/ci-builder/inventory/group_vars/ofed-builder/lvm.yml rename to etc/kayobe/environments/ci-doca-builder/inventory/group_vars/ofed-builder/lvm.yml diff --git a/etc/kayobe/environments/ci-doca-builder/inventory/hosts b/etc/kayobe/environments/ci-doca-builder/inventory/hosts new file mode 100644 index 000000000..c0b6fd7cb --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/inventory/hosts @@ -0,0 +1,3 @@ +[ofed-builder:children] +seed +controllers diff --git a/etc/kayobe/environments/ci-doca-builder/seed.yml b/etc/kayobe/environments/ci-doca-builder/seed.yml new file mode 100644 index 000000000..e7c5b450d --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/seed.yml @@ -0,0 +1,19 @@ +--- +############################################################################### +# Seed node configuration. + +# User with which to access the seed via SSH during bootstrap, in order +# to setup the Kayobe user account. Default is {{ os_distribution }}. +seed_bootstrap_user: cloud-user + +############################################################################### +# Seed node LVM configuration. + +# Seed lvm configuration. See inventory/group_vars/seed/lvm.yml +# for the exact configuration. +seed_lvm_groups: + - "{{ stackhpc_lvm_group_rootvg }}" + +############################################################################### +# Dummy variable to allow Ansible to accept this file. 
+workaround_ansible_issue_8743: yes diff --git a/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml b/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml new file mode 100644 index 000000000..5e9b0d0fc --- /dev/null +++ b/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml @@ -0,0 +1,8 @@ +--- +############################################################################### +# StackHPC configuration. + +# Enable StackHPC Ark repositories for DOCA builds +stackhpc_repos_enabled: true +dnf_install_doca: true +dnf_enable_doca_modules: false From 931e82cec908adcc9326af6dea5150c3419e30e2 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Thu, 30 Jan 2025 11:46:10 +0000 Subject: [PATCH 11/28] Disable DOCA by default in group_vars --- etc/kayobe/dnf.yml | 2 +- etc/kayobe/inventory/group_vars/all/stackhpc-repos | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/etc/kayobe/dnf.yml b/etc/kayobe/dnf.yml index a49207839..5cca933e4 100644 --- a/etc/kayobe/dnf.yml +++ b/etc/kayobe/dnf.yml @@ -181,7 +181,7 @@ dnf_docker_gpg_key_url: "https://download.docker.com/linux/centos/gpg" # Whether to create a repo file for DOCA. This affects RedHat-based # systems only. Default value is 'false'. -dnf_install_doca: false +#dnf_install_doca: ############################################################################### # DNF Automatic configuration. diff --git a/etc/kayobe/inventory/group_vars/all/stackhpc-repos b/etc/kayobe/inventory/group_vars/all/stackhpc-repos index 6273f2b4a..297540fdb 100644 --- a/etc/kayobe/inventory/group_vars/all/stackhpc-repos +++ b/etc/kayobe/inventory/group_vars/all/stackhpc-repos @@ -4,3 +4,6 @@ # vm has been provisioned # This behaviour is overriden for Overcloud hosts. 
stackhpc_repos_enabled: false + +# Disable StackHPC DOCA repositories by default +dnf_install_doca: false From aed1a30a31bf215821ba1ad6352f4c57c1f024eb Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 4 Feb 2025 11:26:58 +0000 Subject: [PATCH 12/28] Fix environment --- .github/workflows/package-build-ofed.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package-build-ofed.yml b/.github/workflows/package-build-ofed.yml index 28f58ce14..b160d0923 100644 --- a/.github/workflows/package-build-ofed.yml +++ b/.github/workflows/package-build-ofed.yml @@ -151,13 +151,13 @@ jobs: - name: Write Terraform outputs run: | - cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-outputs.yml + cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-doca-builder/tf-outputs.yml ${{ steps.tf_outputs.outputs.stdout }} EOF - name: Write Terraform network config run: | - cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-builder/tf-network-allocation.yml + cat << EOF > src/kayobe-config/etc/kayobe/environments/ci-doca-builder/tf-network-allocation.yml --- aio_ips: builder: "{{ access_ip_v4.value }}" From 4aa733b0f36b72b5529a9709eb4e252b5e070a65 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 4 Feb 2025 13:39:56 +0000 Subject: [PATCH 13/28] Add Release Train documentation --- doc/source/contributor/ofed.rst | 32 +++++++++++++++++++++++++------- etc/kayobe/inventory/groups | 3 +++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index aa1126ee7..cb6ad8b4d 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -51,18 +51,36 @@ default. Install process =============== -Pre-requisites --------------- +Relase Train configuration +-------------------------- -* Ensure the OFED hosts are upgraded with the latest packages in the point release. 
+The DOCA kernel module repository will need to be synced to the local Pulp service. This can be enabled +in `ofed.yml`: -* The bootloader has been configured to use the latest kernel (reset-bls-entries.yml) +.. code-block:: yaml + + stackhpc_pulp_sync_ofed_modules: true + +With kernel module syncing enabled, the local Pulp can be synced with Ark by running: + +.. code-block:: console -* Ensure repositories have been templated by setting: + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-repo-sync.yml + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-repo-publish.yml - .. code-block:: yaml +DOCA repositories can be templated to hosts by running Kayobe host configure. + +.. code-block:: console + + kayobe overcloud host configure -t dnf + +StackHPC DOCA kernel modules will require the latest kernel version available in Ark for +the current Rocky minor version. You should ensure that packages are up to date by running +a package update, which can also be limited to hosts in the `mlnx` group. + +.. code-block:: console - dnf_install_doca: true + kayobe overcloud host package update --packages "*" --limit mlnx install-doca ------------ diff --git a/etc/kayobe/inventory/groups b/etc/kayobe/inventory/groups index 1028c2ca9..3a9739873 100644 --- a/etc/kayobe/inventory/groups +++ b/etc/kayobe/inventory/groups @@ -76,6 +76,9 @@ compute-vgpu [iommu:children] vgpu +[mlnx] +# Empty group to provide declaration of mlnx group. + ############################################################################### # Service groups. 
From 05b7150d3eba52a2ae1f88add266eaed1f1c6c0b Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Fri, 7 Feb 2025 15:47:48 +0000 Subject: [PATCH 14/28] Fix typos --- doc/source/contributor/ofed.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index cb6ad8b4d..3bb566142 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -5,7 +5,7 @@ OFED Warning: Experimental workflow subject to change The Nvidia DOCA framework is distributed as part of StackHPC Release Train for OFED driver support, -this repository is synced into Ark as part of the Release Train worfkflows, however to ensure +this repository is synced into Ark as part of the Release Train workflows, however to ensure compatibility with Release Train packages, we are required to build OFED modules with support for the latest Release Train kernel. @@ -30,7 +30,7 @@ Before building OFED packages, the workflow will ensure that: build-ofed ---------- -Currently we only support building Rocky Linux 9 OFED kerenl module packages. +Currently we only support building Rocky Linux 9 OFED kernel module packages. The Build OFED module workflow will check that the filesystem is configured (noexec disabled) to allow the DOCA build script to run. The workflow will also install any necessary dependencies @@ -44,15 +44,15 @@ push-ofed --------- As mentioned above, the DOCA repository is synced into the `doca` repository in Ark. This workflow -will upload the ``doca-kernel-repo`` RPM to a seperate repository named `doca-modules`. The version +will upload the ``doca-kernel-repo`` RPM to a separate repository named `doca-modules`. The version for this repository is set in `pulp-repo-versions.yml` and is disabled for local pulp syncs by default. 
Install process =============== -Relase Train configuration --------------------------- +Release Train configuration +--------------------------- The DOCA kernel module repository will need to be synced to the local Pulp service. This can be enabled in `ofed.yml`: From 155d0bacdddbcb649b11b5d478e956f673f147f8 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson <39007539+assumptionsandg@users.noreply.github.com> Date: Mon, 10 Feb 2025 15:00:40 +0000 Subject: [PATCH 15/28] Update doc/source/contributor/ofed.rst Co-authored-by: Alex-Welsh <112560678+Alex-Welsh@users.noreply.github.com> --- doc/source/contributor/ofed.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 3bb566142..8e10aba4c 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -43,9 +43,9 @@ modules in `/usr/share/doca-host-/Modules//` on th push-ofed --------- -As mentioned above, the DOCA repository is synced into the `doca` repository in Ark. This workflow -will upload the ``doca-kernel-repo`` RPM to a separate repository named `doca-modules`. The version -for this repository is set in `pulp-repo-versions.yml` and is disabled for local pulp syncs by +As mentioned above, the DOCA repository is synced into the ``doca`` repository in Ark. This workflow +will upload the ``doca-kernel-repo`` RPM to a separate repository named ``doca-modules``. The version +for this repository is set in ``pulp-repo-versions.yml`` and is disabled for local pulp syncs by default. 
Install process From 7ed300b2b664b68349c6ff197a7f1c2e86e86a9c Mon Sep 17 00:00:00 2001 From: Jake Hutchinson <39007539+assumptionsandg@users.noreply.github.com> Date: Mon, 10 Feb 2025 15:01:15 +0000 Subject: [PATCH 16/28] Apply suggestions from code review Co-authored-by: Alex-Welsh <112560678+Alex-Welsh@users.noreply.github.com> --- doc/source/contributor/ofed.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 8e10aba4c..98214235c 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -38,7 +38,7 @@ for the module build. The build script will output a ``doca-kernel-repo`` RPM which contains all kernel modules built as part of the workflow. When this RPM is installed, the repofile is created pointing to the -modules in `/usr/share/doca-host-/Modules//` on the host. +modules in ``/usr/share/doca-host-/Modules//`` on the host. push-ofed --------- @@ -76,7 +76,7 @@ DOCA repositories can be templated to hosts by running Kayobe host configure. StackHPC DOCA kernel modules will require the latest kernel version available in Ark for the current Rocky minor version. You should ensure that packages are up to date by running -a package update, which can also be limited to hosts in the `mlnx` group. +a package update, which can also be limited to hosts in the ``mlnx`` group. .. code-block:: console @@ -85,7 +85,7 @@ a package update, which can also be limited to hosts in the `mlnx` group. install-doca ------------ -A playbook is provided to install DOCA on hosts in the `mlnx` group. Ensure this group +A playbook is provided to install DOCA on hosts in the ``mlnx`` group. Ensure this group is configured to include the hosts you wish to install DOCA on. 
To run the install playbook: From 39f74db70ce4b7a24d403377ffa62afa0f6961d0 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson <39007539+assumptionsandg@users.noreply.github.com> Date: Mon, 10 Feb 2025 15:01:52 +0000 Subject: [PATCH 17/28] Update doc/source/contributor/ofed.rst Co-authored-by: Alex-Welsh <112560678+Alex-Welsh@users.noreply.github.com> --- doc/source/contributor/ofed.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 98214235c..44dc0096a 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -55,7 +55,7 @@ Release Train configuration --------------------------- The DOCA kernel module repository will need to be synced to the local Pulp service. This can be enabled -in `ofed.yml`: +in ``ofed.yml``: .. code-block:: yaml From af83055351eeb08a21fd4c5df1bab5ad08c6c01c Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Mon, 10 Feb 2025 15:02:56 +0000 Subject: [PATCH 18/28] Address review comments --- .github/workflows/package-build-ofed.yml | 7 ++----- doc/source/contributor/ofed.rst | 2 +- etc/kayobe/environments/ci-doca-builder/controllers.yml | 4 ---- etc/kayobe/environments/ci-doca-builder/seed.yml | 8 ++++++-- etc/kayobe/ofed.yml | 4 ++-- etc/kayobe/pulp.yml | 4 ++-- 6 files changed, 13 insertions(+), 16 deletions(-) diff --git a/.github/workflows/package-build-ofed.yml b/.github/workflows/package-build-ofed.yml index b160d0923..509ba7955 100644 --- a/.github/workflows/package-build-ofed.yml +++ b/.github/workflows/package-build-ofed.yml @@ -189,9 +189,7 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-doca-builder && - kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml \ - -e seed_bootstrap_user="cloud-user" \ - -e controller_bootstrap_user="cloud-user" \ + kayobe playbook run src/kayobe-config/etc/kayobe/ansible/growroot.yml env: KAYOBE_VAULT_PASSWORD: ${{ 
secrets.KAYOBE_VAULT_PASSWORD }} @@ -199,8 +197,7 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-doca-builder && - kayobe seed host configure \ - --skip-tags network,docker,docker-registry + kayobe seed host configure --skip-tags network env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 44dc0096a..d12d27ac8 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -54,7 +54,7 @@ Install process Release Train configuration --------------------------- -The DOCA kernel module repository will need to be synced to the local Pulp service. This can be enabled +DOCA repositories will need to be synced to the local Pulp service. This can be enabled in ``ofed.yml``: .. code-block:: yaml diff --git a/etc/kayobe/environments/ci-doca-builder/controllers.yml b/etc/kayobe/environments/ci-doca-builder/controllers.yml index a08f007ab..f0dfd79a3 100644 --- a/etc/kayobe/environments/ci-doca-builder/controllers.yml +++ b/etc/kayobe/environments/ci-doca-builder/controllers.yml @@ -13,7 +13,3 @@ controller_bootstrap_user: cloud-user # format. controller_lvm_groups: - "{{ stackhpc_lvm_group_rootvg }}" - -############################################################################### -# Dummy variable to allow Ansible to accept this file. -workaround_ansible_issue_8743: yes diff --git a/etc/kayobe/environments/ci-doca-builder/seed.yml b/etc/kayobe/environments/ci-doca-builder/seed.yml index e7c5b450d..5f3304a76 100644 --- a/etc/kayobe/environments/ci-doca-builder/seed.yml +++ b/etc/kayobe/environments/ci-doca-builder/seed.yml @@ -15,5 +15,9 @@ seed_lvm_groups: - "{{ stackhpc_lvm_group_rootvg }}" ############################################################################### -# Dummy variable to allow Ansible to accept this file. 
-workaround_ansible_issue_8743: yes +# Seed node additional containers configuration + +# Seed container running a "Pulp in one" service. +# https://pulpproject.org/pulp-in-one-container/ +# This can be used as a package mirror, and container image registry. +seed_pulp_container_enabled: false diff --git a/etc/kayobe/ofed.yml b/etc/kayobe/ofed.yml index f18d0352b..36ce7a946 100644 --- a/etc/kayobe/ofed.yml +++ b/etc/kayobe/ofed.yml @@ -8,8 +8,8 @@ stackhpc_pulp_doca_version: 2.9.1 ############################################################################### # Pulp configuration for DOCA OFED -# Whether to sync OFED kernel module packages into the local Pulp service -stackhpc_pulp_sync_ofed_modules: false +# Whether to sync OFED repositories into the local Pulp service +stackhpc_pulp_sync_ofed: false # DOCA Snapshot versions. The defaults use the appropriate version from # pulp-repo-versions.yml diff --git a/etc/kayobe/pulp.yml b/etc/kayobe/pulp.yml index e01994fbf..1b7868ff6 100644 --- a/etc/kayobe/pulp.yml +++ b/etc/kayobe/pulp.yml @@ -377,13 +377,13 @@ stackhpc_pulp_rpm_repos: url: "{{ stackhpc_release_pulp_content_url }}/doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_pulp_repo_rhel9_doca_version }}" distribution_name: "doca-{{ stackhpc_pulp_doca_version }}-rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}-" base_path: "doca/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/" - required: "{{ stackhpc_pulp_sync_el_9 }}" + required: "{{ stackhpc_pulp_sync_ofed | bool and stackhpc_pulp_sync_el_9 | bool }}" - name: OFED Kernel modules for DOCA {{ stackhpc_pulp_doca_version }} - RHEL 9.{{ stackhpc_pulp_repo_rocky_9_minor_version }} url: "{{ stackhpc_release_pulp_content_url }}/doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/{{ stackhpc_pulp_repo_rhel9_doca_modules_version }}" distribution_name: 
"doca-modules-{{ stackhpc_pulp_doca_version }}-rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}-" base_path: "doca-modules/{{ stackhpc_pulp_doca_version }}/rhel9.{{ stackhpc_pulp_repo_rocky_9_minor_version }}/x86_64/" - required: "{{ stackhpc_pulp_sync_ofed_modules | bool and stackhpc_pulp_sync_el_9 | bool }}" + required: "{{ stackhpc_pulp_sync_ofed | bool and stackhpc_pulp_sync_el_9 | bool }}" # RPM repositories stackhpc_pulp_repository_rpm_repos: >- From d744c466a8967e9e1b1f9d1eb0306c7e8d2ac24c Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Mon, 10 Feb 2025 15:09:11 +0000 Subject: [PATCH 19/28] Disable docker repo --- etc/kayobe/environments/ci-doca-builder/seed.yml | 8 -------- etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml | 1 + 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/etc/kayobe/environments/ci-doca-builder/seed.yml b/etc/kayobe/environments/ci-doca-builder/seed.yml index 5f3304a76..ecba7d050 100644 --- a/etc/kayobe/environments/ci-doca-builder/seed.yml +++ b/etc/kayobe/environments/ci-doca-builder/seed.yml @@ -13,11 +13,3 @@ seed_bootstrap_user: cloud-user # for the exact configuration. seed_lvm_groups: - "{{ stackhpc_lvm_group_rootvg }}" - -############################################################################### -# Seed node additional containers configuration - -# Seed container running a "Pulp in one" service. -# https://pulpproject.org/pulp-in-one-container/ -# This can be used as a package mirror, and container image registry. 
-seed_pulp_container_enabled: false diff --git a/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml b/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml index 5e9b0d0fc..a6e66db54 100644 --- a/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-doca-builder/stackhpc-ci.yml @@ -4,5 +4,6 @@ # Enable StackHPC Ark repositories for DOCA builds stackhpc_repos_enabled: true +enable_docker_repo: false dnf_install_doca: true dnf_enable_doca_modules: false From 0706f6ba22652406860de504c332a5122335d1a9 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Mon, 10 Feb 2025 15:21:15 +0000 Subject: [PATCH 20/28] Fix package list --- etc/kayobe/ansible/build-ofed-rocky.yml | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/etc/kayobe/ansible/build-ofed-rocky.yml b/etc/kayobe/ansible/build-ofed-rocky.yml index bc50c38a6..343183078 100644 --- a/etc/kayobe/ansible/build-ofed-rocky.yml +++ b/etc/kayobe/ansible/build-ofed-rocky.yml @@ -17,26 +17,26 @@ - name: Install package dependencies ansible.builtin.dnf: name: - - kpartx - - perl - - rpm-build + - autoconf - automake - - patch + - createrepo + - cmake-filesystem + - doca-extra + - gcc-gfortran - kernel-devel - - autoconf - - pciutils - - kernel-modules-extra - kernel-rpm-macros - - lsof + - kernel-modules-extra + - kpartx - libtool - - tk - - gcc-gfortran - - tcl - - createrepo - - cmake-filesystem - libnl3-devel + - lsof + - patch + - pciutils + - perl - python3-devel - - doca-extra + - rpm-build + - tcl + - tk state: latest update_cache: true From e92d36185b15d93d11b8b531875089f2565f444f Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Mon, 10 Feb 2025 15:59:24 +0000 Subject: [PATCH 21/28] Test docker --- .github/workflows/package-build-ofed.yml | 2 +- etc/kayobe/environments/ci-doca-builder/seed.yml | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/package-build-ofed.yml 
b/.github/workflows/package-build-ofed.yml index 509ba7955..d37edd432 100644 --- a/.github/workflows/package-build-ofed.yml +++ b/.github/workflows/package-build-ofed.yml @@ -197,7 +197,7 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-doca-builder && - kayobe seed host configure --skip-tags network + kayobe seed host configure --skip-tags network,docker env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} diff --git a/etc/kayobe/environments/ci-doca-builder/seed.yml b/etc/kayobe/environments/ci-doca-builder/seed.yml index ecba7d050..5f3304a76 100644 --- a/etc/kayobe/environments/ci-doca-builder/seed.yml +++ b/etc/kayobe/environments/ci-doca-builder/seed.yml @@ -13,3 +13,11 @@ seed_bootstrap_user: cloud-user # for the exact configuration. seed_lvm_groups: - "{{ stackhpc_lvm_group_rootvg }}" + +############################################################################### +# Seed node additional containers configuration + +# Seed container running a "Pulp in one" service. +# https://pulpproject.org/pulp-in-one-container/ +# This can be used as a package mirror, and container image registry. +seed_pulp_container_enabled: false From 9766b1051fb3c50eccbf92a74df28c164f92ae23 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Mon, 10 Feb 2025 16:25:47 +0000 Subject: [PATCH 22/28] Seed configure fix --- etc/kayobe/environments/ci-doca-builder/seed.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/etc/kayobe/environments/ci-doca-builder/seed.yml b/etc/kayobe/environments/ci-doca-builder/seed.yml index 5f3304a76..ecba7d050 100644 --- a/etc/kayobe/environments/ci-doca-builder/seed.yml +++ b/etc/kayobe/environments/ci-doca-builder/seed.yml @@ -13,11 +13,3 @@ seed_bootstrap_user: cloud-user # for the exact configuration. 
seed_lvm_groups: - "{{ stackhpc_lvm_group_rootvg }}" - -############################################################################### -# Seed node additional containers configuration - -# Seed container running a "Pulp in one" service. -# https://pulpproject.org/pulp-in-one-container/ -# This can be used as a package mirror, and container image registry. -seed_pulp_container_enabled: false From 248270b90ea4975db22b50e5dc85790238c414ad Mon Sep 17 00:00:00 2001 From: Jake Hutchinson <39007539+assumptionsandg@users.noreply.github.com> Date: Tue, 11 Feb 2025 15:25:33 +0000 Subject: [PATCH 23/28] Update doc/source/contributor/ofed.rst Co-authored-by: Matt Anson --- doc/source/contributor/ofed.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index d12d27ac8..a64e2256c 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -59,7 +59,7 @@ in ``ofed.yml``: .. code-block:: yaml - stackhpc_pulp_sync_ofed_modules: true + stackhpc_pulp_sync_ofed: true With kernel module syncing enabled, the local Pulp can be synced with Ark by running: From 7fdaab62215168dd3d9468b00850719747cd91a7 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 11 Feb 2025 15:29:45 +0000 Subject: [PATCH 24/28] Reboot --- doc/source/contributor/ofed.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index a64e2256c..a889607ee 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -82,6 +82,19 @@ a package update, which can also be limited to hosts in the ``mlnx`` group. kayobe overcloud host package update --packages "*" --limit mlnx +To ensure the latest kernel is the default on boot, the bootloader entries will need +to be reset before rebooting. + +..
code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reset-bls-entires.yml -e reset_bls_host=mlnx + +The hosts can now be rebooted to use the latest kernel: + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reboot.yml + install-doca ------------ From abb14a4d408530a8be87e4644b55c47ac252ee5e Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 11 Feb 2025 15:42:46 +0000 Subject: [PATCH 25/28] Fix reboot doc --- .github/workflows/package-build-ofed.yml | 2 +- doc/source/contributor/ofed.rst | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/package-build-ofed.yml b/.github/workflows/package-build-ofed.yml index d37edd432..2df246217 100644 --- a/.github/workflows/package-build-ofed.yml +++ b/.github/workflows/package-build-ofed.yml @@ -197,7 +197,7 @@ jobs: run: | source venvs/kayobe/bin/activate && source src/kayobe-config/kayobe-env --environment ci-doca-builder && - kayobe seed host configure --skip-tags network,docker + kayobe seed host configure --skip-tags network,docker,docker-registry env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index a889607ee..772624db5 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -89,11 +89,12 @@ to be reset before rebooting. kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reset-bls-entires.yml -e reset_bls_host=mlnx -The hosts can now be rebooted to use the latest kernel: +The hosts can now be rebooted to use the latest kernel. A rolling reboot may be applicable +here to reduce disruptions. See the `package updates documentation `. ..
code-block:: console - kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reboot.yml + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/reboot.yml --limit mlnx install-doca ------------ From ce973acd866983dd35b21c704db7e7f4b41cec91 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Tue, 11 Feb 2025 16:02:07 +0000 Subject: [PATCH 26/28] Default pulp ofed sync --- doc/source/contributor/ofed.rst | 6 +++--- etc/kayobe/ofed.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 772624db5..2d5f728b9 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -54,12 +54,12 @@ Install process Release Train configuration --------------------------- -DOCA repositories will need to be synced to the local Pulp service. This can be enabled -in ``ofed.yml``: +DOCA repositories will need to be synced to the local Pulp service, ensure that DOCA RPM repositories are +enabled by setting: .. code-block:: yaml - stackhpc_pulp_sync_ofed: true + dnf_install_doca: true With kernel module syncing enabled, the local Pulp can be synced with Ark by running: diff --git a/etc/kayobe/ofed.yml b/etc/kayobe/ofed.yml index 36ce7a946..edbc4ecc2 100644 --- a/etc/kayobe/ofed.yml +++ b/etc/kayobe/ofed.yml @@ -9,7 +9,7 @@ stackhpc_pulp_doca_version: 2.9.1 # Pulp configuration for DOCA OFED # Whether to sync OFED repositories into the local Pulp service -stackhpc_pulp_sync_ofed: false +stackhpc_pulp_sync_ofed: "{{ dnf_install_doca }}" # DOCA Snapshot versions. 
The defaults use the appropriate version from # pulp-repo-versions.yml From 1e9f1ab968964b6b83dc2dee3714318dff69d667 Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Thu, 13 Feb 2025 11:14:39 +0000 Subject: [PATCH 27/28] Update Pulp sync condition --- doc/source/contributor/ofed.rst | 12 ++++-------- etc/kayobe/dnf.yml | 2 +- etc/kayobe/inventory/group_vars/all/stackhpc-repos | 3 --- etc/kayobe/ofed.yml | 2 +- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/doc/source/contributor/ofed.rst b/doc/source/contributor/ofed.rst index 2d5f728b9..c993f6748 100644 --- a/doc/source/contributor/ofed.rst +++ b/doc/source/contributor/ofed.rst @@ -54,14 +54,10 @@ Install process Release Train configuration --------------------------- -DOCA repositories will need to be synced to the local Pulp service, ensure that DOCA RPM repositories are -enabled by setting: - -.. code-block:: yaml - - dnf_install_doca: true - -With kernel module syncing enabled, the local Pulp can be synced with Ark by running: +DOCA repositories will need to be synced to the local Pulp service. Ensure the DOCA +hosts are added to the ``mlnx`` group before running a package sync; if the group is not +empty, DOCA will be synced into the local Pulp. The local Pulp can be synced with Ark +by running: .. code-block:: console diff --git a/etc/kayobe/dnf.yml b/etc/kayobe/dnf.yml index 5cca933e4..34d46767f 100644 --- a/etc/kayobe/dnf.yml +++ b/etc/kayobe/dnf.yml @@ -181,7 +181,7 @@ dnf_docker_gpg_key_url: "https://download.docker.com/linux/centos/gpg" # Whether to create a repo file for DOCA. This affects RedHat-based # systems only. Default value is 'false'. -#dnf_install_doca: +dnf_install_doca: "{{ stackhpc_pulp_sync_ofed }}" ############################################################################### # DNF Automatic configuration.
diff --git a/etc/kayobe/inventory/group_vars/all/stackhpc-repos b/etc/kayobe/inventory/group_vars/all/stackhpc-repos index 297540fdb..6273f2b4a 100644 --- a/etc/kayobe/inventory/group_vars/all/stackhpc-repos +++ b/etc/kayobe/inventory/group_vars/all/stackhpc-repos @@ -4,6 +4,3 @@ # vm has been provisioned # This behaviour is overriden for Overcloud hosts. stackhpc_repos_enabled: false - -# Disable StackHPC DOCA repositories by default -dnf_install_doca: false diff --git a/etc/kayobe/ofed.yml b/etc/kayobe/ofed.yml index edbc4ecc2..af861e914 100644 --- a/etc/kayobe/ofed.yml +++ b/etc/kayobe/ofed.yml @@ -9,7 +9,7 @@ stackhpc_pulp_doca_version: 2.9.1 # Pulp configuration for DOCA OFED # Whether to sync OFED repositories into the local Pulp service -stackhpc_pulp_sync_ofed: "{{ dnf_install_doca }}" +stackhpc_pulp_sync_ofed: "{{ 'mlnx' in group_names or ( 'localhost' in inventory_hostname and groups['mlnx'] | length > 0 ) }}" # DOCA Snapshot versions. The defaults use the appropriate version from # pulp-repo-versions.yml From 343014f2fc074fbc2a8a2ac6515f5ed614a3d90c Mon Sep 17 00:00:00 2001 From: Jake Hutchinson Date: Thu, 13 Feb 2025 13:20:02 +0000 Subject: [PATCH 28/28] Fixup DOCA DNF install variable --- etc/kayobe/dnf.yml | 4 ++-- etc/kayobe/ofed.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/etc/kayobe/dnf.yml b/etc/kayobe/dnf.yml index 34d46767f..c337db281 100644 --- a/etc/kayobe/dnf.yml +++ b/etc/kayobe/dnf.yml @@ -180,8 +180,8 @@ dnf_enable_docker: true dnf_docker_gpg_key_url: "https://download.docker.com/linux/centos/gpg" # Whether to create a repo file for DOCA. This affects RedHat-based -# systems only. Default value is 'false'. -dnf_install_doca: "{{ stackhpc_pulp_sync_ofed }}" +# systems only. +dnf_install_doca: "{{ 'mlnx' in group_names }}" ############################################################################### # DNF Automatic configuration. 
diff --git a/etc/kayobe/ofed.yml b/etc/kayobe/ofed.yml index af861e914..3ca9201fb 100644 --- a/etc/kayobe/ofed.yml +++ b/etc/kayobe/ofed.yml @@ -9,7 +9,7 @@ stackhpc_pulp_doca_version: 2.9.1 # Pulp configuration for DOCA OFED # Whether to sync OFED repositories into the local Pulp service -stackhpc_pulp_sync_ofed: "{{ 'mlnx' in group_names or ( 'localhost' in inventory_hostname and groups['mlnx'] | length > 0 ) }}" +stackhpc_pulp_sync_ofed: "{{ groups['mlnx'] | length > 0 }}" # DOCA Snapshot versions. The defaults use the appropriate version from # pulp-repo-versions.yml