Skip to content

Commit

Permalink
nvidia: rework role use new toolkit
Browse files Browse the repository at this point in the history
  • Loading branch information
saltydk committed Feb 10, 2024
1 parent 89213c9 commit b540ea5
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 25 deletions.
13 changes: 12 additions & 1 deletion resources/tasks/docker/set_docker_devices_variable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@
ansible.builtin.set_fact:
docker_devices_var_name: "{{ role_name + '_docker_devices_default' }}"

# Device nodes that are added to a role's Docker devices when gpu.nvidia is set.
# NOTE(review): only /dev/nvidia0 is listed — confirm whether hosts with more
# than one GPU need further /dev/nvidiaN entries.
- name: Resources | Tasks | Docker | Set Docker Devices Variable | Set 'docker_devices_nvidia' temp variable
  ansible.builtin.set_fact:
    docker_devices_nvidia:
      - /dev/nvidia-uvm
      - /dev/nvidia-uvm-tools
      - /dev/nvidia-modeset
      - /dev/nvidiactl
      - /dev/nvidia0

# Rebuilds the fact named by 'docker_devices_var_name': the Intel render node
# mapping (when gpu.intel) and the NVIDIA device nodes (when gpu.nvidia) are
# prepended to the role's existing '<role_name>_docker_devices_default' list.
# The key is defined exactly once; the lookup falls back to an empty list so a
# role without that default does not fail.
- name: Resources | Tasks | Docker | Set Docker Devices Variable | Set 'docker_devices' variable # noqa jinja[spacing] var-naming[no-jinja]
  ansible.builtin.set_fact:
    "{{ docker_devices_var_name }}": "{{ (['/dev/dri:/dev/dri'] if gpu.intel else [])
                                         + (docker_devices_nvidia if gpu.nvidia else [])
                                         + lookup('vars', role_name + '_docker_devices_default', default=[]) }}"
17 changes: 4 additions & 13 deletions roles/nvidia/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,14 @@ nvidia_patch_backup_file_location: "/opt/nvidia/libnvidia-encode-backup"
# Docker
################################

# URL of the APT repository key that the role downloads.
# Defined once: the earlier nvidia-container-runtime value was shadowed anyway.
nvidia_docker_runtime_apt_key_url: https://nvidia.github.io/libnvidia-container/gpgkey

# URL of the upstream '.list' file whose lines are imported as APT sources.
nvidia_docker_runtime_apt_repo_list_url: "https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list"

# Legacy ('_old2') source entries for the per-distribution libnvidia-container
# and nvidia-container-runtime repositories, pinned to the key stored at
# /etc/apt/trusted.gpg.d/nvidia.asc.
nvidia_docker_runtime_apt_repo_url_list_old2:
- 'deb [signed-by=/etc/apt/trusted.gpg.d/nvidia.asc] https://nvidia.github.io/libnvidia-container/{{ ansible_distribution | lower }}{{ ansible_distribution_version }}/$(ARCH) /'
- 'deb [signed-by=/etc/apt/trusted.gpg.d/nvidia.asc] https://nvidia.github.io/nvidia-container-runtime/{{ ansible_distribution | lower }}{{ ansible_distribution_version }}/$(ARCH) /'

# Legacy ('_old') source entries for the same repositories: first without a
# 'signed-by' option, then pinned to /usr/share/keyrings/nvidia.gpg.
nvidia_docker_runtime_apt_repo_url_list_old:
- 'deb https://nvidia.github.io/libnvidia-container/{{ ansible_distribution | lower }}{{ ansible_distribution_version }}/$(ARCH) /'
- 'deb https://nvidia.github.io/nvidia-container-runtime/{{ ansible_distribution | lower }}{{ ansible_distribution_version }}/$(ARCH) /'
- 'deb [signed-by=/usr/share/keyrings/nvidia.gpg] https://nvidia.github.io/libnvidia-container/{{ ansible_distribution | lower }}{{ ansible_distribution_version }}/$(ARCH) /'
- 'deb [signed-by=/usr/share/keyrings/nvidia.gpg] https://nvidia.github.io/nvidia-container-runtime/{{ ansible_distribution | lower }}{{ ansible_distribution_version }}/$(ARCH) /'
# Source-list name that the earlier nvidia-container-runtime setup used.
nvidia_docker_runtime_apt_repo_file_old: nvidia-container-runtime

# Source-list name for the nvidia-container-toolkit repository. Defined once;
# the earlier 'nvidia-container-runtime' value was shadowed by this one.
nvidia_docker_runtime_apt_repo_file: nvidia-container-toolkit

# Kept unchanged so any remaining reference to this variable still resolves.
nvidia_docker_runtime_apt_package: nvidia-container-runtime

nvidia_docker_runtime_docker_daemon_json_jq_command: |
jq '."default-runtime" = "nvidia"
Expand Down
2 changes: 2 additions & 0 deletions roles/nvidia/files/71-nvidia-dev-char.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# This will create /dev/char symlinks to all device nodes
# Trigger: an "add" event whose DEVPATH is the nvidia PCI driver; the rule then
# runs nvidia-ctk with --create-all.
ACTION=="add", DEVPATH=="/bus/pci/drivers/nvidia", RUN+="/usr/bin/nvidia-ctk system create-dev-char-symlinks --create-all"
41 changes: 30 additions & 11 deletions roles/nvidia/tasks/subtasks/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,40 +7,59 @@
# GNU General Public License v3.0 #
#########################################################################
---
# apt_repository stores its entries in /etc/apt/sources.list.d/<filename>.list,
# so the stale list must be removed by that full path; a bare file name would be
# resolved relative to the working directory and never match. No loop is
# needed: the task deletes one file and does not use 'item'.
- name: Docker | Remove old repository list
  ansible.builtin.file:
    path: "/etc/apt/sources.list.d/{{ nvidia_docker_runtime_apt_repo_file_old }}.list"
    state: absent

# Downloads the repository key to the path the APT source entries reference.
# 'force' makes get_url fetch it on every run. The download is retried (up to
# 5 times, 10 seconds apart) only when 'continuous_integration' is set.
- name: Docker | Add 'nvidia-container-toolkit' APT Repo Key
  ansible.builtin.get_url:
    url: "{{ nvidia_docker_runtime_apt_key_url }}"
    dest: /etc/apt/trusted.gpg.d/nvidia.asc
    mode: "0644"
    force: true
  register: result
  retries: "{{ '0' if (not continuous_integration) else '5' }}"
  delay: 10
  until: result is succeeded

# The url lookup yields the upstream '.list' file as a list of lines. Each line
# is rewritten on its own (via map) so that its 'deb' entry is pinned to the
# downloaded key; applying regex_replace to the list as a whole would operate
# on the list's string form, where '^deb https' never matches. Lines that are
# commented out upstream are skipped by the 'when' condition.
- name: Docker | Add 'nvidia-container-toolkit' APT list
  ansible.builtin.apt_repository:
    repo: "{{ item }}"
    filename: "{{ nvidia_docker_runtime_apt_repo_file }}"
    state: present
    mode: "0644"
    update_cache: true
  loop: "{{ lookup('ansible.builtin.url', nvidia_docker_runtime_apt_repo_list_url, wantlist=True)
            | map('regex_replace', '^deb https', 'deb [signed-by=/etc/apt/trusted.gpg.d/nvidia.asc] https')
            | list }}"
  when: not item.startswith('#')

# Uninstalls the 'nvidia-container-runtime' package before the toolkit package
# is installed.
- name: Docker | Remove 'nvidia-container-runtime'
  ansible.builtin.apt:
    name: "nvidia-container-runtime"
    state: absent

# Uninstalls any present 'nvidia-container-toolkit' package; 'name' is given
# once, as the literal package name.
# NOTE(review): this runs right before the install task, so the package is
# reinstalled on every run — confirm that this is intentional.
- name: Docker | Remove 'nvidia-container-toolkit'
  ansible.builtin.apt:
    name: "nvidia-container-toolkit"
    state: absent

# Installs the toolkit package after refreshing the APT cache.
- name: Docker | Install 'nvidia-container-toolkit'
  ansible.builtin.apt:
    name: "nvidia-container-toolkit"
    state: present
    update_cache: true

# Runs nvidia-ctk once so the /dev/char symlinks are created immediately.
# The invocation needs no shell features, so the command module is used. The
# task has no way to tell whether anything changed, so it explicitly keeps
# reporting 'changed', as it did before.
- name: Docker | Create '/dev/char' symlinks
  ansible.builtin.command: nvidia-ctk system create-dev-char-symlinks --create-all
  changed_when: true

# Copies the role's udev rules file to /lib/udev/rules.d, owned by root and
# world-readable.
- name: Docker | Import '71-nvidia-dev-char.rules'
  ansible.builtin.copy:
    src: 71-nvidia-dev-char.rules
    dest: "/lib/udev/rules.d/71-nvidia-dev-char.rules"
    mode: "0644"
    owner: "root"
    group: "root"

- name: Docker | Populate Service Facts
ansible.builtin.service_facts:

Expand Down

0 comments on commit b540ea5

Please sign in to comment.