-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgpu-passthrough.yml
123 lines (117 loc) · 3.83 KB
/
gpu-passthrough.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
---
# Prepare hypervisors for passing NVIDIA GPUs through to guests with vfio-pci.
#
# The play:
#   * adds the vfio drivers to the initramfs and to modules-load.d,
#   * removes the GPU's on-board USB controller functions via udev,
#   * blacklists the nouveau driver,
#   * tells KVM to ignore unsupported MSR accesses from guests,
#   * sets the IOMMU and vfio-pci.ids kernel arguments.
#
# Every change notifies the "reboot" handler; changes that affect the
# initramfs also notify "Regenerate initramfs". Ansible runs handlers in the
# order they are defined under `handlers:` (not the order they are listed in
# `notify`), so the initramfs is always rebuilt before the host reboots.
- name: Enable GPU passthrough
  hosts: compute
  vars:
    # NVIDIA Quadro RTX 6000/8000 PCI device IDs
    vendor_id: "10de"
    display_id: "1e30"
    audio_id: "10f7"
    # The two *_id values below are not referenced by the tasks in this play;
    # the USB functions are matched by PCI class instead.
    usba_id: "1ad6"
    usba_class: "0c0330"
    usbc_id: "1ad7"
    usbc_class: "0c8000"
  tasks:
    - name: Template dracut config
      blockinfile:
        path: /etc/dracut.conf.d/gpu-vfio.conf
        # dracut.conf(5) requires a leading and trailing space in values
        # appended with +=, because the drop-in files are concatenated; without
        # them the first/last module name can fuse with another file's value.
        block: |
          add_drivers+=" vfio vfio_iommu_type1 vfio_pci vfio_virqfd "
        owner: root
        group: root
        mode: "0660"
        create: true
      become: true
      notify:
        - Regenerate initramfs
        - reboot

    - name: Template udev rules to blacklist GPU usb controllers
      blockinfile:
        # We want this to execute as soon as possible
        # NOTE(review): udev reads rules files in lexical order, so the "99-"
        # prefix makes this one of the last files processed, which appears to
        # contradict the comment above. The path is left unchanged so that
        # already-provisioned hosts do not end up with two copies of the rules.
        path: /etc/udev/rules.d/99-gpu.rules
        block: |
          #Remove NVIDIA USB xHCI Host Controller Devices, if present
          ACTION=="add", SUBSYSTEM=="pci", ATTR{vendor}=="0x{{ vendor_id }}", ATTR{class}=="0x{{ usba_class }}", ATTR{remove}="1"
          #Remove NVIDIA USB Type-C UCSI devices, if present
          ACTION=="add", SUBSYSTEM=="pci", ATTR{vendor}=="0x{{ vendor_id }}", ATTR{class}=="0x{{ usbc_class }}", ATTR{remove}="1"
        owner: root
        group: root
        mode: "0644"
        create: true
      become: true
      notify: reboot

    - name: Add vfio to modules-load.d
      blockinfile:
        path: /etc/modules-load.d/vfio.conf
        block: |
          vfio
          vfio_iommu_type1
          vfio_pci
          vfio_virqfd
        owner: root
        group: root
        mode: "0664"
        create: true
      become: true
      notify: reboot

    - name: Blacklist nouveau
      blockinfile:
        path: /etc/modprobe.d/blacklist-nouveau.conf
        block: |
          blacklist nouveau
          options nouveau modeset=0
        owner: root
        group: root
        mode: "0664"
        create: true
      become: true
      notify:
        - Regenerate initramfs
        - reboot

    - name: Ignore unsupported model specific registers
      # Occasionally, applications running in the VM may crash unexpectedly,
      # whereas they would run normally on a physical machine. If, while
      # running dmesg -wH, you encounter an error mentioning MSR, the reason
      # for those crashes is that KVM injects a General protection fault (GPF)
      # when the guest tries to access unsupported Model-specific registers
      # (MSRs) - this often results in guest applications/OS crashing. A
      # number of those issues can be solved by passing the ignore_msrs=1
      # option to the KVM module, which will ignore unimplemented MSRs.
      # source: https://wiki.archlinux.org/index.php/QEMU
      blockinfile:
        path: /etc/modprobe.d/kvm.conf
        # NOTE(review): the "#" lines inside the block are taken to be part of
        # the file written to the host (they end in a commented-out modprobe
        # option) - confirm against the deployed /etc/modprobe.d/kvm.conf.
        block: |
          options kvm ignore_msrs=Y
          # This option is not available in centos 7 as the kernel is too old,
          # but it can help with dmesg spam in newer kernels (centos8?). Sample
          # dmesg log message:
          # [ +0.000002] kvm [8348]: vcpu0, guest rIP: 0xffffffffb0a767fa ignored rdmsr: 0x619
          # options kvm report_ignored_msrs=N
        owner: root
        group: root
        mode: "0664"
        create: true
      become: true
      notify: reboot

    - name: Add vfio-pci.ids kernel args
      include_role:
        name: stackhpc.grubcmdline
      vars:
        # Enable the IOMMU in passthrough mode and list the GPU's display and
        # audio functions in vfio-pci.ids.
        kernel_cmdline:
          - intel_iommu=on
          - iommu=pt
          - "vfio-pci.ids={{ vendor_id }}:{{ display_id }},{{ vendor_id }}:{{ audio_id }}"
        # Presumably strips any existing values of these parameters before the
        # ones above are added - confirm against the stackhpc.grubcmdline role.
        kernel_cmdline_remove:
          - iommu
          - intel_iommu
          - vfio-pci.ids

  handlers:
    # Defined before "reboot" on purpose: handlers run in definition order, so
    # the new initramfs is in place before the host restarts.
    - name: Regenerate initramfs
      # Rebuilds the initramfs for the currently running kernel only.
      # The "#!/bin/bash" line is just a comment here: the shell module passes
      # the script to its default shell unless `executable` is set.
      shell: |-
        #!/bin/bash
        set -eux
        dracut -v -f /boot/initramfs-$(uname -r).img $(uname -r)
      become: true

    - name: reboot
      reboot:
        msg: "Rebooting the hypervisor"
      become: true