From 22e6d3d54a6469b410aa82fd461b1f3836e116a3 Mon Sep 17 00:00:00 2001
From: Jacob Weinstock
Date: Tue, 9 Jul 2024 22:18:36 -0600
Subject: [PATCH] Make relay init container script more resilient:

Things like left over interfaces shouldn't
cause crashloops anymore.

NOTE! When flipping between stack.relay.interfaceMode=macvlan
and stack.relay.interfaceMode=ipvlan, the deployment strategy
must be set to Recreate.

Signed-off-by: Jacob Weinstock
---
Review notes (this section is ignored by git-am):
- Indentation of the hunks below was reconstructed from a whitespace-collapsed
  copy of this patch; confirm the context lines against the target files
  before applying.
- The index hash for init_configmap.yaml predates the script fixes made in
  this revision (getopt error handling, quoted eval, terminating the
  unsupported-option branch, shared cleanup helper).

 .../stack/templates/init_configmap.yaml       | 122 ++++++++++++++++++
 tinkerbell/stack/templates/nginx.yaml         |  46 ++-----
 tinkerbell/stack/values.yaml                  |   3 +
 3 files changed, 137 insertions(+), 34 deletions(-)
 create mode 100644 tinkerbell/stack/templates/init_configmap.yaml

diff --git a/tinkerbell/stack/templates/init_configmap.yaml b/tinkerbell/stack/templates/init_configmap.yaml
new file mode 100644
index 00000000..a1a32dcd
--- /dev/null
+++ b/tinkerbell/stack/templates/init_configmap.yaml
@@ -0,0 +1,122 @@
+{{- if and .Values.stack.enabled .Values.stack.relay.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: host-interface-script
+data:
+  host_interface.sh: |
+    #!/usr/bin/env sh
+
+    # This script allows us to listen and respond to DHCP requests on a host network interface and interact with Smee properly.
+    # This is used instead of `hostNetwork: true` because the dhcp relay requires clear paths for listening for broadcast traffic
+    # and sending/receiving unicast traffic to/from Smee.
+
+    set -xeuo pipefail
+
+    # Print the command line help text to stdout.
+    function usage() {
+        echo "Usage: $0 [OPTION]..."
+        echo "Init script for setting up a network interface to listen and respond to DHCP requests from the Host and move it into a container."
+        echo
+        echo "Options:"
+        echo "  -s, --src     Source interface for listening and responding to DHCP requests (default: default gateway interface)"
+        echo "  -t, --type    Create the interface of type, must be either ipvlan or macvlan (default: macvlan)"
+        echo "  -c, --clean   Clean up any interfaces created"
+        echo "  -h, --help    Display this help and exit"
+    }
+
+    # Succeed if the command named by $1 can be found by the shell.
+    function binary_exists() {
+        command -v "$1" >/dev/null 2>&1
+    }
+
+    # Delete every interface this script may have created, in the container and in the host namespace.
+    # Each delete is best-effort (|| true) so a missing interface does not abort the script under set -e.
+    function cleanup() {
+        # Delete existing interfaces in the container
+        ip link del macvlan0 || true
+        ip link del ipvlan0 || true
+        ip link del ipvlan0-wa || true
+        # Delete existing interfaces in the host namespace
+        nsenter -t1 -n ip link del macvlan0 || true
+        nsenter -t1 -n ip link del ipvlan0 || true
+        nsenter -t1 -n ip link del ipvlan0-wa || true
+    }
+
+    # Create the interface in the host network namespace and move it into this container.
+    # $1: source interface, $2: interface type (macvlan or ipvlan), $3: interface mode
+    function main() {
+        local src_interface="$1"
+        local interface_type="$2"
+        local interface_mode="$3"
+        local interface_name="${interface_type}0"
+
+        # Preparation: remove anything left over from a previous run
+        cleanup
+        # Create the interface
+        nsenter -t1 -n ip link add "${interface_name}" link "${src_interface}" type "${interface_type}" mode "${interface_mode}" || true
+        # Move the interface into the Pod container
+        pid=$$
+        nsenter -t1 -n ip link set "${interface_name}" netns "${pid}" || nsenter -t1 -n ip link delete "${interface_name}"
+        # Bring up the interface
+        ip link set dev "${interface_name}" up
+        # Set the IP address
+        ip addr add 127.1.1.1/32 dev "${interface_name}" noprefixroute || true
+        # Run ipvlan workaround
+        # There is an issue with ipvlan interfaces. They do not start receiving broadcast packets after creation.
+        # This is a workaround to get broadcast packets flowing.
+        # TODO(jacobweinstock): Investigate this deeper and see if this is a kernel bug.
+        if [[ "${interface_type}" == "ipvlan" ]]; then
+            nsenter -t1 -n nmap --script broadcast-dhcp-discover
+            nsenter -t1 -n ip link add "${interface_name}"-wa link "${src_interface}" type "${interface_type}" mode "${interface_mode}" bridge || true
+            nsenter -t1 -n nmap --script broadcast-dhcp-discover
+        fi
+    }
+
+    src_interface=$(nsenter -t1 -n ip route | awk '/default/ {print $5}' | head -n1)
+    interface_type="macvlan"
+    interface_mode="bridge"
+    clean=false
+    # Test getopt directly: with set -e a failing assignment would exit before a separate $? check could run.
+    if ! args=$(getopt -a -o s:t:ch --long src:,type:,clean,help -- "$@"); then
+        usage >&2
+        exit 1
+    fi
+
+    # Quote args so the quoting emitted by getopt is seen intact by eval.
+    eval set -- "${args}"
+    while :
+    do
+        case $1 in
+            -s | --src)
+                if [[ ! -z $2 ]]; then
+                    src_interface=$2
+                fi
+                shift 2 ;;
+            -t | --type)
+                if [[ "$2" == "ipvlan" ]]; then
+                    interface_type="ipvlan"
+                    interface_mode="l2"
+                fi
+                shift 2 ;;
+            -c | --clean)
+                clean=true
+                shift ;;
+            -h | --help)
+                usage
+                exit 1 ;;
+            # -- means the end of the arguments; drop this, and break out of the while loop
+            --) shift; break ;;
+            # Exit here: this branch never shifts, so without an exit the loop would spin forever.
+            *) >&2 echo "Unsupported option: $1"
+                usage >&2
+                exit 1 ;;
+        esac
+    done
+
+    if "${clean}"; then
+        cleanup
+        exit 0
+    fi
+    main "${src_interface}" "${interface_type}" "${interface_mode}"
+{{- end }}
diff --git a/tinkerbell/stack/templates/nginx.yaml b/tinkerbell/stack/templates/nginx.yaml
index abc5bb6a..b40aef15 100644
--- a/tinkerbell/stack/templates/nginx.yaml
+++ b/tinkerbell/stack/templates/nginx.yaml
@@ -25,6 +25,8 @@ spec:
       {{- toYaml . | nindent 6 }}
       {{- end }}
   replicas: 1
+  strategy:
+    type: {{ .Values.stack.deployment.strategy.type }}
   template:
     metadata:
       annotations:
@@ -113,6 +115,12 @@ spec:
           items:
             - key: nginx.conf
               path: nginx.conf.template
+      {{- if and .Values.stack.relay.enabled $listenBroadcast }}
+      - name: script
+        configMap:
+          name: host-interface-script
+          defaultMode: 0500
+      {{- end }}
       {{- if .Values.stack.hook.enabled }}
       - name: hook-artifacts
         hostPath:
@@ -122,41 +130,11 @@ spec:
       {{- if and .Values.stack.relay.enabled $listenBroadcast }}
       initContainers:
       - name: relay-broadcast-interface
-        command:
-          - /bin/sh
-          - -c
-          - |
-            # This script allows us to listen and respond to DHCP requests on a host network interface and interact with Smee properly.
-            # This is used instead of `hostNetwork: true` because the dhcp relay requires clear paths for listening for broadcast traffic
-            # and sending/receiving unicast traffic to/from Smee.
-            set -xe
-            # if sourceInterface is not set use the interface from the default route
-            srcInterface="{{ $sourceInterface }}"
-            if [ -z "$srcInterface" ]; then
-              srcInterface=$(nsenter -t1 -n ip route | awk '/default/ {print $5}' | head -n1)
-            fi
-            # Create the interface. TODO: If this fails, try again with a different name?
-            {{- if eq $dhcpInterfaceType "ipvlan" }}
-            nsenter -t1 -n ip link add {{ $dhcpInterfaceName }} link ${srcInterface} type ipvlan mode l2
-            {{- else }}
-            nsenter -t1 -n ip link add {{ $dhcpInterfaceName }} link ${srcInterface} type macvlan mode bridge
-            {{- end }}
-            # Move the interface into the POD.
-            pid=$(echo $$)
-            nsenter -t1 -n ip link set {{ $dhcpInterfaceName }} netns ${pid} || nsenter -t1 -n ip link delete {{ $dhcpInterfaceName }}
-            # Set the interface up
-            ip link set {{ $dhcpInterfaceName }} up
-            # Set the IP address
-            ip addr add 127.1.1.1/32 dev {{ $dhcpInterfaceName }} noprefixroute || true
-            {{- if eq $dhcpInterfaceType "ipvlan" }}
-            # There is an issue with ipvlan interfaces. They do not start receiving broadcast packets after creation.
-            # This is a workaround to get broadcast packets flowing.
-            # TODO(jacobweinstock): Investigate this deeper and see if this is a kernel bug.
-            nsenter -t1 -n ip link del {{ $dhcpInterfaceName }}-wa || true
-            nsenter -t1 -n nmap --script broadcast-dhcp-discover
-            nsenter -t1 -n ip link add {{ $dhcpInterfaceName }}-wa link ${srcInterface} type ipvlan mode l2 bridge || true
-            {{- end }}
         image: {{ .Values.stack.relay.initImage }}
+        command: ["/script/host_interface.sh", "-s", "{{ $sourceInterface }}", "-t", "{{ $dhcpInterfaceType }}"]
+        volumeMounts:
+          - name: script
+            mountPath: "/script"
         securityContext:
           privileged: true
       {{- end }}
diff --git a/tinkerbell/stack/values.yaml b/tinkerbell/stack/values.yaml
index 7f8044ca..009dac92 100644
--- a/tinkerbell/stack/values.yaml
+++ b/tinkerbell/stack/values.yaml
@@ -7,6 +7,9 @@ stack:
   selector:
     app: tink-stack
   nodeSelector: {}
+  deployment:
+    strategy:
+      type: RollingUpdate
   # stack needs to resolve DNS names in the cluster (in .svc.clusterDomain)
   clusterDomain: cluster.local
   # &publicIP is a YAML anchor. It allows us to define a value once and reference it multiple times.