Skip to content
This repository has been archived by the owner on Mar 12, 2020. It is now read-only.

merge fcrisciani/ip-util-check:fix-peers-list and adamancini/ip-util-check:swarmctl #4

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
33 changes: 31 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,33 @@
# Stage 1: build the swarmctl binary from the swarmkit sources.
FROM golang:1.18.1-alpine3.15 AS build-swarmctl
WORKDIR /go/src/github.com/docker
ENV GO111MODULE=auto
RUN apk add --no-cache bash ca-certificates curl make git openssh-client
RUN git clone https://github.com/docker/swarmkit.git swarmkit \
    && cd swarmkit \
    && make bin/swarmctl \
    && cp bin/swarmctl /usr/bin/swarmctl \
    && rm -rf /go/src/github.com/docker/swarmkit

# Stage 2: download the protoc binary and its bundled .proto files.
FROM alpine:3.15 AS build-protoc
ARG PROTOC_VERSION=3.20.0
RUN apk add --no-cache unzip curl
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as protoc.zip.
RUN curl --fail --silent --show-error --location --output protoc.zip \
        https://github.com/google/protobuf/releases/download/v$PROTOC_VERSION/protoc-$PROTOC_VERSION-linux-x86_64.zip \
    && unzip -d /usr/local protoc.zip include/\* bin/\* \
    && rm -f protoc.zip

# Stage 3: download the static docker client binary.
FROM alpine:3.15 AS build-docker
ARG DOCKER_CLIENT_VERSION=20.10.10
RUN apk add --no-cache curl
RUN curl -fSsL --output docker.tgz \
        https://download.docker.com/linux/static/stable/x86_64/docker-$DOCKER_CLIENT_VERSION.tgz \
    && tar xzf docker.tgz \
    && cp docker/docker /usr/bin/docker \
    && rm -f docker.tgz

# Final image: the script plus the tools collected from the build stages.
FROM alpine:3.15
RUN apk add --no-cache bash jq
COPY --from=build-swarmctl /usr/bin/swarmctl /usr/bin/
COPY --from=build-protoc /usr/local/. /usr/local
COPY --from=build-docker /usr/bin/docker /usr/bin/
COPY ip-util-check /usr/bin/
CMD [ "/usr/bin/ip-util-check" ]
25 changes: 20 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,29 @@ The script flags several potential conditions for each overlay:
cluster size scales up to a certain number of nodes
* IP address space is allocated to >= 75% capacity

#### Note:
Under certain conditions, it may not be possible to accurately
count the number of IP addresses on a network due to Docker's
networking state distribution architecture.

The gossip protocol distributes network state only to nodes that
participate in an overlay network. A node is considered a peer of an
overlay network only if a container or service task attached to that
network is scheduled on it. Manager nodes that are not running
workloads may therefore be unable to accurately count the IP addresses
in use on overlay networks whose workloads run on worker nodes. In this
case, the script falls back to an estimated node count and reports the
usage as approximate.

Building the Container
======================
docker image build -t docker/ip-util-check .

```
docker build -t docker/ip-util-check .
```

Running the Container
=====================

docker run -it --rm -v /var/run/docker.sock:/var/run/docker.sock \
docker/ip-util-check
```
docker run -it --rm \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /var/run/docker/swarm/control.sock:/var/run/swarmd.sock \
docker/ip-util-check
```
116 changes: 70 additions & 46 deletions ip-util-check
Original file line number Diff line number Diff line change
@@ -1,102 +1,126 @@
#!/usr/bin/env bash
#
# Reports the IP address utilization of every overlay network known to the
# docker daemon this script talks to.
#
# DOCKER_CERT_PATH, DOCKER_HOST and DOCKER_TLS_VERIFY are deliberately not
# re-exported here: the script never assigns them, so any value they have
# already comes from the caller's environment and is inherited by the
# commands this script runs.

set -e

# Per-network state keyed on network ID
#
declare -A NET2SUB        # list of subnets for each overlay network
declare -A NET2CAP        # network capacity of each overlay network
declare -A NET2NAME       # network name of each overlay network
declare -A NET2NCIP       # number of container IP addresses for each overlay network
declare -A NET2NVIP       # number of virtual IP addresses for each overlay network
declare -A NET2NNODES     # number of nodes where the overlay is currently used; when that
                          # is not available it defaults to 25% of the cluster
declare -A NODESESTIMATED # 1 if the node count of the network is an estimate (25% of the
                          # cluster), 0 if it was measured

# Run the given command line only when DEBUG is set to 1.
# Args:
#   $@ - command (with its arguments) to execute in the current shell
# Returns:
#   Always 0, whether or not the command ran and whatever its status was, so
#   that a call to this helper can never turn into an unwanted "false" return
#   code (for example the script's exit code when it is the last command).
debugme() {
  if [[ $DEBUG = 1 ]]; then
    "$@" || true
  fi
}

# Report the general IP utilization status of an overlay network.
# Globals (read only):
#   NET2CAP, NET2NAME, NET2NCIP, NET2NVIP, NET2NNODES, NODESESTIMATED
# Args:
#   $1 - network ID to report on
# Outputs:
#   Writes a "----" separator, a summary line and a status line (OK, WARNING
#   or ERROR) to stdout. Missing per-network entries are treated as 0.
report() {
  local net=$1
  local name=${NET2NAME[$net]}
  local cap=${NET2CAP[$net]:-0}
  local ncip=${NET2NCIP[$net]:-0}
  local nvip=${NET2NVIP[$net]:-0}
  local nodes=${NET2NNODES[$net]:-0}
  local estimated=${NODESESTIMATED[$net]:-0}

  echo "----"
  if (( cap == 0 )); then
    echo "Network ${name}/${net} has no assigned IP addresses"
    echo " Network OK"
    return 0
  fi

  local in_use=$(( ncip + nvip ))         # how many IPs are currently in use
  local projected=$(( in_use + nodes ))   # in-use IPs plus one more per node
  local safe_cap=$(( cap * 75 / 100 ))    # safe capacity is 75% of the whole address space
  local headroom=$(( cap - projected ))   # IPs still free once the per-node IPs are counted

  if (( estimated == 1 )); then
    # The node count is only an estimate, so do not print it as a fact.
    echo "Network ${name}/${net} has an IP address capacity of ${cap} and uses approximately ${in_use} addresses"
  else
    echo "Network ${name}/${net} has an IP address capacity of ${cap} and uses ${in_use} addresses spanning over ${nodes} nodes"
  fi

  if (( projected >= cap )); then
    echo " ERROR: network will be over capacity if upgrading Docker Enterprise engine version 18.09 or later"
  elif (( projected >= safe_cap )); then
    echo " WARNING: network is using more than the 75% of the total space. Remaining only ${headroom} IPs after upgrade"
  else
    echo " Network OK: network will have $(( safe_cap - projected )) available IPs before passing the 75% subnet use"
  fi
}


# Gather node, overlay network and service IDs
echo "Gathering basic cluster state"
NNODES=$(docker node ls -q | wc -l)   # total number of nodes in the cluster
NODEIDS=$(docker node ls -q)
# IDs of all overlay networks; NR != 1 skips the header row of the listing.
NETS=$(docker network ls --filter driver=overlay | awk 'NR != 1 {print $1}')
SVCIDS=$(docker service ls -q)


# Estimate how many nodes participate in a network when no peers are visible.
# Args:
#   $1 - total number of nodes in the cluster
# Outputs:
#   25% of the cluster size (integer division), but never less than 1.
estimate_peer_nodes() {
  local cluster_nodes=${1:-0}
  local estimate=$(( cluster_nodes * 25 / 100 ))
  # avoid a zero result for small clusters
  if (( estimate <= 0 )); then
    estimate=1
  fi
  echo "$estimate"
}

# Compute the address capacity of one IPv4 subnet given in CIDR notation.
# Args:
#   $1 - subnet such as 10.0.0.0/24
# Outputs:
#   2^(32 - prefix length) - 3, or 0 when $1 has no usable prefix length.
#   NOTE(review): the 3 subtracted addresses are presumably the network,
#   broadcast and gateway addresses - confirm.
subnet_capacity() {
  local subnet=$1
  local prefix_len=${subnet#*/}
  if [[ $subnet != */* || ! $prefix_len =~ ^[0-9]+$ ]] || (( prefix_len > 32 )); then
    echo 0
    return 0
  fi
  echo $(( (1 << (32 - prefix_len)) - 3 ))
}

echo "Gathering overlay network information"
debugme set -x
for net in $NETS ; do
  network_json=$(docker network inspect "$net")
  NET2NAME[$net]=$(jq -r '.[].Name' <<<"$network_json")

  # The subnet lookups are best effort: a failure must not abort the script
  # (set -e), it only leaves the subnet list empty.
  NET2SUB[$net]=$(jq -r '.[].IPAM.Config[].Subnet' <<<"$network_json" 2>/dev/null) || true
  if [ -z "${NET2SUB[$net]}" ] ; then
    # Fall back to swarmctl when docker does not report a subnet.
    NET2SUB[$net]=$(/usr/bin/swarmctl network inspect "${NET2NAME[$net]}" \
      | awk -F ": " '/Subnet/ {print $2}') || true
  fi

  NET2CAP[$net]=0
  NET2NCIP[$net]=0
  NET2NVIP[$net]=0

  peers=$(jq -r '.[].Peers | length' <<<"$network_json")
  if [ "$peers" -eq 0 ]; then
    # No peers are visible from this node, so the value has to be estimated.
    # NNODES is a plain number, so it is used directly; ${#NNODES[@]} would
    # always be 1.
    NODESESTIMATED[$net]=1
    NET2NNODES[$net]=$(estimate_peer_nodes "$NNODES")
  else
    NODESESTIMATED[$net]=0
    NET2NNODES[$net]=$peers
  fi

  # Word splitting is intended here: NET2SUB holds one subnet per line.
  for sub in ${NET2SUB[$net]} ; do
    NET2CAP[$net]=$(( ${NET2CAP[$net]} + $(subnet_capacity "$sub") ))
  done
done

debugme set +x

echo "Counting container IP allocations per network"
debugme set -x
for node in $NODEIDS ; do
  # Walk the tasks of the node exactly once, so that each task contributes a
  # single address to every network it is attached to.
  for task in $(docker node ps -f 'desired-state = running' -q "$node") ; do
    # Network IDs are cut to their first 12 characters, the length used for
    # the keys of the per-network arrays.
    nets=$(docker inspect "$task" | jq -r '.[].Spec.Networks[].Target' 2>/dev/null | cut -c 1-12)
    for net in $nets; do
      NET2NCIP[$net]=$(( ${NET2NCIP[$net]:-0} + 1 ))
    done
  done
done
debugme set +x


echo "Counting service VIP allocations per network"
debugme set -x
for svc in $SVCIDS ; do
  # One network ID is printed per virtual IP of the service; each of them
  # counts as one address on that network. jq errors (for example a service
  # without virtual IPs) are silenced and simply yield no IDs.
  for netid in $(docker service inspect "$svc" | jq -r '.[].Endpoint.VirtualIPs[].NetworkID' 2>/dev/null) ; do
    net=${netid:0:12}   # first 12 characters, the length used for the array keys
    NET2NVIP[$net]=$(( ${NET2NVIP[$net]:-0} + 1 ))
  done
done

debugme set +x

# Report the IP utilization for each overlay network (one report per network)
echo ""
echo "Overlay IP Utilization Report"
for net in $NETS ; do
  report "$net"
done