diff --git a/pkg/schema/v1/node.go b/pkg/schema/v1/node.go
index 97ab4ec1..03c8fdaf 100644
--- a/pkg/schema/v1/node.go
+++ b/pkg/schema/v1/node.go
@@ -1,6 +1,7 @@
 package v1
 
 import (
+	"fmt"
 	"github.com/icinga/icinga-go-library/types"
 	"github.com/icinga/icinga-kubernetes/pkg/database"
 	"github.com/pkg/errors"
@@ -26,6 +27,8 @@ type Node struct {
 	MemoryAllocatable int64
 	PodCapacity       int64
 	Yaml              string
+	IcingaState       IcingaState
+	IcingaStateReason string
 	Conditions        []NodeCondition `db:"-"`
 	Volumes           []NodeVolume    `db:"-"`
 	Labels            []Label         `db:"-"`
@@ -84,6 +87,7 @@ func (n *Node) Obtain(k8s kmetav1.Object) {
 	n.MemoryCapacity = node.Status.Capacity.Memory().MilliValue()
 	n.MemoryAllocatable = node.Status.Allocatable.Memory().MilliValue()
 	n.PodCapacity = node.Status.Allocatable.Pods().Value()
+	n.IcingaState, n.IcingaStateReason = n.getIcingaState(node)
 
 	for _, condition := range node.Status.Conditions {
 		n.Conditions = append(n.Conditions, NodeCondition{
@@ -135,6 +139,52 @@ func (n *Node) Obtain(k8s kmetav1.Object) {
 	n.Yaml = string(output)
 }
 
+// getIcingaState derives the Icinga state of the given Kubernetes node and a
+// human-readable reason for it.
+//
+// A pending node is reported as Pending and a terminated node as Ok. Otherwise
+// every pressure or network condition whose status is true makes the node
+// Critical, and the reasons of all such conditions are joined into one string.
+// A node without any of these conditions is reported as Ok.
+func (n *Node) getIcingaState(node *kcorev1.Node) (IcingaState, string) {
+	// Status.Phase is deprecated and documented by Kubernetes as never
+	// populated, so it can only short-circuit here and must not be required
+	// for evaluating the conditions below.
+	switch node.Status.Phase {
+	case kcorev1.NodePending:
+		return Pending, fmt.Sprintf("Node %s is pending", node.Name)
+	case kcorev1.NodeTerminated:
+		return Ok, fmt.Sprintf("Node %s is terminated", node.Name)
+	}
+
+	// Read the conditions from the Kubernetes object rather than n.Conditions:
+	// Obtain calls this method before it fills n.Conditions.
+	var reasons []string
+
+	for _, condition := range node.Status.Conditions {
+		if condition.Status != kcorev1.ConditionTrue {
+			continue
+		}
+
+		switch condition.Type {
+		case kcorev1.NodeDiskPressure:
+			reasons = append(reasons, fmt.Sprintf("Node %s is running out of disk space", node.Name))
+		case kcorev1.NodeMemoryPressure:
+			reasons = append(reasons, fmt.Sprintf("Node %s is running out of available memory", node.Name))
+		case kcorev1.NodePIDPressure:
+			reasons = append(reasons, fmt.Sprintf("Node %s is running out of process IDs", node.Name))
+		case kcorev1.NodeNetworkUnavailable:
+			reasons = append(reasons, fmt.Sprintf("Node %s network is not correctly configured", node.Name))
+		}
+	}
+
+	if len(reasons) > 0 {
+		return Critical, strings.Join(reasons, ". ")
+	}
+
+	return Ok, fmt.Sprintf("Node %s is healthy", node.Name)
+}
+
 func (n *Node) Relations() []database.Relation {
 	fk := database.WithForeignKey("node_uuid")
 
diff --git a/schema/mysql/schema.sql b/schema/mysql/schema.sql
index 85a18094..40d3dcac 100644
--- a/schema/mysql/schema.sql
+++ b/schema/mysql/schema.sql
@@ -36,6 +36,8 @@ CREATE TABLE node (
   memory_allocatable bigint unsigned NOT NULL,
   pod_capacity int unsigned NOT NULL,
   yaml mediumblob DEFAULT NULL,
+  icinga_state enum('ok', 'warning', 'critical', 'unknown', 'pending') COLLATE utf8mb4_unicode_ci NOT NULL,
+  icinga_state_reason text NOT NULL,
   created bigint unsigned NOT NULL,
   PRIMARY KEY (uuid)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;