Skip to content

Commit

Permalink
Fixed node health check prometheus query (#51)
Browse files Browse the repository at this point in the history
  • Loading branch information
shehbaz-pathan authored Jan 28, 2025
1 parent 43a499c commit b042dc8
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions tools/system-health-check/health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def check_pods_with_resources():
AND
(
(
- 100 * avg(1 - rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) <= """ + str(node_cpu_threshold) + """
+ 100 * avg(1 - rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) >= """ + str(node_cpu_threshold) + """
)
AND
(
Expand All @@ -121,7 +121,7 @@ def check_pods_with_resources():
AND
(
(
- 100 * avg(1 - ((avg_over_time(node_memory_MemFree_bytes[5m]) + avg_over_time(node_memory_Cached_bytes[5m]) + avg_over_time(node_memory_Buffers_bytes[5m])) / avg_over_time(node_memory_MemTotal_bytes[5m] ))) by (instance) <= """ + str(node_memory_threshold) + """
+ 100 * avg(1 - ((avg_over_time(node_memory_MemFree_bytes[5m]) + avg_over_time(node_memory_Cached_bytes[5m]) + avg_over_time(node_memory_Buffers_bytes[5m])) / avg_over_time(node_memory_MemTotal_bytes[5m] ))) by (instance) >= """ + str(node_memory_threshold) + """
)
AND
(
Expand All @@ -135,7 +135,7 @@ def check_pods_with_resources():
)
OR
(
- ((sum(kube_node_status_condition{condition="Ready", status="false"} * on (node) group_left(instance) label_replace(kube_node_info,"instance", "$1:9100", "internal_ip", "(.*)")) by (instance)) == 0)
+ ((sum(kube_node_status_condition{condition="Ready", status="true"} * on (node) group_left(instance) label_replace(kube_node_info,"instance", "$1:9100", "internal_ip", "(.*)")) by (instance)) == 0)
)
) == 0
"""
Expand Down Expand Up @@ -301,4 +301,4 @@ def main():
exit(1)

if __name__ == '__main__':
- main()
+ main()

0 comments on commit b042dc8

Please sign in to comment.