diff --git a/tools/system-health-check/health_check.py b/tools/system-health-check/health_check.py index f84f443..45b34b9 100644 --- a/tools/system-health-check/health_check.py +++ b/tools/system-health-check/health_check.py @@ -111,7 +111,7 @@ def check_pods_with_resources(): AND ( ( - 100 * avg(1 - rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) <= """ + str(node_cpu_threshold) + """ + 100 * avg(1 - rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) >= """ + str(node_cpu_threshold) + """ ) AND ( @@ -121,7 +121,7 @@ def check_pods_with_resources(): AND ( ( - 100 * avg(1 - ((avg_over_time(node_memory_MemFree_bytes[5m]) + avg_over_time(node_memory_Cached_bytes[5m]) + avg_over_time(node_memory_Buffers_bytes[5m])) / avg_over_time(node_memory_MemTotal_bytes[5m] ))) by (instance) <= """ + str(node_memory_threshold) + """ + 100 * avg(1 - ((avg_over_time(node_memory_MemFree_bytes[5m]) + avg_over_time(node_memory_Cached_bytes[5m]) + avg_over_time(node_memory_Buffers_bytes[5m])) / avg_over_time(node_memory_MemTotal_bytes[5m] ))) by (instance) >= """ + str(node_memory_threshold) + """ ) AND ( @@ -135,7 +135,7 @@ def check_pods_with_resources(): ) OR ( - ((sum(kube_node_status_condition{condition="Ready", status="false"} * on (node) group_left(instance) label_replace(kube_node_info,"instance", "$1:9100", "internal_ip", "(.*)")) by (instance)) == 0) + ((sum(kube_node_status_condition{condition="Ready", status="true"} * on (node) group_left(instance) label_replace(kube_node_info,"instance", "$1:9100", "internal_ip", "(.*)")) by (instance)) == 0) ) ) == 0 """ @@ -301,4 +301,4 @@ def main(): exit(1) if __name__ == '__main__': - main() \ No newline at end of file + main()