From 8d91be686a109f535cbe37d68703ca2f02b610f2 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Thu, 23 Jan 2025 11:49:53 +0200 Subject: [PATCH] k8s-stack: added grafana external host management. fixes #1946 --- .../files/rules/generated/vlogs.yaml | 2 +- .../victoria-metrics-k8s-stack/CHANGELOG.md | 5 +- charts/victoria-metrics-k8s-stack/README.md | 15 ++++- .../rules/generated/alertmanager.rules.yaml | 2 +- .../files/rules/generated/etcd.yaml | 2 +- .../files/rules/generated/general.rules.yaml | 2 +- .../k8s.rules.container_cpu_limits.yaml | 2 +- .../k8s.rules.container_cpu_requests.yaml | 2 +- ...les.container_cpu_usage_seconds_total.yaml | 2 +- .../k8s.rules.container_memory_cache.yaml | 2 +- .../k8s.rules.container_memory_limits.yaml | 2 +- .../k8s.rules.container_memory_requests.yaml | 2 +- .../k8s.rules.container_memory_rss.yaml | 2 +- .../k8s.rules.container_memory_swap.yaml | 2 +- ...es.container_memory_working_set_bytes.yaml | 2 +- .../rules/generated/k8s.rules.pod_owner.yaml | 2 +- .../kube-apiserver-availability.rules.yaml | 18 +++--- .../kube-apiserver-burnrate.rules.yaml | 58 +++++++++---------- .../kube-apiserver-histogram.rules.yaml | 2 +- .../rules/generated/kube-apiserver-slos.yaml | 10 ++-- .../kube-prometheus-general.rules.yaml | 2 +- .../kube-prometheus-node-recording.rules.yaml | 2 +- .../rules/generated/kube-scheduler.rules.yaml | 2 +- .../rules/generated/kube-state-metrics.yaml | 2 +- .../files/rules/generated/kubelet.rules.yaml | 2 +- .../rules/generated/kubernetes-apps.yaml | 34 +++++------ .../rules/generated/kubernetes-resources.yaml | 10 ++-- .../rules/generated/kubernetes-storage.yaml | 2 +- .../kubernetes-system-apiserver.yaml | 6 +- .../kubernetes-system-controller-manager.yaml | 2 +- .../generated/kubernetes-system-kubelet.yaml | 34 ++++++----- .../kubernetes-system-scheduler.yaml | 2 +- .../rules/generated/kubernetes-system.yaml | 6 +- .../rules/generated/node-exporter.rules.yaml | 2 +- 
.../files/rules/generated/node-exporter.yaml | 2 +- .../files/rules/generated/node-network.yaml | 2 +- .../files/rules/generated/node.rules.yaml | 2 +- .../files/rules/generated/vm-health.yaml | 2 +- .../files/rules/generated/vmagent.yaml | 22 +++---- .../files/rules/generated/vmcluster.yaml | 20 +++---- .../files/rules/generated/vmoperator.yaml | 2 +- .../files/rules/generated/vmsingle.yaml | 16 ++--- .../templates/_helpers.tpl | 18 +++--- charts/victoria-metrics-k8s-stack/values.yaml | 32 +++++----- hack/rules-and-dashboards/sync_rules.py | 4 +- 45 files changed, 194 insertions(+), 172 deletions(-) diff --git a/charts/victoria-logs-single/files/rules/generated/vlogs.yaml b/charts/victoria-logs-single/files/rules/generated/vlogs.yaml index d09a6ab71..6629948d1 100644 --- a/charts/victoria-logs-single/files/rules/generated/vlogs.yaml +++ b/charts/victoria-logs-single/files/rules/generated/vlogs.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} concurrency: 2 condition: '{{ true }}' interval: 30s diff --git a/charts/victoria-metrics-k8s-stack/CHANGELOG.md b/charts/victoria-metrics-k8s-stack/CHANGELOG.md index e3f20248f..9b16e4e69 100644 --- a/charts/victoria-metrics-k8s-stack/CHANGELOG.md +++ b/charts/victoria-metrics-k8s-stack/CHANGELOG.md @@ -1,6 +1,9 @@ ## Next release -- TODO +**Update note**: This release contains breaking change. 
`.Values.externalVM` was renamed to `.Values.external.vm` + +- add `.Values.external.grafana.host` to configure grafana host for alerts, when `.Values.grafana.enabled: false` +- rename `.Values.externalVM` to `.Values.external.vm` for consistency ## 0.34.0 diff --git a/charts/victoria-metrics-k8s-stack/README.md b/charts/victoria-metrics-k8s-stack/README.md index b1fd9990c..dc00d93de 100644 --- a/charts/victoria-metrics-k8s-stack/README.md +++ b/charts/victoria-metrics-k8s-stack/README.md @@ -1326,7 +1326,18 @@ vmsingle: - externalVM + external.grafana + object +
+host: grafana.external.host
+
+
+ +

External Grafana host

+ + + + external.vm object
 read:
@@ -2475,7 +2486,7 @@ unauthorizedUserAccessSpec:
 
 
-

Full spec for VMAuth CRD. Allowed values described here It’s possible to use given below predefined variables in spec: * {{ .vm.read }} - parsed vmselect, vmsingle or externalVM.read URL * {{ .vm.write }} - parsed vminsert, vmsingle or externalVM.write URL

+

Full spec for VMAuth CRD. Allowed values are described here. The following predefined variables can be used in the spec: * {{ .vm.read }} - parsed vmselect, vmsingle or external.vm.read URL * {{ .vm.write }} - parsed vminsert, vmsingle or external.vm.write URL

diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/alertmanager.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/alertmanager.rules.yaml index 1f1d50856..fb0e35823 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/alertmanager.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/alertmanager.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.alertmanager).enabled }}' name: alertmanager.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml index eb52d9fa5..b967a3e9a 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeEtcd).enabled }}' name: etcd rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml index 3f365bce7..9287bee45 100644 --- 
a/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: general.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_limits.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_limits.yaml index c6448b39c..0d86579bd 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_limits.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_limits.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_cpu_limits rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_requests.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_requests.yaml index be8c36d4b..ad6b82c51 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_requests.yaml +++ 
b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_requests.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_cpu_requests rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml index 81efacb10..e25c26e0c 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_cpu_usage_seconds_total rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml index 281048576..63deb6465 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml +++ 
b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_memory_cache rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_limits.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_limits.yaml index d803ca0d1..0819aaa36 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_limits.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_limits.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_memory_limits rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_requests.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_requests.yaml index 3f56c7cda..bc2ef3866 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_requests.yaml +++ 
b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_requests.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_memory_requests rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml index a80342fad..da04872e6 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_memory_rss rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml index 6aa8a3a93..93797ab90 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml +++ 
b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_memory_swap rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml index 6037850ff..9f5f377bc 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.container_memory_working_set_bytes rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml index f17ebfa3e..dcbf6b16b 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml +++ 
b/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: k8s.rules.pod_owner rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml index 120ff7ef0..35485dfbc 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeApiServer).enabled }}' interval: 3m name: kube-apiserver-availability.rules @@ -38,7 +38,7 @@ rules: # write too slow sum by ({{ $clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - - sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + sum by ({{ $clusterLabel }}) 
(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"}) ) + ( # read too slow @@ -46,14 +46,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"}) or vector(0) ) + - sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"}) + - sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"}) ) ) + # errors @@ -72,14 +72,14 @@ rules: ( # too slow ( - sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"}) or vector(0) ) + - sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"}) + - sum by ({{ $clusterLabel }}) 
(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"}) ) + # errors @@ -97,7 +97,7 @@ rules: # too slow sum by ({{ $clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - - sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + sum by ({{ $clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"}) ) + # errors diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml index 8c5b736fa..5d8725d3d 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeApiServer).enabled }}' name: kube-apiserver-burnrate.rules rules: @@ -14,14 +14,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1d])) or vector(0) ) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1d])) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1d])) ) ) + @@ -42,14 +42,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1h])) or vector(0) ) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) + sum by ({{ $clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1h])) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1h])) ) ) + @@ -70,14 +70,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[2h])) or vector(0) ) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[2h])) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[2h])) ) ) + @@ -98,14 +98,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[30m])) or vector(0) ) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[30m])) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[30m])) ) ) + @@ -126,14 +126,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[3d])) or vector(0) ) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) + sum by ({{ $clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[3d])) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[3d])) ) ) + @@ -154,14 +154,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[5m])) or vector(0) ) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[5m])) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[5m])) ) ) + @@ -182,14 +182,14 @@ rules: - ( ( - sum by ({{ $clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[6h])) or vector(0) ) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[6h])) + - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[6h])) ) ) + @@ -208,7 +208,7 @@ rules: # too slow sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1d])) ) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) @@ -225,7 +225,7 @@ rules: # too slow sum by ({{ 
$clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1h])) ) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) @@ -242,7 +242,7 @@ rules: # too slow sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[2h])) ) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) @@ -259,7 +259,7 @@ rules: # too slow sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) + sum by ({{ $clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[30m])) ) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) @@ -276,7 +276,7 @@ rules: # too slow sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[3d])) ) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) @@ -293,7 +293,7 @@ rules: # too slow sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[5m])) ) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) @@ -310,7 +310,7 @@ rules: # too slow sum by ({{ $clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - - sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[6h])) ) + sum by ({{ $clusterLabel }}) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml index 97be8e628..bb54c6b65 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeApiServer).enabled }}' name: kube-apiserver-histogram.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml index 7fb40f7d7..e347939e8 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml +++ 
b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml @@ -1,13 +1,13 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeApiServer).enabled }}' name: kube-apiserver-slos rules: - alert: KubeAPIErrorBudgetBurn annotations: - description: 'The API server is burning too much error budget.' + description: 'The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeapierrorbudgetburn' summary: 'The API server is burning too much error budget.' condition: '{{ true }}' @@ -22,7 +22,7 @@ rules: short: 5m - alert: KubeAPIErrorBudgetBurn annotations: - description: 'The API server is burning too much error budget.' + description: 'The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeapierrorbudgetburn' summary: 'The API server is burning too much error budget.' condition: '{{ true }}' @@ -37,7 +37,7 @@ rules: short: 30m - alert: KubeAPIErrorBudgetBurn annotations: - description: 'The API server is burning too much error budget.' + description: 'The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeapierrorbudgetburn' summary: 'The API server is burning too much error budget.' condition: '{{ true }}' @@ -52,7 +52,7 @@ rules: short: 2h - alert: KubeAPIErrorBudgetBurn annotations: - description: 'The API server is burning too much error budget.' 
+ description: 'The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeapierrorbudgetburn' summary: 'The API server is burning too much error budget.' condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml index d5da7aea6..e539d89ca 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kube-prometheus-general.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml index ac06e16f2..fd373379b 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := 
ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kube-prometheus-node-recording.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml index 6d7f7109d..d9d98d2c9 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeScheduler).enabled }}' name: kube-scheduler.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml index 6f480d98f..63c395202 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: 
kube-state-metrics rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml index cc3694128..e35f37705 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubelet).enabled }}' name: kubelet.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml index 7a7727863..14eece60d 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml @@ -1,13 +1,13 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kubernetes-apps rules: - alert: KubePodCrashLooping annotations: - description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: 
"CrashLoopBackOff").' + description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff") on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubepodcrashlooping' summary: 'Pod is crash looping.' condition: '{{ true }}' @@ -17,7 +17,7 @@ rules: severity: warning - alert: KubePodNotReady annotations: - description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.' + description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubepodnotready' summary: 'Pod has been in a non-ready state for more than 15 minutes.' condition: '{{ true }}' @@ -34,7 +34,7 @@ rules: severity: warning - alert: KubeDeploymentGenerationMismatch annotations: - description: 'Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.' + description: 'Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch' summary: 'Deployment generation mismatch due to possible roll-back' condition: '{{ true }}' @@ -47,7 +47,7 @@ rules: severity: warning - alert: KubeDeploymentReplicasMismatch annotations: - description: 'Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.' 
+ description: 'Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch' summary: 'Deployment has not matched the expected number of replicas.' condition: '{{ true }}' @@ -66,7 +66,7 @@ rules: severity: warning - alert: KubeDeploymentRolloutStuck annotations: - description: 'Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes.' + description: 'Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubedeploymentrolloutstuck' summary: 'Deployment rollout is not progressing.' condition: '{{ true }}' @@ -78,7 +78,7 @@ rules: severity: warning - alert: KubeStatefulSetReplicasMismatch annotations: - description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.' + description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch' summary: 'StatefulSet has not matched the expected number of replicas.' condition: '{{ true }}' @@ -97,7 +97,7 @@ rules: severity: warning - alert: KubeStatefulSetGenerationMismatch annotations: - description: 'StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.' 
+ description: 'StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch' summary: 'StatefulSet generation mismatch due to possible roll-back' condition: '{{ true }}' @@ -110,7 +110,7 @@ rules: severity: warning - alert: KubeStatefulSetUpdateNotRolledOut annotations: - description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.' + description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout' summary: 'StatefulSet update has not been rolled out.' condition: '{{ true }}' @@ -137,7 +137,7 @@ rules: severity: warning - alert: KubeDaemonSetRolloutStuck annotations: - description: 'DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m.' + description: 'DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck' summary: 'DaemonSet rollout is stuck.' condition: '{{ true }}' @@ -170,7 +170,7 @@ rules: severity: warning - alert: KubeContainerWaiting annotations: - description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}").' 
+ description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}") on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubecontainerwaiting' summary: 'Pod container waiting longer than 1 hour' condition: '{{ true }}' @@ -180,7 +180,7 @@ rules: severity: warning - alert: KubeDaemonSetNotScheduled annotations: - description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.' + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubedaemonsetnotscheduled' summary: 'DaemonSet pods are not scheduled.' condition: '{{ true }}' @@ -193,7 +193,7 @@ rules: severity: warning - alert: KubeDaemonSetMisScheduled annotations: - description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.' + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubedaemonsetmisscheduled' summary: 'DaemonSet pods are misscheduled.' condition: '{{ true }}' @@ -203,7 +203,7 @@ rules: severity: warning - alert: KubeJobNotCompleted annotations: - description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.' 
+ description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubejobnotcompleted' summary: 'Job did not complete in time' condition: '{{ true }}' @@ -215,7 +215,7 @@ rules: severity: warning - alert: KubeJobFailed annotations: - description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.' + description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubejobfailed' summary: 'Job failed to complete.' condition: '{{ true }}' @@ -225,7 +225,7 @@ rules: severity: warning - alert: KubeHpaReplicasMismatch annotations: - description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.' + description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubehpareplicasmismatch' summary: 'HPA has not matched desired number of replicas.' condition: '{{ true }}' @@ -248,7 +248,7 @@ rules: severity: warning - alert: KubeHpaMaxedOut annotations: - description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.' 
+ description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubehpamaxedout' summary: 'HPA is running at max replicas' condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml index e7e4352be..196f31ccb 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kubernetes-resources rules: @@ -61,7 +61,7 @@ rules: severity: warning - alert: KubeQuotaAlmostFull annotations: - description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' + description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubequotaalmostfull' summary: 'Namespace quota is going to be full.' 
condition: '{{ true }}' @@ -75,7 +75,7 @@ rules: severity: info - alert: KubeQuotaFullyUsed annotations: - description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' + description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubequotafullyused' summary: 'Namespace quota is fully used.' condition: '{{ true }}' @@ -89,7 +89,7 @@ rules: severity: info - alert: KubeQuotaExceeded annotations: - description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' + description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubequotaexceeded' summary: 'Namespace quota has exceeded the limits.' condition: '{{ true }}' @@ -103,7 +103,7 @@ rules: severity: warning - alert: CPUThrottlingHigh annotations: - description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.' + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/cputhrottlinghigh' summary: 'Processes experience elevated CPU throttling.' 
condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml index b74b40f1a..90cfca2ce 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kubernetes-storage rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml index 4c8ec8bbb..52fa7f877 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kubernetes-system-apiserver rules: @@ -43,7 +43,7 @@ rules: severity: warning - alert: KubeAggregatedAPIDown annotations: - description: 'Kubernetes aggregated API {{`{{`}} 
$labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.' + description: 'Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeaggregatedapidown' summary: 'Kubernetes aggregated API is down.' condition: '{{ true }}' @@ -63,7 +63,7 @@ rules: severity: critical - alert: KubeAPITerminatedRequests annotations: - description: 'The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.' + description: 'The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeapiterminatedrequests' summary: 'The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.' 
condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml index 19fa19786..9d22d2f57 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeControllerManager).enabled }}' name: kubernetes-system-controller-manager rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml index 45abe55a9..16beab649 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml @@ -1,13 +1,13 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kubernetes-system-kubelet rules: - alert: KubeNodeNotReady 
annotations: - description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.' + description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubenodenotready' summary: 'Node is not ready.' condition: '{{ true }}' @@ -17,7 +17,7 @@ rules: severity: warning - alert: KubeNodeUnreachable annotations: - description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.' + description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubenodeunreachable' summary: 'Node is unreachable.' condition: '{{ true }}' @@ -27,24 +27,28 @@ rules: severity: warning - alert: KubeletTooManyPods annotations: - description: 'Kubelet ''{{`{{`}} $labels.node {{`}}`}}'' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.' + description: 'Kubelet ''{{`{{`}} $labels.node {{`}}`}}'' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubelettoomanypods' summary: 'Kubelet is running at capacity.' 
condition: '{{ true }}' expr: |- count by (node,{{ $clusterLabel }}) ( - (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on (instance,pod,namespace,{{ $clusterLabel }}) group_left(node) topk by (instance,pod,namespace,{{ $clusterLabel }}) (1, kube_pod_info{job="kube-state-metrics"}) + (kube_pod_status_phase{job="kube-state-metrics", phase="Running"} == 1) + * on (namespace,pod,{{ $clusterLabel }}) group_left (node) + group by (namespace,pod,node,{{ $clusterLabel }}) ( + kube_pod_info{job="kube-state-metrics"} + ) ) / max by (node,{{ $clusterLabel }}) ( - kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 + kube_node_status_capacity{job="kube-state-metrics", resource="pods"} != 1 ) > 0.95 for: 15m labels: severity: info - alert: KubeNodeReadinessFlapping annotations: - description: 'The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.' + description: 'The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubenodereadinessflapping' summary: 'Node readiness status is flapping.' condition: '{{ true }}' @@ -54,7 +58,7 @@ rules: severity: warning - alert: KubeletPlegDurationHigh annotations: - description: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.' + description: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletplegdurationhigh' summary: 'Kubelet Pod Lifecycle Event Generator is taking too long to relist.' 
condition: '{{ true }}' @@ -64,7 +68,7 @@ rules: severity: warning - alert: KubeletPodStartUpLatencyHigh annotations: - description: 'Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.' + description: 'Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletpodstartuplatencyhigh' summary: 'Kubelet Pod startup latency is too high.' condition: '{{ true }}' @@ -74,7 +78,7 @@ rules: severity: warning - alert: KubeletClientCertificateExpiration annotations: - description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletclientcertificateexpiration' summary: 'Kubelet client certificate is about to expire.' condition: '{{ true }}' @@ -83,7 +87,7 @@ rules: severity: warning - alert: KubeletClientCertificateExpiration annotations: - description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletclientcertificateexpiration' summary: 'Kubelet client certificate is about to expire.' 
condition: '{{ true }}' @@ -92,7 +96,7 @@ rules: severity: critical - alert: KubeletServerCertificateExpiration annotations: - description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletservercertificateexpiration' summary: 'Kubelet server certificate is about to expire.' condition: '{{ true }}' @@ -101,7 +105,7 @@ rules: severity: warning - alert: KubeletServerCertificateExpiration annotations: - description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletservercertificateexpiration' summary: 'Kubelet server certificate is about to expire.' condition: '{{ true }}' @@ -110,7 +114,7 @@ rules: severity: critical - alert: KubeletClientCertificateRenewalErrors annotations: - description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).' + description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletclientcertificaterenewalerrors' summary: 'Kubelet has failed to renew its client certificate.' 
condition: '{{ true }}' @@ -120,7 +124,7 @@ rules: severity: warning - alert: KubeletServerCertificateRenewalErrors annotations: - description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).' + description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeletservercertificaterenewalerrors' summary: 'Kubelet has failed to renew its server certificate.' condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml index af34e25a1..9d8fc6631 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ ($Values.kubeScheduler).enabled }}' name: kubernetes-system-scheduler rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml index c0e667d4e..5ecd6ea82 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml +++ 
b/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml @@ -1,13 +1,13 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: kubernetes-system rules: - alert: KubeVersionMismatch annotations: - description: 'There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.' + description: 'There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeversionmismatch' summary: 'Different semantic versions of Kubernetes components running.' condition: '{{ true }}' @@ -17,7 +17,7 @@ rules: severity: warning - alert: KubeClientErrors annotations: - description: 'Kubernetes API server client ''{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}'' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.''' + description: 'Kubernetes API server client ''{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}'' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}.' runbook_url: '{{ $runbookUrl }}/kubernetes/kubeclienterrors' summary: 'Kubernetes API server client is experiencing errors.' 
condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml index 1baf9e203..9271b07c6 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: node-exporter.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml index 6b6ea39ff..56b76e040 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: node-exporter rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml index 21c51e64c..6d7208d11 100644 --- 
a/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: node-network rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml index 4e5bdd89d..354db8ca5 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: node.rules rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/vm-health.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/vm-health.yaml index 8defcccf4..32169543d 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/vm-health.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/vm-health.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default 
"https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: vm-health rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmagent.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmagent.yaml index 08b58ca2e..c1ad494fd 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmagent.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmagent.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} concurrency: 2 condition: '{{ true }}' interval: 30s @@ -9,7 +9,7 @@ name: vmagent rules: - alert: PersistentQueueIsDroppingData annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=49&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=49&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Vmagent dropped {{`{{`}} $value | humanize1024 {{`}}`}} from persistent queue on instance {{`{{`}} $labels.instance {{`}}`}} for the last 10m.' 
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} is dropping data from persistent queue' condition: '{{ true }}' @@ -19,7 +19,7 @@ rules: severity: critical - alert: RejectedRemoteWriteDataBlocksAreDropped annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=79&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=79&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Job "{{`{{`}} $labels.job {{`}}`}}" on instance {{`{{`}} $labels.instance {{`}}`}} drops the rejected by remote-write server data blocks. Check the logs to find the reason for rejects.' summary: 'Vmagent is dropping data blocks that are rejected by remote storage' condition: '{{ true }}' @@ -29,7 +29,7 @@ rules: severity: warning - alert: TooManyScrapeErrors annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=31&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=31&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Job "{{`{{`}} $labels.job {{`}}`}}" on instance {{`{{`}} $labels.instance {{`}}`}} fails to scrape targets for last 15m' summary: 'Vmagent fails to scrape one or more targets' condition: '{{ true }}' @@ -39,7 +39,7 @@ rules: severity: warning - alert: TooManyWriteErrors annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=77&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=77&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Job "{{`{{`}} $labels.job {{`}}`}}" on instance {{`{{`}} $labels.instance {{`}}`}} responds with errors to write requests for last 15m.' 
summary: 'Vmagent responds with too many errors on data ingestion protocols' condition: '{{ true }}' @@ -52,7 +52,7 @@ rules: severity: warning - alert: TooManyRemoteWriteErrors annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=61&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=61&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "Vmagent fails to push data via remote write protocol to destination \"{{`{{`}} $labels.url {{`}}`}}\"\n Ensure that destination is up and reachable." summary: 'Job "{{`{{`}} $labels.job {{`}}`}}" on instance {{`{{`}} $labels.instance {{`}}`}} fails to push to remote storage' condition: '{{ true }}' @@ -62,7 +62,7 @@ rules: severity: warning - alert: RemoteWriteConnectionIsSaturated annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=84&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=84&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "The remote write connection between vmagent \"{{`{{`}} $labels.job {{`}}`}}\" (instance {{`{{`}} $labels.instance {{`}}`}}) and destination \"{{`{{`}} $labels.url {{`}}`}}\" is saturated by more than 90% and vmagent won't be able to keep up.\n This usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage." 
summary: 'Remote write connection from "{{`{{`}} $labels.job {{`}}`}}" (instance {{`{{`}} $labels.instance {{`}}`}}) to {{`{{`}} $labels.url {{`}}`}} is saturated' condition: '{{ true }}' @@ -72,7 +72,7 @@ rules: severity: warning - alert: PersistentQueueForWritesIsSaturated annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=98&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=98&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Persistent queue writes for vmagent "{{`{{`}} $labels.job {{`}}`}}" (instance {{`{{`}} $labels.instance {{`}}`}}) are saturated by more than 90% and vmagent won''t be able to keep up with flushing data on disk. In this case, consider to decrease load on the vmagent or improve the disk throughput.' summary: 'Persistent queue writes for instance {{`{{`}} $labels.instance {{`}}`}} are saturated' condition: '{{ true }}' @@ -82,7 +82,7 @@ rules: severity: warning - alert: PersistentQueueForReadsIsSaturated annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=99&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=99&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Persistent queue reads for vmagent "{{`{{`}} $labels.job {{`}}`}}" (instance {{`{{`}} $labels.instance {{`}}`}}) are saturated by more than 90% and vmagent won''t be able to keep up with reading data from the disk. In this case, consider to decrease load on the vmagent or improve the disk throughput.' 
summary: 'Persistent queue reads for instance {{`{{`}} $labels.instance {{`}}`}} are saturated' condition: '{{ true }}' @@ -92,7 +92,7 @@ rules: severity: warning - alert: SeriesLimitHourReached annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=88&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=88&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Max series limit set via -remoteWrite.maxHourlySeries flag is close to reaching the max value. Then samples for new time series will be dropped instead of sending them to remote storage systems.' summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} reached 90% of the limit' condition: '{{ true }}' @@ -101,7 +101,7 @@ rules: severity: critical - alert: SeriesLimitDayReached annotations: - dashboard: '{{ $host }}/d/G7Z9GzMGz?viewPanel=90&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/G7Z9GzMGz?viewPanel=90&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Max series limit set via -remoteWrite.maxDailySeries flag is close to reaching the max value. Then samples for new time series will be dropped instead of sending them to remote storage systems.' 
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} reached 90% of the limit' condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmcluster.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmcluster.yaml index 8d5cca128..c4ee9b7b0 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmcluster.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmcluster.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} concurrency: 2 condition: '{{ true }}' interval: 30s @@ -9,7 +9,7 @@ name: vmcluster rules: - alert: DiskRunsOutOfSpaceIn3Days annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "Taking into account current ingestion rate, free disk space will be enough only for {{`{{`}} $value | humanizeDuration {{`}}`}} on instance {{`{{`}} $labels.instance {{`}}`}}.\n Consider to limit the ingestion rate, decrease retention or scale the disk space up if possible." 
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will run out of disk space in 3 days' condition: '{{ true }}' @@ -26,7 +26,7 @@ rules: severity: critical - alert: NodeBecomesReadonlyIn3Days annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "Taking into account current ingestion rate, free disk space and -storage.minFreeDiskSpaceBytes instance {{`{{`}} $labels.instance {{`}}`}} will remain writable for {{`{{`}} $value | humanizeDuration {{`}}`}}.\n Consider to limit the ingestion rate, decrease retention or scale the disk space up if possible." summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will become read-only in 3 days' condition: '{{ true }}' @@ -43,7 +43,7 @@ rules: severity: warning - alert: DiskRunsOutOfSpace annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=200&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=200&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "Disk utilisation on instance {{`{{`}} $labels.instance {{`}}`}} is more than 80%.\n Having less than 20% of free disk space could cripple merges processes and overall performance. Consider to limit the ingestion rate, decrease retention or scale the disk space if possible." summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} (job={{`{{`}} $labels.job {{`}}`}}) will run out of disk space soon' condition: '{{ true }}' @@ -58,7 +58,7 @@ rules: severity: critical - alert: RequestErrorsToAPI annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=52&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=52&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Requests to path {{`{{`}} $labels.path {{`}}`}} are receiving errors. 
Please verify if clients are sending correct requests.' summary: 'Too many errors served for {{`{{`}} $labels.job {{`}}`}} path {{`{{`}} $labels.path {{`}}`}} (instance {{`{{`}} $labels.instance {{`}}`}})' condition: '{{ true }}' @@ -69,7 +69,7 @@ rules: show_at: dashboard - alert: RPCErrors annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=44&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=44&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "RPC errors are interconnection errors between cluster components.\n Possible reasons for errors are misconfiguration, overload, network blips or unreachable components." summary: 'Too many RPC errors for {{`{{`}} $labels.job {{`}}`}} (instance {{`{{`}} $labels.instance {{`}}`}})' condition: '{{ true }}' @@ -87,7 +87,7 @@ rules: show_at: dashboard - alert: TooHighChurnRate annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=102' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=102' description: "VM constantly creates new time series.\n This effect is known as Churn Rate.\n High Churn Rate tightly connected with database performance and may result in unexpected OOM's or slow queries." summary: 'Churn rate is more than 10% for the last 15m' condition: '{{ true }}' @@ -102,7 +102,7 @@ rules: severity: warning - alert: TooHighChurnRate24h annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=102' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=102' description: "The number of created new time series over last 24h is 3x times higher than current number of active series.\n This effect is known as Churn Rate.\n High Churn Rate tightly connected with database performance and may result in unexpected OOM's or slow queries." 
summary: 'Too high number of new series created over last 24h' condition: '{{ true }}' @@ -115,7 +115,7 @@ rules: severity: warning - alert: TooHighSlowInsertsRate annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=108' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=108' description: 'High rate of slow inserts may be a sign of resource exhaustion for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series. See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183' summary: 'Percentage of slow inserts is more than 5% for the last 15m' condition: '{{ true }}' @@ -130,7 +130,7 @@ rules: severity: warning - alert: VminsertVmstorageConnectionIsSaturated annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=139&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=139&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "The connection between vminsert (instance {{`{{`}} $labels.instance {{`}}`}}) and vmstorage (instance {{`{{`}} $labels.addr {{`}}`}}) is saturated by more than 90% and vminsert won't be able to keep up.\n This usually means that more vminsert or vmstorage nodes must be added to the cluster in order to increase the total number of vminsert -> vmstorage links." 
summary: 'Connection between vminsert on {{`{{`}} $labels.instance {{`}}`}} and vmstorage on {{`{{`}} $labels.addr {{`}}`}} is saturated' condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmoperator.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmoperator.yaml index f7ee310de..62547e6c2 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmoperator.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmoperator.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} condition: '{{ true }}' name: vmoperator rules: diff --git a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmsingle.yaml b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmsingle.yaml index af41585fa..8108be292 100644 --- a/charts/victoria-metrics-k8s-stack/files/rules/generated/vmsingle.yaml +++ b/charts/victoria-metrics-k8s-stack/files/rules/generated/vmsingle.yaml @@ -1,7 +1,7 @@ {{- $Values := (.helm).Values | default .Values }} {{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }} {{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }} -{{- $host := index (($Values.grafana).ingress).hosts 0 }} +{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }} concurrency: 2 condition: '{{ true }}' interval: 30s @@ -9,7 +9,7 @@ name: vmsingle rules: - alert: DiskRunsOutOfSpaceIn3Days annotations: - dashboard: '{{ $host 
}}/d/wNf0q_kZk?viewPanel=73&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/wNf0q_kZk?viewPanel=73&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "Taking into account current ingestion rate, free disk space will be enough only for {{`{{`}} $value | humanizeDuration {{`}}`}} on instance {{`{{`}} $labels.instance {{`}}`}}.\n Consider to limit the ingestion rate, decrease retention or scale the disk space if possible." summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will run out of disk space soon' condition: '{{ true }}' @@ -26,7 +26,7 @@ rules: severity: critical - alert: NodeBecomesReadonlyIn3Days annotations: - dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "Taking into account current ingestion rate and free disk space instance {{`{{`}} $labels.instance {{`}}`}} is writable for {{`{{`}} $value | humanizeDuration {{`}}`}}.\n Consider to limit the ingestion rate, decrease retention or scale the disk space up if possible." summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will become read-only in 3 days' condition: '{{ true }}' @@ -43,7 +43,7 @@ rules: severity: warning - alert: DiskRunsOutOfSpace annotations: - dashboard: '{{ $host }}/d/wNf0q_kZk?viewPanel=53&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/wNf0q_kZk?viewPanel=53&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "Disk utilisation on instance {{`{{`}} $labels.instance {{`}}`}} is more than 80%.\n Having less than 20% of free disk space could cripple merge processes and overall performance. Consider to limit the ingestion rate, decrease retention or scale the disk space if possible." 
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} (job={{`{{`}} $labels.job {{`}}`}}) will run out of disk space soon' condition: '{{ true }}' @@ -58,7 +58,7 @@ rules: severity: critical - alert: RequestErrorsToAPI annotations: - dashboard: '{{ $host }}/d/wNf0q_kZk?viewPanel=35&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/wNf0q_kZk?viewPanel=35&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'Requests to path {{`{{`}} $labels.path {{`}}`}} are receiving errors. Please verify if clients are sending correct requests.' summary: 'Too many errors served for path {{`{{`}} $labels.path {{`}}`}} (instance {{`{{`}} $labels.instance {{`}}`}})' condition: '{{ true }}' @@ -68,7 +68,7 @@ rules: severity: warning - alert: TooHighChurnRate annotations: - dashboard: '{{ $host }}/d/wNf0q_kZk?viewPanel=66&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/wNf0q_kZk?viewPanel=66&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "VM constantly creates new time series on \"{{`{{`}} $labels.instance {{`}}`}}\".\n This effect is known as Churn Rate.\n High Churn Rate tightly connected with database performance and may result in unexpected OOM's or slow queries." summary: 'Churn rate is more than 10% on "{{`{{`}} $labels.instance {{`}}`}}" for the last 15m' condition: '{{ true }}' @@ -83,7 +83,7 @@ rules: severity: warning - alert: TooHighChurnRate24h annotations: - dashboard: '{{ $host }}/d/wNf0q_kZk?viewPanel=66&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/wNf0q_kZk?viewPanel=66&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: "The number of created new time series over last 24h is 3x times higher than current number of active series on \"{{`{{`}} $labels.instance {{`}}`}}\".\n This effect is known as Churn Rate.\n High Churn Rate tightly connected with database performance and may result in unexpected OOM's or slow queries." 
summary: 'Too high number of new series on "{{`{{`}} $labels.instance {{`}}`}}" created over last 24h' condition: '{{ true }}' @@ -96,7 +96,7 @@ rules: severity: warning - alert: TooHighSlowInsertsRate annotations: - dashboard: '{{ $host }}/d/wNf0q_kZk?viewPanel=68&var-instance={{`{{`}} $labels.instance {{`}}`}}' + dashboard: '{{ $grafanaHost }}/d/wNf0q_kZk?viewPanel=68&var-instance={{`{{`}} $labels.instance {{`}}`}}' description: 'High rate of slow inserts on "{{`{{`}} $labels.instance {{`}}`}}" may be a sign of resource exhaustion for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series. See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183' summary: 'Percentage of slow inserts is more than 5% on "{{`{{`}} $labels.instance {{`}}`}}" for the last 15m' condition: '{{ true }}' diff --git a/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl b/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl index bbb1e19e9..01ab0fd88 100644 --- a/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl +++ b/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl @@ -14,8 +14,8 @@ {{- $baseURL := include "vm.url" . -}} {{- $tenant := $Values.tenant | default 0 -}} {{- $_ := set $endpoint "url" (printf "%s/select/%d/prometheus" $baseURL (int $tenant)) -}} - {{- else if $Values.externalVM.read.url -}} - {{- $endpoint = $Values.externalVM.read -}} + {{- else if $Values.external.vm.read.url -}} + {{- $endpoint = $Values.external.vm.read -}} {{- end -}} {{- toYaml $endpoint -}} {{- end }} @@ -33,8 +33,8 @@ {{- $baseURL := include "vm.url" . 
-}} {{- $tenant := $Values.tenant | default 0 -}} {{- $_ := set $endpoint "url" (printf "%s/insert/%d/prometheus/api/v1/write" $baseURL (int $tenant)) -}} - {{- else if $Values.externalVM.write.url -}} - {{- $endpoint = $Values.externalVM.write -}} + {{- else if $Values.external.vm.write.url -}} + {{- $endpoint = $Values.external.vm.write -}} {{- end -}} {{- toYaml $endpoint -}} {{- end -}} @@ -135,7 +135,7 @@ {{- define "vm.agent.remote.write" -}} {{- $Values := (.helm).Values | default .Values }} {{- $remoteWrites := $Values.vmagent.additionalRemoteWrites | default list -}} - {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled $Values.externalVM.write.url -}} + {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled $Values.external.vm.write.url -}} {{- $remoteWrites = append $remoteWrites (fromYaml (include "vm.write.endpoint" .)) -}} {{- end -}} {{- toYaml (dict "remoteWrite" $remoteWrites) -}} @@ -169,12 +169,12 @@ {{- $readURL := urlParse (include "vm.url" .) -}} {{- $_ := set $readURL "path" (printf "%s/select" $readURL.path) -}} {{- $_ := set . "vm" (dict "read" $readURL "write" $writeURL) -}} - {{- else if or $Values.externalVM.read.url $Values.externalVM.write.url -}} + {{- else if or $Values.external.vm.read.url $Values.external.vm.write.url -}} {{- $_ := set . "vm" (default dict) -}} - {{- with $Values.externalVM.read.url -}} + {{- with $Values.external.vm.read.url -}} {{- $_ := set $.vm "read" (urlParse .) -}} {{- end -}} - {{- with $Values.externalVM.write.url -}} + {{- with $Values.external.vm.write.url -}} {{- $_ := set $.vm "write" (urlParse .) -}} {{- end -}} {{- end -}} @@ -277,7 +277,7 @@ {{- $ctx := . 
}} {{- $Values := (.helm).Values | default .Values }} {{- $datasources := $Values.defaultDatasources.extra | default list -}} - {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled $Values.externalVM.read -}} + {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled $Values.external.vm.read -}} {{- $readEndpoint:= include "vm.read.endpoint" $ctx | fromYaml -}} {{- $defaultDatasources := default list -}} {{- range $ds := $Values.defaultDatasources.victoriametrics.datasources }} diff --git a/charts/victoria-metrics-k8s-stack/values.yaml b/charts/victoria-metrics-k8s-stack/values.yaml index d30e94ed8..cc722aecb 100644 --- a/charts/victoria-metrics-k8s-stack/values.yaml +++ b/charts/victoria-metrics-k8s-stack/values.yaml @@ -248,18 +248,22 @@ additionalVictoriaMetricsMap: # - record: my_record # expr: 100 * my_record -# -- External VM read and write URLs -externalVM: - read: - url: "" - # bearerTokenSecret: - # name: dbaas-read-access-token - # key: bearerToken - write: - url: "" - # bearerTokenSecret: - # name: dbaas-read-access-token - # key: bearerToken +external: + # -- External Grafana host + grafana: + host: grafana.external.host + # -- External VM read and write URLs + vm: + read: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken + write: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken # Configures vmsingle params vmsingle: @@ -732,8 +736,8 @@ vmauth: annotations: {} # -- (object) Full spec for VMAuth CRD. 
Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmauthspec) # It's possible to use given below predefined variables in spec: - # * `{{ .vm.read }}` - parsed vmselect, vmsingle or externalVM.read URL - # * `{{ .vm.write }}` - parsed vminsert, vmsingle or externalVM.write URL + # * `{{ .vm.read }}` - parsed vmselect, vmsingle or external.vm.read URL + # * `{{ .vm.write }}` - parsed vminsert, vmsingle or external.vm.write URL spec: port: "8427" unauthorizedUserAccessSpec: diff --git a/hack/rules-and-dashboards/sync_rules.py b/hack/rules-and-dashboards/sync_rules.py index 5b39bc53c..068ed57e0 100644 --- a/hack/rules-and-dashboards/sync_rules.py +++ b/hack/rules-and-dashboards/sync_rules.py @@ -178,7 +178,7 @@ def cluster_label_var(mo): "limitGroup": ["kubernetes-storage"], }, "http://localhost:3000": { - "replacement": "[[ $host ]]", + "replacement": "[[ $grafanaHost ]]", "init": "", }, 'job="alertmanager-main"': { @@ -275,7 +275,7 @@ def write_group_to_file(group, url, charts): content += "{{- $Values := (.helm).Values | default .Values }}\n" content += '{{- $runbookUrl := ($Values.defaultRules).runbookUrl | default "https://runbooks.prometheus-operator.dev/runbooks" }}\n' content += '{{- $clusterLabel := ($Values.global).clusterLabel | default "cluster" }}\n' - content += "{{- $host := index (($Values.grafana).ingress).hosts 0 }}\n" + content += "{{- $grafanaHost := ternary (index (($Values.grafana).ingress).hosts 0) (($Values.external).grafana).host ($Values.grafana).enabled }}\n" content += escape(lines) f.write(content)