diff --git a/charts/kube-prometheus-stack/Chart.yaml b/charts/kube-prometheus-stack/Chart.yaml index 7b32e0f62acf..62506308cd32 100644 --- a/charts/kube-prometheus-stack/Chart.yaml +++ b/charts/kube-prometheus-stack/Chart.yaml @@ -23,7 +23,7 @@ sources: - https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-operator/kube-prometheus version: 43.0.0 -appVersion: 0.60.1 +appVersion: 0.61.1 kubeVersion: ">=1.16.0-0" home: https://github.com/prometheus-operator/kube-prometheus keywords: diff --git a/charts/kube-prometheus-stack/README.md b/charts/kube-prometheus-stack/README.md index b0a88bfb01d5..536afdfee142 100644 --- a/charts/kube-prometheus-stack/README.md +++ b/charts/kube-prometheus-stack/README.md @@ -81,7 +81,23 @@ _See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documen A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions. ### From 42.x to 43.x -Grafana sub chart was [updated to version 6.45.0](https://github.com/prometheus-community/helm-charts/commit/8b4c79314847e07d7af10826d5c68c3d625fcc66). + +This version upgrades Prometheus-Operator to v0.61.1, Prometheus to v2.40.5 and Thanos to v0.29.0. + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +The Grafana sub chart was [updated to version 6.45.0](https://github.com/prometheus-community/helm-charts/commit/8b4c79314847e07d7af10826d5c68c3d625fcc66). 
Users with Grafana sub chart enabled using persistence like this: ```yaml diff --git a/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml b/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml index 744a9f95f871..858e8dcd8bc3 100644 --- a/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml +++ b/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -313,8 +313,8 @@ spec: description: TLS configuration properties: ca: - description: Struct containing the CA cert to use - for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to use @@ -361,8 +361,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert file - for the targets. + description: Client certificate to present when doing + client-authentication. properties: configMap: description: ConfigMap containing data to use @@ -724,8 +724,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -773,8 +773,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -1170,8 +1170,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -1219,8 +1219,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -1626,8 +1626,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -1675,8 +1675,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -2160,8 +2160,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. 
properties: configMap: description: ConfigMap containing data to @@ -2209,8 +2209,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -2552,8 +2552,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -2601,8 +2601,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -3026,8 +3026,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -3075,8 +3075,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -3437,8 +3437,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -3486,8 +3486,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -3808,8 +3808,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -3857,8 +3857,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. properties: configMap: description: ConfigMap containing data to @@ -4229,8 +4229,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to - use for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to @@ -4278,8 +4278,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert - file for the targets. + description: Client certificate to present when + doing client-authentication. 
properties: configMap: description: ConfigMap containing data to diff --git a/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml b/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml index 06457f03b827..b2bc5bc9faee 100644 --- a/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml +++ b/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -892,6 +892,22 @@ spec: type: array type: object type: object + alertmanagerConfigMatcherStrategy: + description: The AlertmanagerConfigMatcherStrategy defines how AlertmanagerConfig + objects match the alerts. In the future more options may be added. + properties: + type: + default: OnNamespace + description: If set to `OnNamespace`, the operator injects a label + matcher matching the namespace of the AlertmanagerConfig object + for all its routes and inhibition rules. `None` will not add + any additional matchers other than the ones specified in the + AlertmanagerConfig. Default is `OnNamespace`. + enum: + - OnNamespace + - None + type: string + type: object alertmanagerConfigNamespaceSelector: description: Namespaces to be selected for AlertmanagerConfig discovery. If nil, only check own namespace. @@ -1205,8 +1221,8 @@ spec: description: TLS configuration for the client. properties: ca: - description: Struct containing the CA cert to use - for the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to use @@ -1253,8 +1269,8 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert file - for the targets. + description: Client certificate to present when doing + client-authentication. properties: configMap: description: ConfigMap containing data to use @@ -1430,10 +1446,11 @@ spec: for this Alertmanager instance. If empty, it defaults to `alertmanager-`. \n The Alertmanager configuration should be available under the `alertmanager.yaml` key. Additional keys from the original secret - are copied to the generated secret. \n If either the secret or the - `alertmanager.yaml` key is missing, the operator provisions an Alertmanager - configuration with one empty receiver (effectively dropping alert - notifications)." + are copied to the generated secret and mounted into the `/etc/alertmanager/config` + directory in the `alertmanager` container. \n If either the secret + or the `alertmanager.yaml` key is missing, the operator provisions + a minimal Alertmanager configuration with one empty receiver (effectively + dropping alert notifications)." type: string containers: description: 'Containers allows injecting additional containers. 
This diff --git a/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml b/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml index a507478507a4..d39a55275119 100644 --- a/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml +++ b/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -42,8 +42,8 @@ spec: by Prometheus. properties: attachMetadata: - description: 'Attaches node metadata to discovered targets. Only valid - for role: pod. Only valid in Prometheus versions 2.35.0 and newer.' + description: Attaches node metadata to discovered targets. Requires + Prometheus v2.35.0 and above. properties: node: description: When set to true, Prometheus must have permissions @@ -477,8 +477,8 @@ spec: description: TLS configuration to use when scraping the endpoint. properties: ca: - description: Struct containing the CA cert to use for the - targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the @@ -522,8 +522,7 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert file for - the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the diff --git a/charts/kube-prometheus-stack/crds/crd-probes.yaml b/charts/kube-prometheus-stack/crds/crd-probes.yaml index 8960962755c5..d82abd8f9f22 100644 --- a/charts/kube-prometheus-stack/crds/crd-probes.yaml +++ b/charts/kube-prometheus-stack/crds/crd-probes.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -589,7 +589,8 @@ spec: description: TLS configuration to use when scraping the endpoint. properties: ca: - description: Struct containing the CA cert to use for the targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the targets. @@ -630,7 +631,7 @@ spec: x-kubernetes-map-type: atomic type: object cert: - description: Struct containing the client cert file for the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the targets. 
diff --git a/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml b/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml index 13ddec89b02c..e908b4f0b0ca 100644 --- a/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml +++ b/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -1054,6 +1054,9 @@ spec: description: BearerTokenFile to read from filesystem to use when authenticating to Alertmanager. type: string + enableHttp2: + description: Whether to enable HTTP2. + type: boolean name: description: Name of Endpoints object in Namespace. type: string @@ -1082,8 +1085,8 @@ spec: description: TLS Config to use for alertmanager connection. properties: ca: - description: Struct containing the CA cert to use for - the targets. + description: Certificate authority used when verifying + server certificates. properties: configMap: description: ConfigMap containing data to use for @@ -1134,8 +1137,8 @@ spec: to use for the targets. type: string cert: - description: Struct containing the client cert file - for the targets. + description: Client certificate to present when doing + client-authentication. properties: configMap: description: ConfigMap containing data to use for @@ -1328,8 +1331,8 @@ spec: description: TLS Config to use for accessing apiserver. properties: ca: - description: Struct containing the CA cert to use for the - targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the @@ -1377,8 +1380,7 @@ spec: to use for the targets. type: string cert: - description: Struct containing the client cert file for the - targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the @@ -4574,6 +4576,11 @@ spec: bearerTokenFile: description: File to read bearer token for remote read. type: string + filterExternalLabels: + description: Whether to use the external labels as selectors + for the remote read endpoint. Requires Prometheus v2.34.0 + and above. + type: boolean headers: additionalProperties: type: string @@ -4696,8 +4703,8 @@ spec: description: TLS Config to use for remote read. properties: ca: - description: Struct containing the CA cert to use for the - targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the @@ -4745,8 +4752,7 @@ spec: to use for the targets. type: string cert: - description: Struct containing the client cert file for - the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the @@ -5147,8 +5153,8 @@ spec: description: TLS Config to use for remote write. properties: ca: - description: Struct containing the CA cert to use for the - targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the @@ -5196,8 +5202,7 @@ spec: to use for the targets. 
type: string cert: - description: Struct containing the client cert file for - the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the @@ -6436,8 +6441,8 @@ spec: Maps to the ''--grpc-server-tls-*'' CLI args.' properties: ca: - description: Struct containing the CA cert to use for the - targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the @@ -6485,8 +6490,7 @@ spec: to use for the targets. type: string cert: - description: Struct containing the client cert file for the - targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the diff --git a/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml b/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml index 6fd613b8d137..f139ffef688b 100644 --- a/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml +++ b/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -44,18 +44,26 @@ spec: groups: description: Content of Prometheus rule file items: - description: 'RuleGroup is a list of sequentially evaluated recording - and alerting rules. Note: PartialResponseStrategy is only used - by ThanosRuler and will be ignored by Prometheus instances. Valid - values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response' + description: RuleGroup is a list of sequentially evaluated recording + and alerting rules. properties: interval: + description: Interval determines how often rules in the group + are evaluated. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string name: + description: Name of the rule group. + minLength: 1 type: string partial_response_strategy: + default: "" + description: 'PartialResponseStrategy is only used by ThanosRuler + and will be ignored by Prometheus instances. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response' + pattern: ^(?i)(abort|warn)?$ type: string rules: + description: List of alerting and recording rules. items: description: 'Rule describes an alerting or recording rule See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) @@ -63,23 +71,35 @@ spec: rule' properties: alert: + description: Name of the alert. Must be a valid label + value. Only one of `record` and `alert` must be set. type: string annotations: additionalProperties: type: string + description: Annotations to add to each alert. Only valid + for alerting rules. type: object expr: anyOf: - type: integer - type: string + description: PromQL expression to evaluate. x-kubernetes-int-or-string: true for: + description: Alerts are considered firing once they have + been returned for this long. 
+ pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string labels: additionalProperties: type: string + description: Labels to add or overwrite. type: object record: + description: Name of the time series to output to. Must + be a valid metric name. Only one of `record` and `alert` + must be set. type: string required: - expr @@ -90,6 +110,9 @@ spec: - rules type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object required: - spec diff --git a/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml b/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml index a3159b6ba743..92ecc354db09 100644 --- a/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml +++ b/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -41,6 +41,15 @@ spec: description: Specification of desired Service selection for target discovery by Prometheus. properties: + attachMetadata: + description: Attaches node metadata to discovered targets. Requires + Prometheus v2.37.0 and above. + properties: + node: + description: When set to true, Prometheus must have permissions + to get Nodes. + type: boolean + type: object endpoints: description: A list of endpoints allowed as part of this ServiceMonitor. items: @@ -147,6 +156,10 @@ spec: enableHttp2: description: Whether to enable HTTP2. type: boolean + filterRunning: + description: 'Drop pods that are not running. (Failed, Succeeded). + Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase' + type: boolean followRedirects: description: FollowRedirects configures whether scrape requests follow HTTP 3xx redirects. @@ -436,8 +449,8 @@ spec: description: TLS configuration to use when scraping the endpoint properties: ca: - description: Struct containing the CA cert to use for the - targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the @@ -485,8 +498,7 @@ spec: to use for the targets. type: string cert: - description: Struct containing the client cert file for - the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the diff --git a/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml b/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml index 0b8cf0d40d8b..755972768138 100644 --- a/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml +++ b/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml @@ -1,4 +1,4 @@ -# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition @@ -2238,7 +2238,8 @@ spec: the ''--grpc-server-tls-*'' CLI args.' 
properties: ca: - description: Struct containing the CA cert to use for the targets. + description: Certificate authority used when verifying server + certificates. properties: configMap: description: ConfigMap containing data to use for the targets. @@ -2283,7 +2284,7 @@ spec: use for the targets. type: string cert: - description: Struct containing the client cert file for the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the targets. diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml index b6b3e1d4c392..c51cfe746f8b 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml @@ -2545,7 +2545,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", + "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}namespace{{`}}`}}", @@ -2634,7 +2634,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}namespace{{`}}`}}", @@ -2895,7 +2895,7 @@ data: ], "targets": [ { - "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", 
device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2904,7 +2904,7 @@ data: "step": 10 }, { - "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2913,7 +2913,7 @@ data: "step": 10 }, { - "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2922,7 +2922,7 @@ data: "step": 10 }, { - "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2931,7 +2931,7 @@ data: "step": 10 }, { - "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2940,7 +2940,7 @@ data: "step": 10 }, { - "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", 
cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml index 9151a2884bbd..a409a5ec4ebf 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml @@ -2227,7 +2227,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}pod{{`}}`}}", @@ -2316,7 +2316,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}pod{{`}}`}}", @@ -2577,7 +2577,7 @@ data: ], "targets": [ { - "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", 
container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2586,7 +2586,7 @@ data: "step": 10 }, { - "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2595,7 +2595,7 @@ data: "step": 10 }, { - "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2604,7 +2604,7 @@ data: "step": 10 }, { - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2613,7 +2613,7 @@ data: "step": 10 }, { - "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2622,7 +2622,7 @@ 
data: "step": 10 }, { - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml index 4efba7032c9c..ac1e134cd634 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml @@ -1665,7 +1665,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Reads", @@ -1673,7 +1673,7 @@ data: "step": 10 }, { - "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", + "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Writes", @@ -1762,7 +1762,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", 
cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Reads", @@ -1770,7 +1770,7 @@ data: "step": 10 }, { - "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "Writes", @@ -2222,7 +2222,7 @@ data: ], "targets": [ { - "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2231,7 +2231,7 @@ data: "step": 10 }, { - "expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2240,7 +2240,7 @@ data: "step": 10 }, { - "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2249,7 +2249,7 @@ data: "step": 10 }, { - "expr": "sum by(container) 
(rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2258,7 +2258,7 @@ data: "step": 10 }, { - "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2267,7 +2267,7 @@ data: "step": 10 }, { - "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml index a0bdbb75c61c..31108728b503 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml @@ -918,7 +918,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", cluster=\"$cluster\"})))\n", + "expr": "sum without (device) (\n max without (fstype, mountpoint) ((\n 
node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"})))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml index 3a706b849398..7e5df089992b 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml @@ -495,21 +495,21 @@ data: "steppedLine": false, "targets": [ { - "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\"}[$__rate_interval])", + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{`{{`}}device{{`}}`}} read", "refId": "A" }, { - "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\"}[$__rate_interval])", + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{`{{`}}device{{`}}`}} written", "refId": "B" }, { - "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\"}[$__rate_interval])", + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{`{{`}}device{{`}}`}} io time", @@ -663,14 +663,14 @@ data: "span": 6, "targets": [ { - "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"})\n", + "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "" }, { - "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"})\n", + "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml index d575e048ec02..4e9c83232e3c 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml @@ -488,21 +488,21 @@ data: "steppedLine": false, 
"targets": [ { - "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\"}[$__rate_interval])", + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{`{{`}}device{{`}}`}} read", "refId": "A" }, { - "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\"}[$__rate_interval])", + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{`{{`}}device{{`}}`}} written", "refId": "B" }, { - "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)\"}[$__rate_interval])", + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{`{{`}}device{{`}}`}} io time", @@ -656,14 +656,14 @@ data: "span": 6, "targets": [ { - "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"})\n", + "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "" }, { - "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"})\n", + "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", "format": "table", "instant": true, "intervalFactor": 2, diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml index 124ee371aff0..050231e258ae 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml @@ -118,7 +118,7 @@ data: ], "type": "number", - "unit": "short" + "unit": "s" }, { "alias": "Instance", diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml index c3cfe36ca257..7b4ebb9b5f0c 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml @@ -62,9 +62,9 @@ spec: record: instance:node_memory_utilisation:ratio - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) record: instance:node_vmstat_pgmajfault:rate5m - - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"}[5m]) + - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", 
device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) record: instance_device:node_disk_io_time_seconds:rate5m - - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"}[5m]) + - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) record: instance_device:node_disk_io_time_weighted_seconds:rate5m - expr: |- sum without (device) ( diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml index 2fa7e28d3cb1..a3bdb226a2f2 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml @@ -35,11 +35,11 @@ spec: summary: Filesystem is predicted to run out of space within the next 24 hours. expr: |- ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15 + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0 + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: @@ -59,11 +59,11 @@ spec: summary: Filesystem is predicted to run out of space within the next 4 hours. expr: |- ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 10 + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0 + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: @@ -83,9 +83,9 @@ spec: summary: Filesystem has less than 5% space left. expr: |- ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5 + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 30m labels: @@ -105,9 +105,9 @@ spec: summary: Filesystem has less than 3% space left. 
expr: |- ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3 + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 30m labels: @@ -127,11 +127,11 @@ spec: summary: Filesystem is predicted to run out of inodes within the next 24 hours. expr: |- ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40 + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0 + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: @@ -151,11 +151,11 @@ spec: summary: Filesystem is predicted to run out of inodes within the next 4 hours. expr: |- ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20 + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0 + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: @@ -175,9 +175,9 @@ spec: summary: Filesystem has less than 5% inodes left. expr: |- ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5 + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: @@ -197,9 +197,9 @@ spec: summary: Filesystem has less than 3% inodes left. expr: |- ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3 + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 ) for: 1h labels: @@ -331,7 +331,7 @@ spec: description: RAID array '{{`{{`}} $labels.device {{`}}`}}' on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. 
runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/noderaiddegraded summary: RAID Array is degraded - expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"}) > 0 + expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 for: 15m labels: severity: critical @@ -348,7 +348,7 @@ spec: description: At least one device in RAID array on {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap. runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/noderaiddiskfailure summary: Failed device in RAID array - expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"} > 0 + expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 labels: severity: warning {{- if .Values.defaultRules.additionalRuleLabels }} diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml index 4f8da294fa6b..ee1a66f92127 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml @@ -31,11 +31,11 @@ spec: )) record: 'node_namespace_pod:kube_pod_info:' - expr: |- - count by (cluster, node) (sum by (node, cpu) ( - node_cpu_seconds_total{job="node-exporter"} - * on (namespace, pod) group_left(node) + count by (cluster, node) ( + node_cpu_seconds_total{mode="idle",job="node-exporter"} + * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:) - )) + ) record: node:node_num_cpu:sum - expr: |- sum( @@ -49,7 +49,15 @@ spec: ) by (cluster) record: :node_memory_MemAvailable_bytes:sum - expr: |- - sum(rate(node_cpu_seconds_total{job="node-exporter",mode!="idle",mode!="iowait",mode!="steal"}[5m])) / - count(sum(node_cpu_seconds_total{job="node-exporter"}) by (cluster, instance, cpu)) + avg by (cluster, node) ( + sum without (mode) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) + ) + ) + record: node:node_cpu_utilization:ratio_rate5m + - expr: |- + avg by (cluster) ( + node:node_cpu_utilization:ratio_rate5m + ) record: cluster:node_cpu:ratio_rate5m {{- end }} \ No newline at end of file diff --git a/charts/kube-prometheus-stack/values.yaml b/charts/kube-prometheus-stack/values.yaml index e5d805cd19a5..8da2879c1320 100644 --- a/charts/kube-prometheus-stack/values.yaml +++ b/charts/kube-prometheus-stack/values.yaml @@ -1908,7 +1908,7 @@ prometheusOperator: image: registry: quay.io repository: prometheus-operator/prometheus-operator - tag: v0.60.1 + tag: v0.61.1 sha: "" pullPolicy: IfNotPresent @@ -1934,7 +1934,7 @@ prometheusOperator: image: registry: quay.io repository: prometheus-operator/prometheus-config-reloader - tag: v0.60.1 + tag: v0.61.1 sha: "" # resource config for prometheusConfigReloader @@ -1951,7 +1951,7 @@ prometheusOperator: thanosImage: registry: quay.io 
repository: thanos/thanos - tag: v0.28.1 + tag: v0.29.0 sha: "" ## Set a Field Selector to filter watched secrets @@ -2389,7 +2389,7 @@ prometheus: image: registry: quay.io repository: prometheus/prometheus - tag: v2.39.1 + tag: v2.40.5 sha: "" ## Tolerations for use with node taints @@ -3231,7 +3231,7 @@ thanosRuler: image: registry: quay.io repository: thanos/thanos - tag: v0.28.1 + tag: v0.29.0 sha: "" ## Namespaces to be selected for PrometheusRules discovery.
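
Two of the changes above lend themselves to quick offline verification.

First, the device regex used by the disk dashboards and the RAID recording/alerting rules now includes `md.+`, so Linux software-RAID devices such as `md0` or `md127` are matched. PromQL's `=~` matcher is fully anchored, which `grep -E` can emulate with `^...$`; a rough sketch (the sample device names are made up):

```console
printf 'md0\nmd127\nsda\nloop0\n' \
  | grep -E '^(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)$'
```

`md0`, `md127` and `sda` should be printed, while `loop0` is filtered out, which matches the intent of the change.

Second, the reworked CPU recording rule can be exercised with `promtool test rules` before the upgrade is rolled out. A minimal sketch, assuming `promtool` (shipped with Prometheus) is on the `PATH`; the file names, the synthetic `node` label and the sample values are invented for illustration, while the rule body is copied verbatim from the `node.rules.yaml` hunk above:

```yaml
# cpu.rules.yaml -- rule body taken from the node.rules.yaml diff above
groups:
  - name: node.rules
    rules:
      - record: node:node_cpu_utilization:ratio_rate5m
        expr: |-
          avg by (cluster, node) (
            sum without (mode) (
              rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m])
            )
          )
```

```yaml
# cpu.rules.test.yaml -- run with: promtool test rules cpu.rules.test.yaml
rule_files:
  - cpu.rules.yaml
evaluation_interval: 1m
tests:
  - interval: 1m
    input_series:
      # 30s of user CPU per scraped minute on a single core => utilisation 0.5;
      # the idle series is present only to mimic real node-exporter output.
      - series: 'node_cpu_seconds_total{job="node-exporter",node="n1",cpu="0",mode="user"}'
        values: '0+30x10'
      - series: 'node_cpu_seconds_total{job="node-exporter",node="n1",cpu="0",mode="idle"}'
        values: '0+30x10'
    promql_expr_test:
      - expr: node:node_cpu_utilization:ratio_rate5m
        eval_time: 10m
        exp_samples:
          - labels: 'node:node_cpu_utilization:ratio_rate5m{node="n1"}'
            value: 0.5
```

A passing run should print `SUCCESS`; the same pattern applies to the new `cluster:node_cpu:ratio_rate5m` aggregate, which simply averages the per-node series per cluster.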