Skip to content

Commit

Permalink
Remove deprecated fluentd/fluentbit alerts (#134)
Browse files Browse the repository at this point in the history
  • Loading branch information
hectorhuertas authored Nov 11, 2024
1 parent 95e82e3 commit 95aec9e
Showing 1 changed file with 0 additions and 49 deletions.
49 changes: 0 additions & 49 deletions common/logging.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -12,55 +12,6 @@ groups:
annotations:
summary: "{{ $labels.instance }} log forwarder is not exposing metrics for 30m"
action: "ssh into {{ $labels.instance }} and make sure `promtail.service` is running"
- alert: LogForwarderFailingToInput(kube)
expr: rate(fluentd_input_status_num_records_total{job="kubernetes-pods",kubernetes_pod_name=~"forwarder-.*"}[5m]) == 0
for: 2h
labels:
team: infra
annotations:
summary: "{{ $labels.kubernetes_pod_name }} can't ingest logs from {{ $labels.input }} for 2h"
dashboard: <https://grafana.$ENVIRONMENT.$PROVIDER.uw.systems/d/bk2muXYMz/log-forwarder?var-forwarder_pod={{ $labels.kubernetes_pod_name }}|link>
- alert: LogForwarderFailingToOutput(kube)
expr: rate(fluentd_output_status_retry_count{job="kubernetes-pods",kubernetes_pod_name=~"forwarder-.*"}[5m]) > 0
for: 15m
labels:
team: infra
annotations:
summary: "{{ $labels.kubernetes_pod_name }} can't forward logs for 15m"
dashboard: <https://grafana.$ENVIRONMENT.$PROVIDER.uw.systems/d/bk2muXYMz/log-forwarder?var-forwarder_pod={{ $labels.kubernetes_pod_name }}|link>
- alert: LogForwarderBufferFillingUp(kube)
expr: fluentd_output_status_buffer_available_space_ratio{job="kubernetes-pods",kubernetes_pod_name=~"forwarder-.*"} < 95
for: 15m
labels:
team: infra
annotations:
summary: "Forwarder buffer is over 5%"
dashboard: <https://grafana.$ENVIRONMENT.$PROVIDER.uw.systems/d/bk2muXYMz/log-forwarder?var-forwarder_pod={{ $labels.kubernetes_pod_name }}|link>
- alert: LogForwarderDroppingSystemLogs
expr: rate(log_forwarder_messages_total{log_kube_namespace=~"kube-system|sys-*", log_kube_app!="apiserver", log_kube_app!="kube-controller"}[5m]) > 10
for: 10m
labels:
team: infra
annotations:
summary: "{{ $labels.log_kube_namespace }}/{{ $labels.log_kube_app }} is being noisy and dropping logs"
dashboard: <https://grafana.$ENVIRONMENT.$PROVIDER.uw.systems/d/bk2muXYMz/log-forwarder?var-forwarder_pod={{ $labels.kubernetes_pod_name }}|link>
# Logs drop at 4000logs/60s at each aggregator, but since our scrape_interval is 1m, alerting on a more reliable 5m range
- alert: LogAggregatorDroppingSystemLogs
expr: sum(rate(log_aggregator_messages_total{log_kube_namespace=~"kube-system|sys-*"}[5m])) by (log_kube_namespace, kubernetes_pod_name) > 60
for: 10m
labels:
team: infra
annotations:
summary: "{{ $labels.log_kube_namespace }} is being noisy and dropping logs on aggregator {{ $labels.kubernetes_pod_name }}"
dashboard: <https://grafana.$ENVIRONMENT.$PROVIDER.uw.systems/d/vcsXDH2mz/fluentd-aggregators?var-kube_namespace_name={{ $labels.log_kube_namespace }}&&viewPanel=22|link>
- alert: LogAggregatorBufferFillingUp(kube)
expr: fluentd_output_status_buffer_available_space_ratio{job="kubernetes-pods",kubernetes_pod_name=~"fluentd-.*"} < 50
for: 15m
labels:
team: infra
annotations:
summary: "Log aggregator buffer is over 50%"
dashboard: <https://grafana.$ENVIRONMENT.$PROVIDER.uw.systems/d/vcsXDH2mz/fluentd-aggregators?orgId=1&refresh=5m|link>
- alert: PromtailThrottling
expr: label_replace(rate(logentry_dropped_lines_by_label_total{label_name="limit_key", label_value=~"kube-system.*|sys-.*"}[5m]) > 10, "label_namespace", "$1", "label_value", `(.*)\/(.*)`)
for: 10m
Expand Down

0 comments on commit 95aec9e

Please sign in to comment.