---
# Default values for vault-monitoring.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# -- name override to be used for helm
nameOverride: ''
# -- full name override to be used for helm
fullnameOverride: ''
# Connection, authentication, service and ServiceMonitor settings for the
# monitored vault server.
# NOTE(review): nesting was reconstructed from a paste that had lost its
# indentation; auth, service and serviceMonitor are assumed to live under
# `vault` -- confirm against the chart templates.
vault:
  # -- the vault servername
  serverName: vault.example.com
  # -- the vault port to connect to
  port: 443
  # -- the vault portName to use in the services
  portName: https
  # -- the CA path to include in the configuration
  ca_path: /etc/vault/ssl/ca.crt
  # -- the vault ip to connect to
  ip: 10.1.2.3
  # -- the scheme to use for connection
  scheme: https
  # -- if set a secret is created that must be mounted in Prometheus for the ServiceMonitor to work
  ca: ""
  auth:
    # -- where the kubernetes auth is mounted on vault
    mount_path: auth/kubernetes
    # -- the vault role to use for connection
    role: metrics
  service:
    # -- which type the vault service has. For connecting to an external vault server, choose ExternalName
    type: ExternalName
    # -- definition of the vault service selector for endpoint selection. Keep empty for using ExternalName
    selector: {}
  serviceMonitor:
    # -- whether or not the serviceMonitor should be created
    create: true
    # -- labels to set on the vault serviceMonitor
    labels: {}
    # -- scrapeTimeout of the serviceMonitor
    scrapeTimeout: 30s
    # -- Specify to enable authentication
    authentication: true
    # -- Specify a bearerTokenFile for authentication. Keep empty for Unauthenticated access
    bearerTokenFile: '/etc/prometheus/config_out/.vault-token'
# Prometheus alerting rules shipped with the chart. Each entry under `rules`
# is one alert: name, PromQL expression, hold duration (`for`), labels and
# annotations.
prometheusRules:
  # -- whether or not the prometheus alerts are enabled
  enabled: false
  # -- set namespace where the prometheusRules will be created
  namespace: ""
  # -- labels to set in the prometheus alerts
  labels: {}
  # -- set of prometheus alerts to define
  # @default -- list of predefined alerts
  rules:
    - alert: VaultAutopilotNodeHealthy
      # The metric is set to 1 if Autopilot considers all nodes healthy,
      # so the alert fires once it drops below 1.
      # https://www.vaultproject.io/docs/internals/telemetry#integrated-storage-raft-autopilot
      expr: vault_autopilot_healthy < 1
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: Autopilot Vault Raft node unhealthy
        description: At least one of the Autopilot Vault Raft nodes is unhealthy
    - alert: VaultLeadershipLoss
      expr: sum(increase(vault_core_leadership_lost_count{job="vault-monitoring"}[1h])) > 5
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: High frequency of Vault leadership losses
        description: There have been more than 5 Vault leadership losses in the past 1h
    - alert: VaultLeadershipStepDowns
      expr: sum(increase(vault_core_step_down_count{job="vault-monitoring"}[1h])) > 5
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: High frequency of Vault leadership step downs
        description: There have been more than 5 Vault leadership step downs in the past 1h
    - alert: VaultLeadershipSetupFailures
      expr: sum(increase(vault_core_leadership_setup_failed{job="vault-monitoring"}[1h])) > 5
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: High frequency of Vault leadership setup failures
        description: There have been more than 5 Vault leadership setup failures in the past 1h
    - alert: VaultRequestFailures
      expr: increase(vault_audit_log_request_failure[5m]) > 0
      for: 15m
      labels:
        severity: critical
      annotations:
        summary: High frequency of failed Vault requests
        description: There has been an increased number of failed Vault requests in the last 15 minutes
    - alert: VaultResponseFailures
      expr: increase(vault_audit_log_response_failure[5m]) > 0
      for: 15m
      labels:
        severity: critical
      annotations:
        summary: High frequency of failed Vault responses
        description: There has been an increased number of failed Vault responses in the last 15 minutes
    - alert: VaultTokenCreate
      expr: increase(vault_token_create_count[5m]) > 100
      for: 15m
      labels:
        severity: critical
      annotations:
        summary: High frequency of created Vault token
        description: There has been an increased number of Vault token creation in the last 15 minutes
    - alert: VaultTokenStore
      expr: increase(vault_token_store_count[5m]) > 100
      for: 15m
      labels:
        severity: critical
      annotations:
        summary: High frequency of stored Vault token
        description: There has been an increased number of Vault token storing in the last 15 minutes
    - alert: VaultLowFailureTolerance
      expr: vault_autopilot_failure_tolerance == 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: Vault Raft Cluster Failure Tolerance Critical
        description: Vault Raft cluster has a failure tolerance of 0