# Access Control List for JupyterHub. It will be written to a file mounted at
# /etc/jupyterhub/acl/acl.yaml and parsed by custom authenticator logic.
acl.yaml: {}
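# As a sketch of what an overriding values file could provide here (the group
# names match what the hub's extraConfig further down expects; the usernames
# are hypothetical):
#
#   acl.yaml:
#     admins:
#       - alice
#     instructors:
#       - bob
#     participants:
#       - carol
#       - dave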

nfs:
  enabled: false
  serverIP: ""
  serverName: ""

gitRepoSync:
  enabled: false

nodeCacher:
  enabled: false

tags:
  # Controls whether Prometheus and Grafana should be installed as part of
  # this meta Helm chart. The chart is configured to use them if they are
  # available. It is also worth noting that access to Grafana relies on
  # JupyterHub's configurable proxy in this setup, so JupyterHub needs to be
  # running for Grafana to be accessible.
  metrics: true
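  # A tag like this can also be overridden at deploy time, for example
  # (the release and chart references here are placeholders):
  #
  #   helm upgrade <release> <chart> --set tags.metrics=false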

jupyterhub:
  hub:
    # NOTE: The readinessProbe is disabled because its bad default values can
    #       make a hub under load become unavailable.
    #
    #       ref: https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues/1732
    readinessProbe:
      enabled: false
    # ref: https://jupyterhub.readthedocs.io/en/stable/reference/services.html#properties-of-a-service
    services:
      grafana:
        # This makes the CHP proxy route /services/grafana to the grafana
        # service in the k8s namespace, which lets us make use of JupyterHub's
        # HTTPS setup without needing something like nginx-ingress +
        # cert-manager and additional ingress k8s resources.
        url: http://grafana
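        # With the service registered like this, Grafana should be reachable at
        # https://<jupyterhub-domain>/services/grafana/, where
        # <jupyterhub-domain> is a placeholder for whatever domain the hub
        # itself is served on.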
    db:
      pvc:
        storageClassName: ssd
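        # NOTE: This assumes the cluster defines a StorageClass named "ssd".
        #       As a rough sketch (hypothetical, assuming GKE's in-tree GCE PD
        #       provisioner), such a class could look like:
        #
        #         apiVersion: storage.k8s.io/v1
        #         kind: StorageClass
        #         metadata:
        #           name: ssd
        #         provisioner: kubernetes.io/gce-pd
        #         parameters:
        #           type: pd-ssd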
    extraConfig:
      00-acl-parsing: |
        import os
        import yaml


        def read_acl(path="/etc/jupyterhub/acl/acl.yaml"):
            """
            Load a mounted Access Control List (ACL) on startup from disk and
            return a dictionary with keys representing group names and with sets
            of usernames as values.
            """
            acl = {}
            if os.path.exists(path):
                print(f"Loading Access Control List (ACL) from {path}")
                with open(path) as f:
                    acl = yaml.safe_load(f)
                for group, usernames in acl.items():
                    acl[group] = {str(username).lower() for username in usernames}
            else:
                print(f"No Access Control List (ACL) at {path}")
            return acl


        acl = read_acl()
        c.Authenticator.admin_users = acl["admins"]
        c.Authenticator.whitelist = set.union(
            acl["admins"],
            acl["instructors"],
            acl["participants"],
        )
    resources:
      requests:
        cpu: 50m
        memory: 1Gi
      limits:
        cpu: 1000m
        memory: 1Gi
  proxy:
    chp:
      resources:
        requests:
          memory: 320Mi
          cpu: 50m
        limits:
          memory: 320Mi
          cpu: 500m
    traefik:
      resources:
        requests:
          memory: 512Mi
          cpu: 50m
        limits:
          memory: 512Mi
          cpu: 1000m

# Reference on the configuration options:
# https://github.com/helm/charts/blob/master/stable/grafana/values.yaml
grafana:
  fullnameOverride: grafana
  # NOTE: It can be useful to be able to render an image of a chart, but that
  #       requires a workaround configuration of the Grafana Helm chart at the
  #       moment.
  #
  #       workaround: https://github.com/helm/charts/issues/21959#issuecomment-640653320
  #       issue with workaround: https://github.com/grafana/grafana/issues/25716
  extraContainers: |
    - name: renderer
      image: grafana/grafana-image-renderer:latest
      resources:
        requests:
          memory: 128Mi
          cpu: 10m
        limits:
          memory: 512Mi
          cpu: 250m
  env:
    GF_RENDERING_SERVER_URL: http://localhost:8081/render
    GF_RENDERING_CALLBACK_URL: http://localhost:3000/services/grafana
  # NOTE: We need Recreate while persistence is backed by a PVC. If we used an
  #       external database, we could do a RollingUpdate instead.
  deploymentStrategy:
    type: Recreate
  persistence:
    type: pvc
    enabled: true
  service:
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/path: "/services/grafana/metrics"
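  # NOTE: The annotations above rely on the stable/prometheus chart's default
  #       scrape configuration, which, as far as I understand it, discovers
  #       services annotated with prometheus.io/scrape and uses
  #       prometheus.io/path as the metrics path. That is how Grafana's own
  #       metrics end up in Prometheus in this setup.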
  resources:
    limits:
      cpu: 1
      memory: 1Gi
    requests:
      cpu: 50m
      memory: 100Mi
  initChownData:
    resources:
      limits:
        cpu: 100m
        memory: 128Mi
      requests:
        cpu: 25m
        memory: 64Mi

# Reference on the configuration options:
# https://github.com/helm/charts/blob/master/stable/prometheus/values.yaml
prometheus:
  fullnameOverride: prometheus
  # The actual prometheus server that polls various sources for metrics etc.
  server:
    fullnameOverride: prometheus-server
    enabled: true
    # data retention period
    retention: 3y
    # NOTE: We prefer a StatefulSet while persistence is backed by a PVC. If we
    #       used an external database, we could use a Deployment with rolling
    #       updates instead. Until then, we should shut down one pod before
    #       starting another, which a StatefulSet does by default and which a
    #       Deployment with a Recreate upgrade strategy would also do.
    statefulSet:
      enabled: true
    persistentVolume:
      enabled: true
      size: 200Gi
      storageClass: ssd
    resources:
      limits:
        cpu: 2
        memory: 12Gi
      requests:
        cpu: 50m
        # IMPORTANT: This value was lowered to 100Mi from 12Gi after the course
        #            ended to allow prometheus to run on a cheaper node.
        memory: 100Mi
  # alertmanager is meant to be able to alert using email etc. Grafana can also
  # do this by itself, at least to some degree, as I understand it.
  alertmanager:
    fullnameOverride: prometheus-alertmanager
    enabled: false
  # kube-state-metrics exports information coming from the kubernetes api-server
  # about the state of kubernetes resources. It can list the state of pods etc.
  #
  # ref: https://github.com/helm/charts/blob/master/stable/prometheus/requirements.yaml
  # ref: https://github.com/helm/charts/tree/master/stable/kube-state-metrics
  kube-state-metrics:
    fullnameOverride: prometheus-kube-state-metrics
    resources:
      limits:
        cpu: 100m
        memory: 64Mi
      requests:
        cpu: 10m
        memory: 32Mi
  kubeStateMetrics:
    enabled: true
  nodeExporter:
    fullnameOverride: prometheus-node-exporter
    enabled: true
    # NOTE: We want to be able to scrape metrics on all nodes, even GPU nodes
    #       etc.
    tolerations:
      - operator: "Exists"
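    # The toleration above, with operator "Exists" and no key, matches every
    # taint, so the node-exporter pods can be scheduled even on tainted nodes,
    # e.g. a hypothetical GPU node tainted with something like
    # nvidia.com/gpu=present:NoSchedule.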
    resources:
      limits:
        cpu: 200m
        memory: 50Mi
      requests:
        cpu: 50m
        memory: 30Mi
  # pushgateway is meant to buffer metrics pushed to it from short-lived
  # sources and expose them to prometheus on their behalf later.
  pushgateway:
    fullnameOverride: prometheus-pushgateway
    enabled: false