From 6e930f90fc0dbf83187fb56735745890464e0257 Mon Sep 17 00:00:00 2001
From: Sergey Kozlov
Date: Tue, 24 Sep 2019 17:33:03 +0700
Subject: [PATCH] Added option to store datanode data on persistent volumes

When hdfs-datanode-k8s.persistence.enabled is true, the datanode template
renders a headless Service plus a StatefulSet whose hdfs-data-0 volume is
provisioned through a volumeClaimTemplate, instead of the hostPath-backed
DaemonSet. With the default (persistence.enabled=false) the DaemonSet and
its hostPath volumes are rendered as before.

New values under hdfs-datanode-k8s.persistence: enabled, replicas,
accessMode, size, storageClass and selector.
---
 charts/README.md                              | 33 ++++++++++-
 .../templates/datanode-daemonset.yaml         | 59 +++++++++++++++++++
 charts/hdfs-k8s/values.yaml                   | 19 ++++++
 3 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/charts/README.md b/charts/README.md
index 15ee886..7da2ae4 100644
--- a/charts/README.md
+++ b/charts/README.md
@@ -26,7 +26,8 @@ HDFS on K8s supports the following features:
     file data. File data should also survive datanode crash or restart. HDFS on
     K8s stores the file data on the local disks of the K8s cluster nodes using
     K8s HostPath volumes. (We plan to switch to a better mechanism, K8s
-    persistent local volumes)
+    persistent local volumes).
+    HDFS on K8s supports storing file data on persistent volumes as well.
   - Kerberos: Vanilla HDFS is not secure. Intruders can easily write custom
     client code, put a fake user name in requests and steal data. Production
     HDFS often secure itself using Kerberos. HDFS on K8s supports Kerberos.
@@ -368,6 +369,36 @@ node when the pod restarts.
     --set hdfs-simple-namenode-k8s.nodeSelector.hdfs-namenode-selector=hdfs-namenode-0
 ```
 
+### Using persistent volumes for datanodes
+
+You can store file data on persistent volumes instead of hostPath volumes.
+In this case, datanode pods are managed by StatefulSet instead of DaemonSet.
+
+To install the chart in this mode, run
+
+```
+  $ helm install -n my-hdfs charts/hdfs-k8s \
+    --set hdfs-datanode-k8s.persistence.enabled=true
+```
+
+By default, 2 datanodes are created with 100Gi volume each.
+
+You can customize the number of datanodes. For example, to create 3 datanodes, run
+
+```
+  $ helm install -n my-hdfs charts/hdfs-k8s \
+    --set hdfs-datanode-k8s.persistence.enabled=true \
+    --set hdfs-datanode-k8s.persistence.replicas=3
+```
+
+You can also customize other persistence properties by analogy with namenodes, for example
+
+```
+  $ helm install -n my-hdfs charts/hdfs-k8s \
+    --set hdfs-datanode-k8s.persistence.enabled=true \
+    --set hdfs-datanode-k8s.persistence.size=200Gi
+```
+
 # Security
 
 ## K8s secret containing Kerberos keytab files
diff --git a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml
index 09445ed..c821add 100644
--- a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml
+++ b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml
@@ -32,10 +32,36 @@ data:
     done
     echo $_CLUSTER_ID | grep -q -v null
 ---
+{{- if .Values.persistence.enabled }}
+# Required to generate StatefulSet pod names.
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "hdfs-k8s.datanode.fullname" . }}
+  labels:
+    app: {{ template "hdfs-k8s.datanode.name" . }}
+    chart: {{ template "hdfs-k8s.subchart" . }}
+    release: {{ .Release.Name }}
+  annotations:
+    # TODO: Deprecated. Replace tolerate-unready-endpoints with
+    # v1.Service.PublishNotReadyAddresses.
+    service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
+spec:
+  clusterIP: None
+  selector:
+    app: {{ template "hdfs-k8s.datanode.name" . }}
+    release: {{ .Release.Name }}
+{{- end }}
+---
+{{- if .Values.persistence.enabled }}
+apiVersion: apps/v1beta1
+kind: StatefulSet
+{{- else }}
 # Deleting a daemonset may need some trick. See
 # https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489
 apiVersion: extensions/v1beta1
 kind: DaemonSet
+{{- end }}
 metadata:
   name: {{ template "hdfs-k8s.datanode.fullname" . }}
   labels:
@@ -43,6 +69,10 @@ metadata:
     chart: {{ template "hdfs-k8s.subchart" . }}
     release: {{ .Release.Name }}
 spec:
+  {{- if .Values.persistence.enabled }}
+  serviceName: {{ template "hdfs-k8s.datanode.fullname" . }}
+  replicas: {{ .Values.persistence.replicas }}
+  {{- end }}
   template:
     metadata:
       labels:
@@ -115,10 +145,15 @@ spec:
             - name: hdfs-config
               mountPath: /etc/hadoop-custom-conf
               readOnly: true
+            {{- if .Values.persistence.enabled }}
+            - name: hdfs-data-0
+              mountPath: /hadoop/dfs/data/0
+            {{- else }}
             {{- range $index, $path := .Values.global.dataNodeHostPath }}
             - name: hdfs-data-{{ $index }}
               mountPath: /hadoop/dfs/data/{{ $index }}
             {{- end }}
+            {{- end }}
             {{- if .Values.global.kerberosEnabled }}
             - name: kerberos-config
               mountPath: /etc/krb5.conf
@@ -167,11 +202,13 @@ spec:
           configMap:
             name: {{ template "hdfs-k8s.datanode.fullname" . }}-scripts
             defaultMode: 0744
+        {{- if not .Values.persistence.enabled }}
         {{- range $index, $path := .Values.global.dataNodeHostPath }}
         - name: hdfs-data-{{ $index }}
           hostPath:
             path: {{ $path }}
         {{- end }}
+        {{- end }}
         - name: hdfs-config
           configMap:
             name: {{ template "hdfs-k8s.config.fullname" . }}
@@ -189,3 +226,25 @@ spec:
           emptyDir: {}
         {{- end }}
         {{- end }}
+  {{- if .Values.persistence.enabled }}
+  volumeClaimTemplates:
+    - metadata:
+        name: hdfs-data-0
+      spec:
+        accessModes:
+          - {{ .Values.persistence.accessMode | quote }}
+        resources:
+          requests:
+            storage: {{ .Values.persistence.size | quote }}
+      {{- if .Values.persistence.storageClass }}
+        {{- if (eq "-" .Values.persistence.storageClass) }}
+        storageClassName: ""
+        {{- else }}
+        storageClassName: "{{ .Values.persistence.storageClass }}"
+        {{- end }}
+      {{- end }}
+      {{- if .Values.persistence.selector }}
+        selector:
+{{ toYaml .Values.persistence.selector | indent 10 }}
+      {{- end }}
+  {{- end }}
diff --git a/charts/hdfs-k8s/values.yaml b/charts/hdfs-k8s/values.yaml
index 77ca3fe..89acf9e 100644
--- a/charts/hdfs-k8s/values.yaml
+++ b/charts/hdfs-k8s/values.yaml
@@ -123,6 +123,25 @@ hdfs-datanode-k8s:
   nodeSelector: {}
   tolerations: []
   affinity: {}
+  persistence:
+    enabled: false
+    replicas: 2
+    accessMode: ReadWriteOnce
+    size: 100Gi
+    ## Persistent Volume Storage Class
+    ## If defined, storageClassName: <storageClass>
+    ## If set to "-", storageClassName: "", which disables dynamic provisioning
+    ## If undefined (the default) or set to null, no storageClassName spec is
+    ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
+    ##   GKE, AWS & OpenStack)
+    ##
+    # storageClass: "-"
+
+    ## To choose a suitable persistent volume from available static volumes, selectors
+    ## are used.
+    # selector:
+    #   matchLabels:
+    #     volume-type: hdfs-ssd
 
 ## ------------------------------------------------------------------------------
 ## hdfs-krb5-k8s: