This is an automated email from the ASF dual-hosted git repository.
tejaskriya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ozone-helm-charts.git
The following commit(s) were added to refs/heads/main by this push:
new bd435a3 HDDS-11618. Enable HA mode for OM and SCM (#20)
bd435a3 is described below
commit bd435a39d74233e27c7ded0ff86d082475946c80
Author: Tejaskriya <[email protected]>
AuthorDate: Wed Jan 21 12:21:09 2026 +0530
HDDS-11618. Enable HA mode for OM and SCM (#20)
Co-authored-by: TobiasPyttel <[email protected]>
---
charts/ozone/templates/_helpers.tpl | 160 +++++++++++++++++++--
.../datanode/datanode-service-headless.yaml | 4 +
.../templates/datanode/datanode-statefulset.yaml | 8 +-
.../ozone/templates/helm/om-decommission-job.yaml | 105 ++++++++++++++
.../templates/helm/om-decommission-service.yaml | 30 ++++
.../templates/helm/om-leader-transfer-job.yaml | 84 +++++++++++
.../ozone/templates/om/om-bootstrap-configmap.yaml | 98 +++++++++++++
charts/ozone/templates/om/om-service-headless.yaml | 6 +
charts/ozone/templates/om/om-statefulset.yaml | 45 ++++--
charts/ozone/templates/ozone-configmap.yaml | 4 +
charts/ozone/templates/s3g/s3g-statefulset.yaml | 4 +-
.../ozone/templates/scm/scm-service-headless.yaml | 12 ++
charts/ozone/templates/scm/scm-statefulset.yaml | 35 ++++-
charts/ozone/values.yaml | 116 ++++++++++++---
14 files changed, 662 insertions(+), 49 deletions(-)
diff --git a/charts/ozone/templates/_helpers.tpl
b/charts/ozone/templates/_helpers.tpl
index 71ea063..59794b4 100644
--- a/charts/ozone/templates/_helpers.tpl
+++ b/charts/ozone/templates/_helpers.tpl
@@ -51,22 +51,116 @@ app.kubernetes.io/instance: {{ .Release.Name }}
{{- $pods | join "," }}
{{- end }}
-{{/* Common configuration environment variables */}}
-{{- define "ozone.configuration.env" -}}
+{{/*
+Comma-separated list of Ozone Manager node ids, one per configured replica.
+Each id has the form "<release>-om-<ordinal>" for ordinals 0..om.replicas-1.
+*/}}
+{{- define "ozone.om.cluster.ids" -}}
+  {{- $ids := list }}
+  {{- range $ordinal := until (int .Values.om.replicas) }}
+    {{- $ids = append $ids (printf "%s-om-%d" $.Release.Name $ordinal) }}
+  {{- end }}
+  {{- join "," $ids }}
+{{- end }}
+
+{{/*
+Comma-separated list of Storage Container Manager node ids, one per configured
+replica. Each id has the form "<release>-scm-<ordinal>" for ordinals
+0..scm.replicas-1.
+*/}}
+{{- define "ozone.scm.cluster.ids" -}}
+  {{- $ids := list }}
+  {{- range $ordinal := until (int .Values.scm.replicas) }}
+    {{- $ids = append $ids (printf "%s-scm-%d" $.Release.Name $ordinal) }}
+  {{- end }}
+  {{- join "," $ids }}
+{{- end }}
+
+{{/*
+Comma-separated ids of the OM nodes that this release revision removes.
+Compares the replica count of the live "<release>-om" StatefulSet (fetched with
+lookup) against .Values.om.replicas and lists every existing ordinal at or above
+the new count. Ordinal 0 is never listed, even when om.replicas is 0.
+Renders an empty string when lookup returns no StatefulSet.
+*/}}
+{{- define "ozone.om.decommissioned.nodes" -}}
+  {{- $nodes := list }}
+  {{- $statefulset := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-om" $.Release.Name) -}}
+  {{- if $statefulset }}
+    {{- $oldCount := $statefulset.spec.replicas | int -}}
+    {{- /* Loop-invariant lower bound, computed once; clamped to 1 so om-0 is always kept. */ -}}
+    {{- $firstRemoved := max (int .Values.om.replicas) 1 }}
+    {{- range $i := until $oldCount }}
+      {{- if ge $i $firstRemoved }}
+        {{- $nodes = append $nodes (printf "%s-om-%d" $.Release.Name $i) }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+  {{- $nodes | join "," }}
+{{- end }}
+
+{{/*
+Comma-separated ids of the OM nodes that this release revision adds.
+Lists the ordinals from the replica count of the live "<release>-om"
+StatefulSet (fetched with lookup) up to, but excluding, .Values.om.replicas.
+Renders an empty string when lookup returns no StatefulSet or when the
+replica count does not grow.
+*/}}
+{{- define "ozone.om.bootstrap.nodes" -}}
+  {{- $added := list }}
+  {{- $current := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-om" $.Release.Name) }}
+  {{- if $current }}
+    {{- $oldCount := int $current.spec.replicas }}
+    {{- $newCount := int .Values.om.replicas }}
+    {{- /* untilStep yields oldCount..newCount-1 and nothing when newCount <= oldCount. */ -}}
+    {{- range $ordinal := untilStep $oldCount $newCount 1 }}
+      {{- $added = append $added (printf "%s-om-%d" $.Release.Name $ordinal) }}
+    {{- end }}
+  {{- end }}
+  {{- join "," $added }}
+{{- end }}
+
+{{/*
+Comma-separated ids of the SCM nodes that this release revision removes.
+Lists every ordinal of the live "<release>-scm" StatefulSet (fetched with
+lookup) that is at or above .Values.scm.replicas. Renders an empty string when
+lookup returns no StatefulSet.
+*/}}
+{{- define "ozone.scm.decommissioned.nodes" -}}
+  {{- $removed := list }}
+  {{- $current := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-scm" $.Release.Name) }}
+  {{- if $current }}
+    {{- $keep := int .Values.scm.replicas }}
+    {{- range $ordinal := until (int $current.spec.replicas) }}
+      {{- if le $keep $ordinal }}
+        {{- $removed = append $removed (printf "%s-scm-%d" $.Release.Name $ordinal) }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+  {{- join "," $removed -}}
+{{- end }}
+
+{{/*
+Comma-separated host names of the datanode pods that this release revision
+removes. Lists every ordinal of the live "<release>-datanode" StatefulSet
+(fetched with lookup) that is at or above .Values.datanode.replicas, formatted
+as "<release>-datanode-<ordinal>.<release>-datanode-headless.<namespace>.svc.cluster.local".
+Renders an empty string when lookup returns no StatefulSet.
+*/}}
+{{- define "ozone.data.decommissioned.hosts" -}}
+  {{- $removed := list }}
+  {{- $current := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-datanode" $.Release.Name) }}
+  {{- if $current }}
+    {{- $keep := int .Values.datanode.replicas }}
+    {{- range $ordinal := until (int $current.spec.replicas) }}
+      {{- if le $keep $ordinal }}
+        {{- $removed = append $removed (printf "%s-datanode-%d.%s-datanode-headless.%s.svc.cluster.local" $.Release.Name $ordinal $.Release.Name $.Release.Namespace) }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+  {{- join "," $removed -}}
+{{- end }}
+
+{{- define "ozone.configuration.env.common" -}}
- name: OZONE-SITE.XML_hdds.datanode.dir
value: /data/storage
- name: OZONE-SITE.XML_ozone.scm.datanode.id.dir
value: /data/metadata
- name: OZONE-SITE.XML_ozone.metadata.dirs
value: /data/metadata
-- name: OZONE-SITE.XML_ozone.scm.block.client.address
- value: {{ include "ozone.scm.pods" . }}
-- name: OZONE-SITE.XML_ozone.scm.client.address
- value: {{ include "ozone.scm.pods" . }}
-- name: OZONE-SITE.XML_ozone.scm.names
- value: {{ include "ozone.scm.pods" . }}
-- name: OZONE-SITE.XML_ozone.om.address
- value: {{ include "ozone.om.pods" . }}
+- name: OZONE-SITE.XML_ozone.scm.ratis.enable
+ value: "true"
+- name: OZONE-SITE.XML_ozone.scm.service.ids
+ value: {{ .Values.clusterId }}
+- name: OZONE-SITE.XML_ozone.scm.nodes.{{ .Values.clusterId }}
+ value: {{ include "ozone.scm.cluster.ids" . }}
+ {{/*- name: OZONE-SITE.XML_ozone.scm.skip.bootstrap.validation*/}}
+ {{/* value: {{ quote .Values.scm.skipBootstrapValidation }}*/}}
+{{- range $i, $val := until ( .Values.scm.replicas | int ) }}
+- name: {{ printf "OZONE-SITE.XML_ozone.scm.address.%s.%s-scm-%d"
$.Values.clusterId $.Release.Name $i }}
+ value: {{ printf "%s-scm-%d.%s-scm-headless.%s.svc.cluster.local"
$.Release.Name $i $.Release.Name $.Release.Namespace }}
+{{- end }}
+- name: OZONE-SITE.XML_ozone.scm.primordial.node.id
+ value: {{ printf "%s-scm-0" $.Release.Name }}
+- name: OZONE-SITE.XML_ozone.om.ratis.enable
+ value: "true"
+- name: OZONE-SITE.XML_ozone.om.service.ids
+ value: {{ .Values.clusterId }}
- name: OZONE-SITE.XML_hdds.scm.safemode.min.datanode
value: "3"
- name: OZONE-SITE.XML_ozone.datanode.pipeline.limit
@@ -78,3 +172,49 @@ app.kubernetes.io/instance: {{ .Release.Name }}
value: "{{- printf "%s-recon.%s.svc.cluster.local" $.Release.Name
$.Release.Namespace }}:9891"
{{- end }}
{{- end }}
+
+{{/*
+Common configuration environment variables.
+Emits the shared entries from "ozone.configuration.env.common" followed by the
+OM HA entries for cluster id .Values.clusterId:
+  - ozone.om.decommissioned.nodes.<id>: ids being removed (empty string if none)
+  - ozone.om.nodes.<id>:                ids of the configured replicas
+  - ozone.om.address.<id>.<node>:       one entry per active and per removed node
+Active nodes resolve through the "<release>-om-headless" service; removed nodes
+resolve through their per-node "<release>-helm-manager-decommission-<node>-svc"
+service.
+*/}}
+{{- define "ozone.configuration.env" -}}
+{{- /* Render each lookup-backed helper exactly once and reuse the result. */ -}}
+{{- $decomCsv := include "ozone.om.decommissioned.nodes" . }}
+{{- $activeCsv := include "ozone.om.cluster.ids" . }}
+{{- /* splitList on "" yields [""], so empty input is mapped to an empty list explicitly. */ -}}
+{{- $dOmNodes := ternary (splitList "," $decomCsv) (list) (ne "" $decomCsv) }}
+{{- $activeOmNodes := ternary (splitList "," $activeCsv) (list) (ne "" $activeCsv) }}
+{{ include "ozone.configuration.env.common" . }}
+- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.{{ .Values.clusterId }}
+  value: {{ $decomCsv | quote }}
+- name: OZONE-SITE.XML_ozone.om.nodes.{{ .Values.clusterId }}
+  value: {{ $activeCsv | quote }}
+{{- range $tempId := $activeOmNodes }}
+- name: {{ printf "OZONE-SITE.XML_ozone.om.address.%s.%s" $.Values.clusterId $tempId }}
+  value: {{ printf "%s.%s-om-headless.%s.svc.cluster.local" $tempId $.Release.Name $.Release.Namespace }}
+{{- end }}
+{{- range $tempId := $dOmNodes }}
+- name: {{ printf "OZONE-SITE.XML_ozone.om.address.%s.%s" $.Values.clusterId $tempId }}
+  value: {{ printf "%s-helm-manager-decommission-%s-svc.%s.svc.cluster.local" $.Release.Name $tempId $.Release.Namespace }}
+{{- end }}
+{{- end }}
+
+{{/*
+Common configuration environment variables for the pre-upgrade hook.
+Unlike "ozone.configuration.env", nodes that are about to be removed are still
+addressed through the "<release>-om-headless" service, and the decommissioned
+list is always empty. An extra node id "om-leader-transfer" with address
+"localhost" is appended to ozone.om.nodes.<id>.
+NOTE(review): the extra id presumably lets the hook pod act as a member of the
+OM service without being a real replica - confirm against Ozone behaviour.
+*/}}
+{{- define "ozone.configuration.env.prehook" -}}
+{{- /* Render each lookup-backed helper exactly once and reuse the result. */ -}}
+{{- $decomCsv := include "ozone.om.decommissioned.nodes" . }}
+{{- $activeCsv := include "ozone.om.cluster.ids" . }}
+{{- /* splitList on "" yields [""], so empty input is mapped to an empty list explicitly. */ -}}
+{{- $dOmNodes := ternary (splitList "," $decomCsv) (list) (ne "" $decomCsv) }}
+{{- $activeOmNodes := ternary (splitList "," $activeCsv) (list) (ne "" $activeCsv) }}
+{{- $allOmNodes := concat $activeOmNodes $dOmNodes }}
+{{ include "ozone.configuration.env.common" . }}
+- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.{{ .Values.clusterId }}
+  value: ""
+{{- range $tempId := $allOmNodes }}
+- name: {{ printf "OZONE-SITE.XML_ozone.om.address.%s.%s" $.Values.clusterId $tempId }}
+  value: {{ printf "%s.%s-om-headless.%s.svc.cluster.local" $tempId $.Release.Name $.Release.Namespace }}
+{{- end }}
+- name: OZONE-SITE.XML_ozone.om.nodes.{{ .Values.clusterId }}
+  value: {{ append $allOmNodes "om-leader-transfer" | join "," }}
+- name: "OZONE-SITE.XML_ozone.om.address.{{ .Values.clusterId }}.om-leader-transfer"
+  value: localhost
+{{- end }}
\ No newline at end of file
diff --git a/charts/ozone/templates/datanode/datanode-service-headless.yaml
b/charts/ozone/templates/datanode/datanode-service-headless.yaml
index 375abb1..6c62959 100644
--- a/charts/ozone/templates/datanode/datanode-service-headless.yaml
+++ b/charts/ozone/templates/datanode/datanode-service-headless.yaml
@@ -28,6 +28,10 @@ spec:
ports:
- name: ui
port: {{ .Values.datanode.service.port }}
+ - name: ratis-ipc
+ port: {{ .Values.datanode.service.ratisIpcPort }}
+ - name: ipc
+ port: {{ .Values.datanode.service.ipcPort }}
selector:
{{- include "ozone.selectorLabels" . | nindent 4 }}
app.kubernetes.io/component: datanode
diff --git a/charts/ozone/templates/datanode/datanode-statefulset.yaml
b/charts/ozone/templates/datanode/datanode-statefulset.yaml
index e47a8d4..5b9dfce 100644
--- a/charts/ozone/templates/datanode/datanode-statefulset.yaml
+++ b/charts/ozone/templates/datanode/datanode-statefulset.yaml
@@ -18,7 +18,7 @@
{{- $env := concat .Values.env .Values.datanode.env }}
{{- $envFrom := concat .Values.envFrom .Values.datanode.envFrom }}
-{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations)
.Values.datanode.podAnnotations }}
+{{- $podAnnotations := mergeOverwrite (deepCopy (default dict
.Values.podAnnotations)) (default dict .Values.datanode.podAnnotations) }}
{{- $nodeSelector := or .Values.datanode.nodeSelector .Values.nodeSelector }}
{{- $affinity := or .Values.datanode.affinity .Values.affinity }}
{{- $tolerations := or .Values.datanode.tolerations .Values.tolerations }}
@@ -40,7 +40,7 @@ spec:
template:
metadata:
annotations:
- checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | sha256sum }}
+ checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) |
sha256sum }}
{{- with $podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
@@ -69,6 +69,10 @@ spec:
ports:
- name: ui
containerPort: {{ .Values.datanode.service.port }}
+ - name: ratis-ipc
+ containerPort: {{ .Values.datanode.service.ratisIpcPort }}
+ - name: ipc
+ containerPort: {{ .Values.datanode.service.ipcPort }}
livenessProbe:
httpGet:
path: /
diff --git a/charts/ozone/templates/helm/om-decommission-job.yaml
b/charts/ozone/templates/helm/om-decommission-job.yaml
new file mode 100644
index 0000000..45b2d98
--- /dev/null
+++ b/charts/ozone/templates/helm/om-decommission-job.yaml
@@ -0,0 +1,105 @@
+{{- /*
+Post-upgrade hook: renders one Job per OM node listed by
+"ozone.om.decommissioned.nodes" (only when OM persistence is enabled).
+Each Job mounts the removed node's PVC "<release>-om-<node>" under
+/old<om.persistence.path>, runs "ozone admin om decommission" for that node and,
+once the success message for the node appears in the command output, deletes
+the old data on the PVC.
+Scheduling settings (nodeSelector, affinity, tolerations, securityContext) come
+from .Values.helm.* and fall back to the chart-wide values.
+*/ -}}
+{{- if .Values.om.persistence.enabled }}
+{{- /* Render the lookup-backed helper once; empty output means nothing to decommission. */ -}}
+{{- $decomCsv := include "ozone.om.decommissioned.nodes" . }}
+{{- $dnodes := ternary (splitList "," $decomCsv) (list) (ne "" $decomCsv) }}
+{{- $env := concat .Values.env .Values.helm.env }}
+{{- $envFrom := concat .Values.envFrom .Values.helm.envFrom }}
+{{- $nodeSelector := or .Values.helm.nodeSelector .Values.nodeSelector }}
+{{- $affinity := or .Values.helm.affinity .Values.affinity }}
+{{- $tolerations := or .Values.helm.tolerations .Values.tolerations }}
+{{- $securityContext := or .Values.helm.securityContext .Values.securityContext }}
+{{- range $dnode := $dnodes }}
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ printf "%s-helm-manager-decommission-%s" $.Release.Name $dnode }}
+  labels:
+    {{- include "ozone.labels" $ | nindent 4 }}
+    app.kubernetes.io/component: helm-manager
+  annotations:
+    "helm.sh/hook": post-upgrade
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": hook-succeeded,hook-failed
+spec:
+  backoffLimit: {{ $.Values.helm.backoffLimit }}
+  template:
+    metadata:
+      labels:
+        {{- include "ozone.selectorLabels" $ | nindent 8 }}
+        app.kubernetes.io/component: helm-manager
+    spec:
+      containers:
+        - name: om-decommission
+          image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}"
+          imagePullPolicy: {{ $.Values.image.pullPolicy }}
+          {{- with $.Values.om.command }}
+          command: {{- tpl (toYaml .) $ | nindent 12 }}
+          {{- end }}
+          args:
+            - sh
+            - -c
+            - |
+              set -e
+              decommission_finalizer() {
+                echo "Init decommission finalizer process..."
+                while true; do
+                  IFS= read -r line;
+                  echo "$line"
+                  if echo "$line" | grep -q "Successfully decommissioned OM {{ $dnode }}"; then
+                    echo "{{ $dnode }} was successfully decommissioned!"
+                    if [ -d /old{{ $.Values.om.persistence.path }} ]; then
+                      echo "Delete old data on pvc to enable rescheduling without manual PVC deletion!"
+                      rm -rf /old{{ $.Values.om.persistence.path }}/*
+                      echo "Data deleted!"
+                    fi
+                    break;
+                  fi
+                done
+                echo "Decommission finalizer process finished!"
+                exit 0
+              }
+              exec ozone admin om decommission -id={{ $.Values.clusterId }} -nodeid={{ $dnode }} -hostname={{ printf "%s-helm-manager-decommission-%s-svc.%s.svc.cluster.local" $.Release.Name $dnode $.Release.Namespace }} | decommission_finalizer
+          env:
+            {{- include "ozone.configuration.env" $ | nindent 12 }}
+            {{- with $env }}
+            {{- tpl (toYaml .) $ | nindent 12 }}
+            {{- end }}
+          {{- with $envFrom }}
+          envFrom: {{- tpl (toYaml .) $ | nindent 12 }}
+          {{- end }}
+          ports:
+            - name: om-rpc
+              containerPort: {{ $.Values.om.service.rpcPort }}
+            - name: om-ratis
+              containerPort: {{ $.Values.om.service.ratisPort }}
+          volumeMounts:
+            - name: config
+              mountPath: {{ $.Values.configuration.dir }}
+            - name: om-data
+              mountPath: {{ $.Values.om.persistence.path }}
+            - name: om-data-old
+              mountPath: /old{{ $.Values.om.persistence.path }}
+      {{- with $nodeSelector }}
+      nodeSelector: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- /* Previously computed but never rendered, so hook pods ignored these settings. */}}
+      {{- with $affinity }}
+      affinity: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with $tolerations }}
+      tolerations: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with $securityContext }}
+      securityContext: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+        - name: om-data-old
+          persistentVolumeClaim:
+            claimName: {{ $.Release.Name}}-om-{{ $dnode }}
+        - name: om-data
+          emptyDir: { }
+        - name: config
+          projected:
+            sources:
+              - configMap:
+                  name: {{ $.Release.Name }}-ozone
+              {{- with $.Values.configuration.filesFrom }}
+              {{- tpl (toYaml .) $ | nindent 14 }}
+              {{- end }}
+      restartPolicy: Never
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/ozone/templates/helm/om-decommission-service.yaml
b/charts/ozone/templates/helm/om-decommission-service.yaml
new file mode 100644
index 0000000..466c5c5
--- /dev/null
+++ b/charts/ozone/templates/helm/om-decommission-service.yaml
@@ -0,0 +1,30 @@
+{{- /*
+Post-upgrade hook: renders one ClusterIP Service per OM node listed by
+"ozone.om.decommissioned.nodes" (only when OM persistence is enabled).
+Each Service selects the pods of the matching
+"<release>-helm-manager-decommission-<node>" Job through the "job-name" label
+and exposes the OM rpc and ratis ports. Hook weight -10 orders it before
+hooks of weight 0.
+*/ -}}
+{{- if .Values.om.persistence.enabled }}
+{{- /* Render the lookup-backed helper once; empty output means no Services. */ -}}
+{{- $decomCsv := include "ozone.om.decommissioned.nodes" . }}
+{{- $dnodes := ternary (splitList "," $decomCsv) (list) (ne "" $decomCsv) }}
+{{- range $dnode := $dnodes }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ printf "%s-helm-manager-decommission-%s-svc" $.Release.Name $dnode }}
+  labels:
+    {{- include "ozone.labels" $ | nindent 4 }}
+    app.kubernetes.io/component: helm-manager
+  annotations:
+    "helm.sh/hook": post-upgrade
+    "helm.sh/hook-weight": "-10"
+    "helm.sh/hook-delete-policy": hook-succeeded,hook-failed
+spec:
+  selector:
+    job-name: {{ printf "%s-helm-manager-decommission-%s" $.Release.Name $dnode }}
+  ports:
+    - name: rpc
+      port: {{ $.Values.om.service.rpcPort }}
+      targetPort: {{ $.Values.om.service.rpcPort }}
+    - name: ratis
+      port: {{ $.Values.om.service.ratisPort }}
+      targetPort: {{ $.Values.om.service.ratisPort }}
+  type: ClusterIP
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/ozone/templates/helm/om-leader-transfer-job.yaml
b/charts/ozone/templates/helm/om-leader-transfer-job.yaml
new file mode 100644
index 0000000..673b47a
--- /dev/null
+++ b/charts/ozone/templates/helm/om-leader-transfer-job.yaml
@@ -0,0 +1,84 @@
+{{- /*
+Pre-upgrade hook: when OM persistence is enabled and the upgrade removes at
+least one OM node, runs "ozone admin om transfer" to move OM leadership to
+"<release>-om-0" before the upgrade proceeds.
+The container environment comes from "ozone.configuration.env.prehook".
+Scheduling settings (nodeSelector, affinity, tolerations, securityContext) come
+from .Values.helm.* and fall back to the chart-wide values.
+*/ -}}
+{{- /* Render the lookup-backed helper once; a non-empty result means nodes are being removed. */ -}}
+{{- $decomCsv := include "ozone.om.decommissioned.nodes" . }}
+{{- if and .Values.om.persistence.enabled $decomCsv }}
+{{- $env := concat .Values.env .Values.helm.env }}
+{{- $envFrom := concat .Values.envFrom .Values.helm.envFrom }}
+{{- $nodeSelector := or .Values.helm.nodeSelector .Values.nodeSelector }}
+{{- $affinity := or .Values.helm.affinity .Values.affinity }}
+{{- $tolerations := or .Values.helm.tolerations .Values.tolerations }}
+{{- $securityContext := or .Values.helm.securityContext .Values.securityContext }}
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ printf "%s-helm-manager-leader-transfer" $.Release.Name }}
+  labels:
+    {{- include "ozone.labels" $ | nindent 4 }}
+    app.kubernetes.io/component: helm-manager
+  annotations:
+    "helm.sh/hook": pre-upgrade
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": hook-succeeded,hook-failed
+spec:
+  backoffLimit: {{ $.Values.helm.backoffLimit }}
+  template:
+    metadata:
+      labels:
+        {{- include "ozone.selectorLabels" $ | nindent 8 }}
+        app.kubernetes.io/component: helm-manager
+    spec:
+      containers:
+        - name: om-leader-transfer
+          image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}"
+          imagePullPolicy: {{ $.Values.image.pullPolicy }}
+          {{- with $.Values.om.command }}
+          command: {{- tpl (toYaml .) $ | nindent 12 }}
+          {{- end }}
+          args:
+            - sh
+            - -c
+            - |
+              set -e
+              exec ozone admin om transfer -id={{ $.Values.clusterId }} -n={{ $.Release.Name }}-om-0
+          env:
+            {{- include "ozone.configuration.env.prehook" $ | nindent 12 }}
+            {{- with $env }}
+            {{- tpl (toYaml .) $ | nindent 12 }}
+            {{- end }}
+          {{- with $envFrom }}
+          envFrom:
+            {{- tpl (toYaml .) $ | nindent 12 }}
+          {{- end }}
+          ports:
+            - name: om-rpc
+              containerPort: {{ $.Values.om.service.rpcPort }}
+            {{- if gt (int $.Values.om.replicas) 1 }}
+            - name: om-ratis
+              containerPort: {{ $.Values.om.service.ratisPort }}
+            {{- end }}
+          volumeMounts:
+            - name: config
+              mountPath: {{ $.Values.configuration.dir }}
+            - name: om-data
+              mountPath: {{ $.Values.om.persistence.path }}
+      {{- with $nodeSelector }}
+      nodeSelector: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- /* Previously computed but never rendered, so the hook pod ignored these settings. */}}
+      {{- with $affinity }}
+      affinity: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with $tolerations }}
+      tolerations: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with $securityContext }}
+      securityContext: {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+        - name: om-data
+          emptyDir: { }
+        - name: config
+          projected:
+            sources:
+              - configMap:
+                  name: {{ $.Release.Name }}-ozone
+              {{- with $.Values.configuration.filesFrom }}
+              {{- tpl (toYaml .) $ | nindent 14 }}
+              {{- end }}
+      restartPolicy: Never
+{{- end }}
\ No newline at end of file
diff --git a/charts/ozone/templates/om/om-bootstrap-configmap.yaml
b/charts/ozone/templates/om/om-bootstrap-configmap.yaml
new file mode 100644
index 0000000..141e19e
--- /dev/null
+++ b/charts/ozone/templates/om/om-bootstrap-configmap.yaml
@@ -0,0 +1,98 @@
+{{- /*
+ConfigMap holding om-bootstrap.sh. Rendered only when OM persistence is enabled
+and "ozone.om.bootstrap.nodes" lists at least one node that this upgrade adds.
+The script is written in POSIX sh because it declares #!/bin/sh and the OM
+init container runs it through /bin/sh.
+*/ -}}
+{{- $bootstrapNodes := include "ozone.om.bootstrap.nodes" . }}
+{{- if and .Values.om.persistence.enabled $bootstrapNodes }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-om-bootstrap-script
+  labels:
+    {{- include "ozone.labels" . | nindent 4 }}
+    app.kubernetes.io/component: om
+data:
+  om-bootstrap.sh: |-
+    #!/bin/sh
+    # Bootstraps this OM when its hostname is among the newly added nodes and
+    # it has not been bootstrapped before (tracked through a marker file on
+    # the OM data volume). Exits 0 when nothing needs to be done.
+    set -e
+
+    HELM_MANAGER_PATH="{{ .Values.om.persistence.path }}{{ .Values.helm.persistence.path }}"
+    HELM_MANAGER_BOOTSTRAPPED_FILE="$HELM_MANAGER_PATH/bootstrapped"
+
+    # These are templated from Helm
+    OZONE_OM_BOOTSTRAP_NODES="{{ $bootstrapNodes }}"
+    OZONE_OM_CLUSTER_IDS="{{ include "ozone.om.cluster.ids" . }}"
+    OZONE_CLUSTER_ID="{{ .Values.clusterId }}"
+
+    if [ -z "$OZONE_OM_BOOTSTRAP_NODES" ]; then
+      echo "No bootstrap handling needed!"
+      exit 0
+    fi
+
+    # run_bootstrap NODE_LIST
+    # Runs the bootstrap command with ozone.om.nodes.<cluster id> overridden to
+    # NODE_LIST. Tries up to 3 times, sleeping 5s and then 10s between tries.
+    # Exits the script: 0 on success (after writing the marker file), 1 otherwise.
+    run_bootstrap() {
+      overwriteCmd="$1"
+      max_attempts=3
+      attempt=1
+      base_delay=5
+      exit_code=0
+
+      echo "Bootstrapping node config for this node: $overwriteCmd"
+
+      while [ "$attempt" -le "$max_attempts" ]; do
+        echo "Bootstrap attempt $attempt of $max_attempts"
+
+        if ozone admin om --set "ozone.om.nodes.$OZONE_CLUSTER_ID=$overwriteCmd" --bootstrap; then
+          echo "$HOSTNAME was successfully bootstrapped!"
+          mkdir -p "$HELM_MANAGER_PATH"
+          touch "$HELM_MANAGER_BOOTSTRAPPED_FILE"
+          exit 0
+        else
+          # Must be read first in the else branch: $? still holds the command's status here.
+          exit_code=$?
+          echo "Bootstrap failed with exit code $exit_code, attempt $attempt of $max_attempts"
+
+          if [ "$attempt" -lt "$max_attempts" ]; then
+            delay=$((base_delay * (1 << (attempt - 1))))
+            echo "Retrying in $delay seconds..."
+            sleep "$delay"
+          fi
+
+          attempt=$((attempt + 1))
+        fi
+      done
+
+      echo "Bootstrap failed after $max_attempts attempts with exit code $exit_code"
+      exit 1
+    }
+
+    echo "Need to handle bootstrap for nodes $OZONE_OM_BOOTSTRAP_NODES"
+
+    # Is this host one of the nodes to bootstrap? (comma-delimited membership test)
+    doBootstrap=false
+    case ",$OZONE_OM_BOOTSTRAP_NODES," in
+      *",$HOSTNAME,"*) doBootstrap=true ;;
+    esac
+
+    # Node list for the override: all cluster ids up to and including this host.
+    # NOTE(review): presumably nodes with a higher ordinal are left out because
+    # they are not running yet when this node starts - confirm.
+    overwriteCmd=""
+    if [ "$doBootstrap" = true ]; then
+      oldIFS="$IFS"
+      IFS=','
+      for id in $OZONE_OM_CLUSTER_IDS; do
+        overwriteCmd="${overwriteCmd:+$overwriteCmd,}$id"
+        if [ "$id" = "$HOSTNAME" ]; then
+          break
+        fi
+      done
+      IFS="$oldIFS"
+    fi
+
+    if [ "$doBootstrap" = true ] && [ ! -f "$HELM_MANAGER_BOOTSTRAPPED_FILE" ]; then
+      echo "$HOSTNAME must be started with bootstrap arg!"
+      run_bootstrap "$overwriteCmd"
+    else
+      echo "$HOSTNAME must not be started with bootstrap arg, or is already bootstrapped."
+      exit 0
+    fi
+{{- end }}
diff --git a/charts/ozone/templates/om/om-service-headless.yaml
b/charts/ozone/templates/om/om-service-headless.yaml
index d16659b..8aaccaa 100644
--- a/charts/ozone/templates/om/om-service-headless.yaml
+++ b/charts/ozone/templates/om/om-service-headless.yaml
@@ -28,6 +28,12 @@ spec:
ports:
- name: ui
port: {{ .Values.om.service.port }}
+ - name: rpc
+ port: {{ .Values.om.service.rpcPort }}
+ {{- if gt (int .Values.om.replicas) 1 }}
+ - name: ratis
+ port: {{ .Values.om.service.ratisPort }}
+ {{- end }}
selector:
{{- include "ozone.selectorLabels" . | nindent 4 }}
app.kubernetes.io/component: om
diff --git a/charts/ozone/templates/om/om-statefulset.yaml
b/charts/ozone/templates/om/om-statefulset.yaml
index 379cec8..75f7d89 100644
--- a/charts/ozone/templates/om/om-statefulset.yaml
+++ b/charts/ozone/templates/om/om-statefulset.yaml
@@ -18,11 +18,12 @@
{{- $env := concat .Values.env .Values.om.env }}
{{- $envFrom := concat .Values.envFrom .Values.om.envFrom }}
-{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations)
.Values.om.podAnnotations }}
+{{- $podAnnotations := mergeOverwrite (deepCopy (default dict
.Values.podAnnotations)) (default dict .Values.om.podAnnotations) }}
{{- $nodeSelector := or .Values.om.nodeSelector .Values.nodeSelector }}
{{- $affinity := or .Values.om.affinity .Values.affinity }}
{{- $tolerations := or .Values.om.tolerations .Values.tolerations }}
{{- $securityContext := or .Values.om.securityContext .Values.securityContext
}}
+{{- $bnodes := ternary (splitList "," (include "ozone.om.bootstrap.nodes" .))
(list) (ne "" (include "ozone.om.bootstrap.nodes" .)) }}
apiVersion: apps/v1
kind: StatefulSet
metadata:
@@ -40,7 +41,7 @@ spec:
template:
metadata:
annotations:
- checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | sha256sum }}
+ checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) |
sha256sum }}
{{- with $podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
@@ -48,6 +49,28 @@ spec:
{{- include "ozone.selectorLabels" . | nindent 8 }}
app.kubernetes.io/component: om
spec:
+ {{- if and .Values.om.persistence.enabled (gt (len $bnodes) 0) }}
+ initContainers:
+ - name: om-bootstrap
+ image: "{{ .Values.image.repository }}:{{ .Values.image.tag |
default .Chart.AppVersion }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+ command: ["/bin/sh", "/scripts/om-bootstrap.sh"]
+ env:
+ {{- include "ozone.configuration.env" . | nindent 12 }}
+ {{- with $env }}
+ {{- tpl (toYaml .) $ | nindent 12 }}
+ {{- end }}
+ {{- with $envFrom }}
+ envFrom: {{- tpl (toYaml .) $ | nindent 12 }}
+ {{- end }}
+ volumeMounts:
+ - name: config
+ mountPath: {{ .Values.configuration.dir }}
+ - name: {{ .Release.Name }}-om
+ mountPath: {{ .Values.om.persistence.path }}
+ - name: om-bootstrap-script
+ mountPath: /scripts
+ {{- end }}
containers:
- name: om
image: "{{ .Values.image.repository }}:{{ .Values.image.tag |
default .Chart.AppVersion }}"
@@ -55,13 +78,11 @@ spec:
{{- with .Values.om.command }}
command: {{- tpl (toYaml .) $ | nindent 12 }}
{{- end }}
- {{- with .Values.om.args }}
- args: {{- tpl (toYaml .) $ | nindent 12 }}
- {{- end }}
+ args: {{- tpl (toYaml .Values.om.args) $ | nindent 12 }}
env:
{{- include "ozone.configuration.env" . | nindent 12 }}
- name: WAITFOR
- value: {{ $.Release.Name }}-scm-0.{{ $.Release.Name
}}-scm-headless:9876
+ value: {{ $.Release.Name }}-scm-0.{{ $.Release.Name
}}-scm-headless:{{ .Values.scm.service.port }}
- name: ENSURE_OM_INITIALIZED
value: /data/metadata/om/current/VERSION
{{- with $env }}
@@ -72,9 +93,11 @@ spec:
{{- end }}
ports:
- name: rpc
- containerPort: 9862
+ containerPort: {{ .Values.om.service.rpcPort }}
- name: ui
containerPort: {{ .Values.om.service.port }}
+ - name: ratis
+ containerPort: {{ .Values.om.service.ratisPort }}
livenessProbe:
httpGet:
path: /
@@ -101,6 +124,12 @@ spec:
securityContext: {{- toYaml . | nindent 8 }}
{{- end }}
volumes:
+ {{- if and .Values.om.persistence.enabled (gt (len $bnodes) 0) }}
+ - name: om-bootstrap-script
+ configMap:
+ name: {{ .Release.Name }}-om-bootstrap-script
+ defaultMode: 0777
+ {{- end }}
- name: config
projected:
sources:
@@ -111,7 +140,7 @@ spec:
{{- end }}
{{- if not .Values.om.persistence.enabled }}
- name: {{ .Release.Name }}-om
- emptyDir: {}
+ emptyDir: { }
{{- end }}
{{- if .Values.om.persistence.enabled }}
volumeClaimTemplates:
diff --git a/charts/ozone/templates/ozone-configmap.yaml
b/charts/ozone/templates/ozone-configmap.yaml
index dbee026..94f101a 100644
--- a/charts/ozone/templates/ozone-configmap.yaml
+++ b/charts/ozone/templates/ozone-configmap.yaml
@@ -21,5 +21,9 @@ kind: ConfigMap
metadata:
name: {{ .Release.Name }}-ozone
labels: {{- include "ozone.labels" . | nindent 4 }}
+ annotations:
+ "helm.sh/hook": pre-upgrade, pre-install
+ "helm.sh/hook-weight": "-10"
+ "helm.sh/resource-policy": keep
data:
{{- tpl (toYaml .Values.configuration.files) $ | nindent 4 }}
diff --git a/charts/ozone/templates/s3g/s3g-statefulset.yaml
b/charts/ozone/templates/s3g/s3g-statefulset.yaml
index 4a11f07..51f15fb 100644
--- a/charts/ozone/templates/s3g/s3g-statefulset.yaml
+++ b/charts/ozone/templates/s3g/s3g-statefulset.yaml
@@ -18,7 +18,7 @@
{{- $env := concat .Values.env .Values.s3g.env }}
{{- $envFrom := concat .Values.envFrom .Values.s3g.envFrom }}
-{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations)
.Values.s3g.podAnnotations }}
+{{- $podAnnotations := mergeOverwrite (deepCopy (default dict
.Values.podAnnotations)) (default dict .Values.s3g.podAnnotations) }}
{{- $nodeSelector := or .Values.s3g.nodeSelector .Values.nodeSelector }}
{{- $affinity := or .Values.s3g.affinity .Values.affinity }}
{{- $tolerations := or .Values.s3g.tolerations .Values.tolerations }}
@@ -40,7 +40,7 @@ spec:
template:
metadata:
annotations:
- checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | sha256sum }}
+ checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) |
sha256sum }}
{{- with $podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
diff --git a/charts/ozone/templates/scm/scm-service-headless.yaml
b/charts/ozone/templates/scm/scm-service-headless.yaml
index dce5857..d71c004 100644
--- a/charts/ozone/templates/scm/scm-service-headless.yaml
+++ b/charts/ozone/templates/scm/scm-service-headless.yaml
@@ -28,6 +28,18 @@ spec:
ports:
- name: ui
port: {{ .Values.scm.service.port }}
+ - name: rpc-datanode
+ port: {{ .Values.scm.service.rpcDatanodePort }}
+ - name: block-client
+ port: {{ .Values.scm.service.blockClientPort }}
+ - name: rpc-client
+ port: {{ .Values.scm.service.rpcClientPort }}
+ {{- if gt (int .Values.scm.replicas) 1 }}
+ - name: ratis
+ port: {{ .Values.scm.service.ratisPort }}
+ - name: grpc
+ port: {{ .Values.scm.service.grpcPort }}
+ {{- end }}
selector:
{{- include "ozone.selectorLabels" . | nindent 4 }}
app.kubernetes.io/component: scm
diff --git a/charts/ozone/templates/scm/scm-statefulset.yaml
b/charts/ozone/templates/scm/scm-statefulset.yaml
index 27cf1f3..6c1d144 100644
--- a/charts/ozone/templates/scm/scm-statefulset.yaml
+++ b/charts/ozone/templates/scm/scm-statefulset.yaml
@@ -18,7 +18,7 @@
{{- $env := concat .Values.env .Values.scm.env }}
{{- $envFrom := concat .Values.envFrom .Values.scm.envFrom }}
-{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations)
.Values.scm.podAnnotations }}
+{{- $podAnnotations := mergeOverwrite (deepCopy (default dict
.Values.podAnnotations)) (default dict .Values.scm.podAnnotations) }}
{{- $nodeSelector := or .Values.scm.nodeSelector .Values.nodeSelector }}
{{- $affinity := or .Values.scm.affinity .Values.affinity }}
{{- $tolerations := or .Values.scm.tolerations .Values.tolerations }}
@@ -32,6 +32,7 @@ metadata:
app.kubernetes.io/component: scm
spec:
replicas: {{ .Values.scm.replicas }}
+ podManagementPolicy: Parallel
serviceName: {{ .Release.Name }}-scm-headless
selector:
matchLabels:
@@ -40,7 +41,7 @@ spec:
template:
metadata:
annotations:
- checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | sha256sum }}
+ checksum/config: {{ include (print $.Template.BasePath
"/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) |
sha256sum }}
{{- with $podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
@@ -65,6 +66,24 @@ spec:
mountPath: {{ .Values.configuration.dir }}
- name: {{ .Release.Name }}-scm
mountPath: {{ .Values.scm.persistence.path }}
+ {{- if gt (int .Values.scm.replicas) 1 }}
+ - name: bootstrap
+ image: "{{ .Values.image.repository }}:{{ .Values.image.tag |
default .Chart.AppVersion }}"
+ args: ["ozone", "scm", "--bootstrap"]
+ env:
+ {{- include "ozone.configuration.env" . | nindent 12 }}
+ {{- with $env }}
+ {{- tpl (toYaml .) $ | nindent 12 }}
+ {{- end }}
+ {{- with $envFrom }}
+ envFrom: {{- tpl (toYaml .) $ | nindent 12 }}
+ {{- end }}
+ volumeMounts:
+ - name: config
+ mountPath: {{ .Values.configuration.dir }}
+ - name: {{ .Release.Name }}-scm
+ mountPath: {{ .Values.scm.persistence.path }}
+ {{- end }}
containers:
- name: scm
image: "{{ .Values.image.repository }}:{{ .Values.image.tag |
default .Chart.AppVersion }}"
@@ -85,11 +104,19 @@ spec:
{{- end }}
ports:
- name: rpc-client
- containerPort: 9860
+ containerPort: {{ .Values.scm.service.rpcClientPort }}
+ - name: block-client
+ containerPort: {{ .Values.scm.service.blockClientPort }}
- name: rpc-datanode
- containerPort: 9861
+ containerPort: {{ .Values.scm.service.rpcDatanodePort }}
- name: ui
containerPort: {{ .Values.scm.service.port }}
+ {{- if gt (int .Values.scm.replicas) 1 }}
+ - name: ratis
+ containerPort: {{ .Values.scm.service.ratisPort }}
+ - name: grpc
+ containerPort: {{ .Values.scm.service.grpcPort }}
+ {{- end }}
livenessProbe:
httpGet:
path: /
diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml
index a06ea87..d89032a 100644
--- a/charts/ozone/values.yaml
+++ b/charts/ozone/values.yaml
@@ -21,6 +21,9 @@ image:
imagePullSecrets: []
+# Cluster ID
+clusterId: cluster1
+
# Common environment variables (templated)
env: []
# Common envFrom items to set up environment variables (templated)
@@ -89,6 +92,8 @@ datanode:
service:
type: ClusterIP
port: 9882
+ ratisIpcPort: 9858
+ ipcPort: 9859
nodePort: ~
labels: {}
annotations: {}
@@ -109,7 +114,7 @@ datanode:
# Ozone Manager configuration
om:
# Number of Ozone Manager replicas
- replicas: 1
+ replicas: 3
# Command to launch Ozone Manager (templated)
command: ~
# Arguments to launch Ozone Manager (templated)
@@ -125,7 +130,18 @@ om:
# Constrain Ozone Manager pods to nodes with specific node labels
nodeSelector: {}
# Constrain Ozone Manager pods to nodes by affinity/anti-affinity rules
- affinity: {}
+ affinity:
+ podAntiAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ podAffinityTerm:
+ labelSelector:
+ matchExpressions:
+ - key: app.kubernetes.io/component
+ operator: In
+ values:
+ - scm
+ topologyKey: kubernetes.io/hostname
# Allow to schedule Ozone Manager pods on nodes with matching taints
tolerations: []
# Ozone Manager security context (overwrites common security context)
@@ -134,6 +150,8 @@ om:
service:
type: ClusterIP
port: 9874
+ ratisPort: 9872
+ rpcPort: 9862
nodePort: ~
labels: {}
annotations: {}
@@ -151,6 +169,65 @@ om:
# The name of a specific storage class name to use
storageClassName: ~
+# Storage Container Manager configuration
+scm:
+ # Number of Storage Container Manager replicas
+ replicas: 1
+ # Command to launch Storage Container Manager (templated)
+ command: ~
+ # Arguments to launch Storage Container Manager (templated)
+ args: ["ozone", "scm"]
+ # Additional Storage Container Manager environment variables (templated)
+ env: []
+ # Additional Storage Container Manager envFrom items to set up environment
variables (templated)
+ envFrom: []
+ # Storage Container Manager resource requests and limits
+ resources: {}
+ # Constrain Storage Container Manager pods to nodes with specific node labels
+ nodeSelector: {}
+ # Constrain Storage Container Manager pods to nodes by
affinity/anti-affinity rules
+ affinity:
+ podAntiAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ podAffinityTerm:
+ labelSelector:
+ matchExpressions:
+ - key: app.kubernetes.io/component
+ operator: In
+ values:
+ - om
+ topologyKey: kubernetes.io/hostname
+ # Allow to schedule Storage Container Manager pods on nodes with matching
taints
+ tolerations: []
+ # Storage Container Manager security context (overwrites common security
context)
+ securityContext: {}
+ # Storage Container Manager service configuration
+ service:
+ type: ClusterIP
+ port: 9876
+ rpcDatanodePort: 9861
+ blockClientPort: 9863
+ rpcClientPort: 9860
+ ratisPort: 9894
+ grpcPort: 9895
+ nodePort: ~
+ labels: {}
+ annotations: {}
+ # Storage Container Manager persistence
+ persistence:
+ # Enable persistence
+ enabled: false
+ # Persistence access modes
+ accessModes:
+ - ReadWriteOnce
+ # Path for Storage Container Manager volume mount
+ path: /data
+ # Volume size
+ size: 10Gi
+ # The name of a specific storage class name to use
+ storageClassName: ~
+
# S3 Gateway configuration
s3g:
# Number of S3 Gateway replicas
@@ -203,17 +280,11 @@ s3g:
# The name of a specific storage class name to use
storageClassName: ~
-# Storage Container Manager configuration
-scm:
- # Number of Storage Container Manager replicas
- replicas: 1
- # Command to launch Storage Container Manager (templated)
- command: ~
- # Arguments to launch Storage Container Manager (templated)
- args: ["ozone", "scm"]
- # Additional Storage Container Manager environment variables (templated)
+# Helm Manager configuration
+helm:
+ # Additional Helm Manager environment variables (templated)
env: []
- # Additional Storage Container Manager envFrom items to set up environment
variables (templated)
+ # Additional Helm Manager envFrom items to set up environment variables
(templated)
envFrom: []
# Helm Manager resource requests and limits
resources: {}
@@ -221,20 +292,19 @@ scm:
podAnnotations: {}
# Constrain Helm Manager pods to nodes with specific node labels
nodeSelector: {}
- # Constrain Storage Container Manager pods to nodes by
affinity/anti-affinity rules
+ # Constrain Helm Manager pods to nodes by affinity/anti-affinity rules
affinity: {}
- # Allow to schedule Storage Container Manager pods on nodes with matching
taints
+ # Allow to schedule Helm Manager pods on nodes with matching taints
tolerations: []
- # Storage Container Manager security context (overwrites common security
context)
+ # Helm Manager security context (overwrites common security context)
securityContext: {}
- # Storage Container Manager service configuration
- service:
- type: ClusterIP
- port: 9876
- nodePort: ~
- labels: {}
- annotations: {}
- # Storage Container Manager persistence
+ # Decommissioning is handled with a post-upgrade helm hook job.
+ # To avoid endless retries of decommissioning, this limit is set.
+ # This can happen if PVC has been deleted or is not reachable.
+ # This is used for decommissioning OM
+ backoffLimit: 5
+ # Helm Manager persistence (this is enabled automatically if at least one
+ # of datanode, scm or om is enabled)
persistence:
# Enable persistence
enabled: false
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]