This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new ca5acda161 Adding startupProbe to airflow services charts (#33107)
ca5acda161 is described below
commit ca5acda1617a5cdb1d04f125568ffbd264209ec7
Author: Amogh Desai <[email protected]>
AuthorDate: Tue Aug 8 01:33:38 2023 +0530
Adding startupProbe to airflow services charts (#33107)
---
chart/templates/_helpers.yaml | 33 ++++++++++++
chart/templates/configmaps/configmap.yaml | 8 +--
.../templates/scheduler/scheduler-deployment.yaml | 11 ++++
.../templates/webserver/webserver-deployment.yaml | 13 +++++
chart/values.schema.json | 62 ++++++++++++++++++++++
chart/values.yaml | 14 +++++
helm_tests/airflow_core/test_scheduler.py | 42 +++++++++++++++
helm_tests/webserver/test_webserver.py | 25 +++++++--
8 files changed, 201 insertions(+), 7 deletions(-)
diff --git a/chart/templates/_helpers.yaml b/chart/templates/_helpers.yaml
index 742375f21e..e210222846 100644
--- a/chart/templates/_helpers.yaml
+++ b/chart/templates/_helpers.yaml
@@ -693,6 +693,39 @@ server_tls_key_file = /etc/pgbouncer/server.key
{{- end }}
{{- end }}
+
+{{- define "scheduler_startup_check_command" }}
+ {{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
+ - sh
+ - -c
+ - |
+ CONNECTION_CHECK_MAX_COUNT=0 AIRFLOW__LOGGING__LOGGING_LEVEL=ERROR exec
/entrypoint \
+ airflow jobs check --job-type SchedulerJob --local
+ {{- else if semverCompare ">=2.1.0" .Values.airflowVersion }}
+ - sh
+ - -c
+ - |
+ CONNECTION_CHECK_MAX_COUNT=0 AIRFLOW__LOGGING__LOGGING_LEVEL=ERROR exec
/entrypoint \
+ airflow jobs check --job-type SchedulerJob --hostname $(hostname)
+ {{- else }}
+ - sh
+ - -c
+ - |
+ CONNECTION_CHECK_MAX_COUNT=0 exec /entrypoint python -Wignore -c "
+ import os
+ os.environ['AIRFLOW__CORE__LOGGING_LEVEL'] = 'ERROR'
+ os.environ['AIRFLOW__LOGGING__LOGGING_LEVEL'] = 'ERROR'
+ from airflow.jobs.scheduler_job import SchedulerJob
+ from airflow.utils.db import create_session
+ from airflow.utils.net import get_hostname
+ import sys
+ with create_session() as session:
+ job =
session.query(SchedulerJob).filter_by(hostname=get_hostname()).order_by(
+ SchedulerJob.latest_heartbeat.desc()).limit(1).first()
+ sys.exit(0 if job.is_alive() else 1)"
+ {{- end }}
+{{- end }}
+
{{- define "triggerer_liveness_check_command" }}
{{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
- sh
diff --git a/chart/templates/configmaps/configmap.yaml
b/chart/templates/configmaps/configmap.yaml
index 54fc62cd41..b69d144f99 100644
--- a/chart/templates/configmaps/configmap.yaml
+++ b/chart/templates/configmaps/configmap.yaml
@@ -57,16 +57,16 @@ data:
{{- .Values.dags.gitSync.knownHosts | nindent 4 }}
{{- end }}
- {{- if or (eq $.Values.executor "LocalKubernetesExecutor") (eq
$.Values.executor "KubernetesExecutor") (eq $.Values.executor
"CeleryKubernetesExecutor") }}
- {{- if semverCompare ">=1.10.12" .Values.airflowVersion }}
+{{/* {{- if or (eq $.Values.executor "LocalKubernetesExecutor") (eq
$.Values.executor "KubernetesExecutor") (eq $.Values.executor
"CeleryKubernetesExecutor") }}*/}}
+{{/* {{- if semverCompare ">=1.10.12" .Values.airflowVersion }}*/}}
pod_template_file.yaml: |-
{{- if .Values.podTemplate }}
{{- tpl .Values.podTemplate . | nindent 4 }}
{{- else }}
{{- tpl (.Files.Get "files/pod-template-file.kubernetes-helm-yaml") . |
nindent 4 }}
{{- end }}
- {{- end }}
- {{- end }}
+{{/* {{- end }}*/}}
+{{/* {{- end }}*/}}
{{- if .Values.kerberos.enabled }}
krb5.conf: |-
diff --git a/chart/templates/scheduler/scheduler-deployment.yaml
b/chart/templates/scheduler/scheduler-deployment.yaml
index 1c752dc003..60e4101618 100644
--- a/chart/templates/scheduler/scheduler-deployment.yaml
+++ b/chart/templates/scheduler/scheduler-deployment.yaml
@@ -195,6 +195,17 @@ spec:
{{- else }}
{{- include "scheduler_liveness_check_command" . | indent 14
}}
{{- end }}
+ startupProbe:
+ timeoutSeconds: {{ .Values.scheduler.startupProbe.timeoutSeconds }}
+ failureThreshold: {{
.Values.scheduler.startupProbe.failureThreshold }}
+ periodSeconds: {{ .Values.scheduler.startupProbe.periodSeconds }}
+ exec:
+ command:
+ {{- if .Values.scheduler.startupProbe.command }}
+ {{- toYaml .Values.scheduler.startupProbe.command | nindent
16 }}
+ {{- else }}
+ {{- include "scheduler_startup_check_command" . | indent 14
}}
+ {{- end }}
{{- if and $local (not $elasticsearch) }}
# Serve logs if we're in local mode and we don't have elasticsearch
enabled.
ports:
diff --git a/chart/templates/webserver/webserver-deployment.yaml
b/chart/templates/webserver/webserver-deployment.yaml
index 422d467093..7f326bab22 100644
--- a/chart/templates/webserver/webserver-deployment.yaml
+++ b/chart/templates/webserver/webserver-deployment.yaml
@@ -227,6 +227,19 @@ spec:
timeoutSeconds: {{ .Values.webserver.readinessProbe.timeoutSeconds
}}
failureThreshold: {{
.Values.webserver.readinessProbe.failureThreshold }}
periodSeconds: {{ .Values.webserver.readinessProbe.periodSeconds }}
+ startupProbe:
+ httpGet:
+ path: {{ if .Values.config.webserver.base_url }}{{- with
urlParse (tpl .Values.config.webserver.base_url .) }}{{ .path }}{{ end }}{{ end
}}/health
+ port: {{ .Values.ports.airflowUI }}
+ {{- if .Values.config.webserver.base_url}}
+ httpHeaders:
+ - name: Host
+ value: {{ regexReplaceAll ":\\d+$" (urlParse (tpl
.Values.config.webserver.base_url .)).host "" }}
+ {{- end }}
+ scheme: {{ .Values.webserver.startupProbe.scheme | default
"http" }}
+ timeoutSeconds: {{ .Values.webserver.startupProbe.timeoutSeconds }}
+ failureThreshold: {{
.Values.webserver.startupProbe.failureThreshold }}
+ periodSeconds: {{ .Values.webserver.startupProbe.periodSeconds }}
envFrom: {{- include "custom_airflow_environment_from" . | default
"\n []" | indent 10 }}
env:
{{- include "custom_airflow_environment" . | indent 10 }}
diff --git a/chart/values.schema.json b/chart/values.schema.json
index f56f47053d..f3e62911ea 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -1960,6 +1960,41 @@
}
}
},
+ "startupProbe": {
+ "description": "Startup probe configuration for scheduler
container.",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "timeoutSeconds": {
+ "description": "Number of seconds after which the
probe times out. Minimum value is 1 second.",
+ "type": "integer",
+ "default": 20
+ },
+ "failureThreshold": {
+ "description": "Minimum consecutive failures for
the probe to be considered failed after having succeeded. Minimum value is 1.",
+ "type": "integer",
+ "default": 6
+ },
+ "periodSeconds": {
+ "description": "How often (in seconds) to perform
the probe. Minimum value is 1.",
+ "type": "integer",
+ "default": 10
+ },
+ "command": {
+ "description": "Command for startupProbe",
+ "type": [
+ "array",
+ "null"
+ ],
+ "items": {
+ "type": [
+ "string",
+ "null"
+ ]
+ }
+ }
+ }
+ },
"replicas": {
"description": "Airflow 2.0 allows users to run multiple
schedulers. This feature is only recommended for MySQL 8+ and PostgreSQL",
"type": "integer",
@@ -3762,6 +3797,33 @@
}
}
},
+ "startupProbe": {
+ "description": "Startup probe configuration.",
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "timeoutSeconds": {
+ "description": "Webserver Startup probe timeout
seconds.",
+ "type": "integer",
+ "default": 20
+ },
+ "failureThreshold": {
+ "description": "Webserver Startup probe failure
threshold.",
+ "type": "integer",
+ "default": 6
+ },
+ "periodSeconds": {
+ "description": "Webserver Startup probe period
seconds.",
+ "type": "integer",
+ "default": 10
+ },
+ "scheme": {
+ "description": "Webserver Startup probe scheme.",
+ "type": "string",
+ "default": "HTTP"
+ }
+ }
+ },
"replicas": {
"description": "How many Airflow webserver replicas should
run.",
"type": "integer",
diff --git a/chart/values.yaml b/chart/values.yaml
index 0b4ce06edc..9b43582678 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -721,6 +721,14 @@ scheduler:
failureThreshold: 5
periodSeconds: 60
command: ~
+
+ # Wait for at most 1 minute (6*10s) for the scheduler container to start up.
+ # The livenessProbe only kicks in once the startupProbe has succeeded.
+ startupProbe:
+ failureThreshold: 6
+ periodSeconds: 10
+ timeoutSeconds: 20
+ command: ~
# Airflow 2.0 allows users to run multiple schedulers,
# However this feature is only recommended for MySQL 8+ and Postgres
replicas: 1
@@ -1066,6 +1074,12 @@ webserver:
periodSeconds: 10
scheme: HTTP
+ startupProbe:
+ timeoutSeconds: 20
+ failureThreshold: 6
+ periodSeconds: 10
+ scheme: HTTP
+
# Number of webservers
replicas: 1
# Max number of old replicasets to retain
diff --git a/helm_tests/airflow_core/test_scheduler.py
b/helm_tests/airflow_core/test_scheduler.py
index 57ab3bbe5b..cb0aa7e669 100644
--- a/helm_tests/airflow_core/test_scheduler.py
+++ b/helm_tests/airflow_core/test_scheduler.py
@@ -357,6 +357,30 @@ class TestScheduler:
"spec.template.spec.containers[0].livenessProbe.exec.command",
docs[0]
)
+ def test_startupprobe_values_are_configurable(self):
+ docs = render_chart(
+ values={
+ "scheduler": {
+ "startupProbe": {
+ "timeoutSeconds": 111,
+ "failureThreshold": 222,
+ "periodSeconds": 333,
+ "command": ["sh", "-c", "echo", "wow such test"],
+ }
+ },
+ },
+ show_only=["templates/scheduler/scheduler-deployment.yaml"],
+ )
+
+ assert 111 ==
jmespath.search("spec.template.spec.containers[0].startupProbe.timeoutSeconds",
docs[0])
+ assert 222 == jmespath.search(
+ "spec.template.spec.containers[0].startupProbe.failureThreshold",
docs[0]
+ )
+ assert 333 ==
jmespath.search("spec.template.spec.containers[0].startupProbe.periodSeconds",
docs[0])
+ assert ["sh", "-c", "echo", "wow such test"] == jmespath.search(
+ "spec.template.spec.containers[0].startupProbe.exec.command",
docs[0]
+ )
+
@pytest.mark.parametrize(
"airflow_version, probe_command",
[
@@ -375,6 +399,24 @@ class TestScheduler:
in
jmespath.search("spec.template.spec.containers[0].livenessProbe.exec.command",
docs[0])[-1]
)
+ @pytest.mark.parametrize(
+ "airflow_version, probe_command",
+ [
+ ("1.9.0", "from airflow.jobs.scheduler_job import SchedulerJob"),
+ ("2.1.0", "airflow jobs check --job-type SchedulerJob --hostname
$(hostname)"),
+ ("2.5.0", "airflow jobs check --job-type SchedulerJob --local"),
+ ],
+ )
+ def test_startupprobe_command_depends_on_airflow_version(self,
airflow_version, probe_command):
+ docs = render_chart(
+ values={"airflowVersion": f"{airflow_version}"},
+ show_only=["templates/scheduler/scheduler-deployment.yaml"],
+ )
+ assert (
+ probe_command
+ in
jmespath.search("spec.template.spec.containers[0].startupProbe.exec.command",
docs[0])[-1]
+ )
+
@pytest.mark.parametrize(
"log_persistence_values, expected_volume",
[
diff --git a/helm_tests/webserver/test_webserver.py
b/helm_tests/webserver/test_webserver.py
index d76cdb2eaa..cbb9b0a0fa 100644
--- a/helm_tests/webserver/test_webserver.py
+++ b/helm_tests/webserver/test_webserver.py
@@ -25,7 +25,7 @@ from tests.charts.helm_template_generator import render_chart
class TestWebserverDeployment:
"""Tests webserver deployment."""
- def test_should_add_host_header_to_liveness_and_readiness_probes(self):
+ def
test_should_add_host_header_to_liveness_and_readiness_and_startup_probes(self):
docs = render_chart(
values={
"config": {
@@ -41,8 +41,11 @@ class TestWebserverDeployment:
assert {"name": "Host", "value": "example.com"} in jmespath.search(
"spec.template.spec.containers[0].readinessProbe.httpGet.httpHeaders", docs[0]
)
+ assert {"name": "Host", "value": "example.com"} in jmespath.search(
+
"spec.template.spec.containers[0].startupProbe.httpGet.httpHeaders", docs[0]
+ )
- def test_should_add_path_to_liveness_and_readiness_probes(self):
+ def
test_should_add_path_to_liveness_and_readiness_and_startup_probes(self):
docs = render_chart(
values={
"config": {
@@ -60,6 +63,10 @@ class TestWebserverDeployment:
jmespath.search("spec.template.spec.containers[0].readinessProbe.httpGet.path",
docs[0])
== "/mypath/path/health"
)
+ assert (
+
jmespath.search("spec.template.spec.containers[0].startupProbe.httpGet.path",
docs[0])
+ == "/mypath/path/health"
+ )
@pytest.mark.parametrize(
"revision_history_limit, global_revision_history_limit",
@@ -91,6 +98,10 @@ class TestWebserverDeployment:
jmespath.search("spec.template.spec.containers[0].readinessProbe.httpGet.httpHeaders",
docs[0])
is None
)
+ assert (
+
jmespath.search("spec.template.spec.containers[0].startupProbe.httpGet.httpHeaders",
docs[0])
+ is None
+ )
def test_should_use_templated_base_url_for_probes(self):
docs = render_chart(
@@ -111,15 +122,20 @@ class TestWebserverDeployment:
assert {"name": "Host", "value": "release-name.com"} in
jmespath.search(
"readinessProbe.httpGet.httpHeaders", container
)
+ assert {"name": "Host", "value": "release-name.com"} in
jmespath.search(
+ "startupProbe.httpGet.httpHeaders", container
+ )
assert "/mypath/release-name/path/health" ==
jmespath.search("livenessProbe.httpGet.path", container)
assert "/mypath/release-name/path/health" ==
jmespath.search("readinessProbe.httpGet.path", container)
+ assert "/mypath/release-name/path/health" ==
jmespath.search("startupProbe.httpGet.path", container)
- def test_should_add_scheme_to_liveness_and_readiness_probes(self):
+ def
test_should_add_scheme_to_liveness_and_readiness_and_startup_probes(self):
docs = render_chart(
values={
"webserver": {
"livenessProbe": {"scheme": "HTTPS"},
"readinessProbe": {"scheme": "HTTPS"},
+ "startupProbe": {"scheme": "HTTPS"},
}
},
show_only=["templates/webserver/webserver-deployment.yaml"],
@@ -131,6 +147,9 @@ class TestWebserverDeployment:
assert "HTTPS" in jmespath.search(
"spec.template.spec.containers[0].readinessProbe.httpGet.scheme",
docs[0]
)
+ assert "HTTPS" in jmespath.search(
+ "spec.template.spec.containers[0].startupProbe.httpGet.scheme",
docs[0]
+ )
def
test_should_add_volume_and_volume_mount_when_exist_webserver_config(self):
docs = render_chart(