This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new ca5acda161 Adding startupProbe to airflow services charts (#33107)
ca5acda161 is described below

commit ca5acda1617a5cdb1d04f125568ffbd264209ec7
Author: Amogh Desai <[email protected]>
AuthorDate: Tue Aug 8 01:33:38 2023 +0530

    Adding startupProbe to airflow services charts (#33107)
---
 chart/templates/_helpers.yaml                      | 33 ++++++++++++
 chart/templates/configmaps/configmap.yaml          |  8 +--
 .../templates/scheduler/scheduler-deployment.yaml  | 11 ++++
 .../templates/webserver/webserver-deployment.yaml  | 13 +++++
 chart/values.schema.json                           | 62 ++++++++++++++++++++++
 chart/values.yaml                                  | 14 +++++
 helm_tests/airflow_core/test_scheduler.py          | 42 +++++++++++++++
 helm_tests/webserver/test_webserver.py             | 25 +++++++--
 8 files changed, 201 insertions(+), 7 deletions(-)

diff --git a/chart/templates/_helpers.yaml b/chart/templates/_helpers.yaml
index 742375f21e..e210222846 100644
--- a/chart/templates/_helpers.yaml
+++ b/chart/templates/_helpers.yaml
@@ -693,6 +693,39 @@ server_tls_key_file = /etc/pgbouncer/server.key
   {{- end }}
 {{- end }}
 
+
+{{- define  "scheduler_startup_check_command" }}
+  {{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
+  - sh
+  - -c
+  - |
+    CONNECTION_CHECK_MAX_COUNT=0 AIRFLOW__LOGGING__LOGGING_LEVEL=ERROR exec 
/entrypoint \
+    airflow jobs check --job-type SchedulerJob --local
+  {{- else if semverCompare ">=2.1.0" .Values.airflowVersion }}
+  - sh
+  - -c
+  - |
+    CONNECTION_CHECK_MAX_COUNT=0 AIRFLOW__LOGGING__LOGGING_LEVEL=ERROR exec 
/entrypoint \
+    airflow jobs check --job-type SchedulerJob --hostname $(hostname)
+  {{- else }}
+  - sh
+  - -c
+  - |
+    CONNECTION_CHECK_MAX_COUNT=0 exec /entrypoint python -Wignore -c "
+    import os
+    os.environ['AIRFLOW__CORE__LOGGING_LEVEL'] = 'ERROR'
+    os.environ['AIRFLOW__LOGGING__LOGGING_LEVEL'] = 'ERROR'
+    from airflow.jobs.scheduler_job import SchedulerJob
+    from airflow.utils.db import create_session
+    from airflow.utils.net import get_hostname
+    import sys
+    with create_session() as session:
+        job = 
session.query(SchedulerJob).filter_by(hostname=get_hostname()).order_by(
+            SchedulerJob.latest_heartbeat.desc()).limit(1).first()
+    sys.exit(0 if job.is_alive() else 1)"
+  {{- end }}
+{{- end }}
+
 {{- define "triggerer_liveness_check_command" }}
   {{- if semverCompare ">=2.5.0" .Values.airflowVersion }}
   - sh
diff --git a/chart/templates/configmaps/configmap.yaml 
b/chart/templates/configmaps/configmap.yaml
index 54fc62cd41..b69d144f99 100644
--- a/chart/templates/configmaps/configmap.yaml
+++ b/chart/templates/configmaps/configmap.yaml
@@ -57,16 +57,16 @@ data:
     {{- .Values.dags.gitSync.knownHosts | nindent 4 }}
   {{- end }}
 
-  {{- if or (eq $.Values.executor "LocalKubernetesExecutor") (eq 
$.Values.executor "KubernetesExecutor") (eq $.Values.executor 
"CeleryKubernetesExecutor") }}
-  {{- if semverCompare ">=1.10.12" .Values.airflowVersion }}
+{{/*  {{- if or (eq $.Values.executor "LocalKubernetesExecutor") (eq 
$.Values.executor "KubernetesExecutor") (eq $.Values.executor 
"CeleryKubernetesExecutor") }}*/}}
+{{/*  {{- if semverCompare ">=1.10.12" .Values.airflowVersion }}*/}}
   pod_template_file.yaml: |-
     {{- if .Values.podTemplate }}
       {{- tpl .Values.podTemplate . | nindent 4 }}
     {{- else }}
       {{- tpl (.Files.Get "files/pod-template-file.kubernetes-helm-yaml") . | 
nindent 4 }}
     {{- end }}
-  {{- end }}
-  {{- end }}
+{{/*  {{- end }}*/}}
+{{/*  {{- end }}*/}}
 
   {{- if .Values.kerberos.enabled }}
   krb5.conf: |-
diff --git a/chart/templates/scheduler/scheduler-deployment.yaml 
b/chart/templates/scheduler/scheduler-deployment.yaml
index 1c752dc003..60e4101618 100644
--- a/chart/templates/scheduler/scheduler-deployment.yaml
+++ b/chart/templates/scheduler/scheduler-deployment.yaml
@@ -195,6 +195,17 @@ spec:
                 {{- else }}
                   {{- include "scheduler_liveness_check_command" . | indent 14 
}}
                 {{- end }}
+          startupProbe:
+            timeoutSeconds: {{ .Values.scheduler.startupProbe.timeoutSeconds }}
+            failureThreshold: {{ 
.Values.scheduler.startupProbe.failureThreshold }}
+            periodSeconds: {{ .Values.scheduler.startupProbe.periodSeconds }}
+            exec:
+              command:
+                {{- if .Values.scheduler.startupProbe.command }}
+                  {{- toYaml .Values.scheduler.startupProbe.command  | nindent 
16 }}
+                {{- else }}
+                  {{- include "scheduler_startup_check_command" . | indent 14 
}}
+                {{- end }}
           {{- if and $local (not $elasticsearch) }}
           # Serve logs if we're in local mode and we don't have elasticsearch 
enabled.
           ports:
diff --git a/chart/templates/webserver/webserver-deployment.yaml 
b/chart/templates/webserver/webserver-deployment.yaml
index 422d467093..7f326bab22 100644
--- a/chart/templates/webserver/webserver-deployment.yaml
+++ b/chart/templates/webserver/webserver-deployment.yaml
@@ -227,6 +227,19 @@ spec:
             timeoutSeconds: {{ .Values.webserver.readinessProbe.timeoutSeconds 
}}
             failureThreshold: {{ 
.Values.webserver.readinessProbe.failureThreshold }}
             periodSeconds: {{ .Values.webserver.readinessProbe.periodSeconds }}
+          startupProbe:
+            httpGet:
+              path: {{ if .Values.config.webserver.base_url }}{{- with 
urlParse (tpl .Values.config.webserver.base_url .) }}{{ .path }}{{ end }}{{ end 
}}/health
+              port: {{ .Values.ports.airflowUI }}
+              {{- if .Values.config.webserver.base_url}}
+              httpHeaders:
+                - name: Host
+                  value: {{ regexReplaceAll ":\\d+$" (urlParse (tpl 
.Values.config.webserver.base_url .)).host  "" }}
+              {{- end }}
+              scheme: {{ .Values.webserver.startupProbe.scheme | default 
"http" }}
+            timeoutSeconds: {{ .Values.webserver.startupProbe.timeoutSeconds }}
+            failureThreshold: {{ 
.Values.webserver.startupProbe.failureThreshold }}
+            periodSeconds: {{ .Values.webserver.startupProbe.periodSeconds }}
           envFrom: {{- include "custom_airflow_environment_from" . | default 
"\n  []" | indent 10 }}
           env:
             {{- include "custom_airflow_environment" . | indent 10 }}
diff --git a/chart/values.schema.json b/chart/values.schema.json
index f56f47053d..f3e62911ea 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -1960,6 +1960,41 @@
                         }
                     }
                 },
+                "startupProbe": {
+                    "description": "Startup probe configuration for scheduler 
container.",
+                    "type": "object",
+                    "additionalProperties": false,
+                    "properties": {
+                        "timeoutSeconds": {
+                            "description": "Number of seconds after which the 
probe times out. Minimum value is 1 second.",
+                            "type": "integer",
+                            "default": 20
+                        },
+                        "failureThreshold": {
+                            "description": "Minimum consecutive failures for 
the probe to be considered failed after having succeeded. Minimum value is 1.",
+                            "type": "integer",
+                            "default": 6
+                        },
+                        "periodSeconds": {
+                            "description": "How often (in seconds) to perform 
the probe. Minimum value is 1.",
+                            "type": "integer",
+                            "default": 10
+                        },
+                        "command": {
+                            "description": "Command for startupProbe",
+                            "type": [
+                                "array",
+                                "null"
+                            ],
+                            "items": {
+                                "type": [
+                                    "string",
+                                    "null"
+                                ]
+                            }
+                        }
+                    }
+                },
                 "replicas": {
                     "description": "Airflow 2.0 allows users to run multiple 
schedulers. This feature is only recommended for MySQL 8+ and PostgreSQL",
                     "type": "integer",
@@ -3762,6 +3797,33 @@
                         }
                     }
                 },
+                "startupProbe": {
+                    "description": "Startup probe configuration.",
+                    "type": "object",
+                    "additionalProperties": false,
+                    "properties": {
+                        "timeoutSeconds": {
+                            "description": "Webserver Startup probe timeout 
seconds.",
+                            "type": "integer",
+                            "default": 20
+                        },
+                        "failureThreshold": {
+                            "description": "Webserver Startup probe failure 
threshold.",
+                            "type": "integer",
+                            "default": 6
+                        },
+                        "periodSeconds": {
+                            "description": "Webserver Startup probe period 
seconds.",
+                            "type": "integer",
+                            "default": 10
+                        },
+                        "scheme": {
+                            "description": "Webserver Startup probe scheme.",
+                            "type": "string",
+                            "default": "HTTP"
+                        }
+                    }
+                },
                 "replicas": {
                     "description": "How many Airflow webserver replicas should 
run.",
                     "type": "integer",
diff --git a/chart/values.yaml b/chart/values.yaml
index 0b4ce06edc..9b43582678 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -721,6 +721,14 @@ scheduler:
     failureThreshold: 5
     periodSeconds: 60
     command: ~
+
+  # Wait for at most 1 minute (6*10s) for the scheduler container to start up.
+  # livenessProbe kicks in after the startup
+  startupProbe:
+    failureThreshold: 6
+    periodSeconds: 10
+    timeoutSeconds: 20
+    command: ~
   # Airflow 2.0 allows users to run multiple schedulers,
   # However this feature is only recommended for MySQL 8+ and Postgres
   replicas: 1
@@ -1066,6 +1074,12 @@ webserver:
     periodSeconds: 10
     scheme: HTTP
 
+  startupProbe:
+    timeoutSeconds: 20
+    failureThreshold: 6
+    periodSeconds: 10
+    scheme: HTTP
+
   # Number of webservers
   replicas: 1
   # Max number of old replicasets to retain
diff --git a/helm_tests/airflow_core/test_scheduler.py 
b/helm_tests/airflow_core/test_scheduler.py
index 57ab3bbe5b..cb0aa7e669 100644
--- a/helm_tests/airflow_core/test_scheduler.py
+++ b/helm_tests/airflow_core/test_scheduler.py
@@ -357,6 +357,30 @@ class TestScheduler:
             "spec.template.spec.containers[0].livenessProbe.exec.command", 
docs[0]
         )
 
+    def test_startupprobe_values_are_configurable(self):
+        docs = render_chart(
+            values={
+                "scheduler": {
+                    "startupProbe": {
+                        "timeoutSeconds": 111,
+                        "failureThreshold": 222,
+                        "periodSeconds": 333,
+                        "command": ["sh", "-c", "echo", "wow such test"],
+                    }
+                },
+            },
+            show_only=["templates/scheduler/scheduler-deployment.yaml"],
+        )
+
+        assert 111 == 
jmespath.search("spec.template.spec.containers[0].startupProbe.timeoutSeconds", 
docs[0])
+        assert 222 == jmespath.search(
+            "spec.template.spec.containers[0].startupProbe.failureThreshold", 
docs[0]
+        )
+        assert 333 == 
jmespath.search("spec.template.spec.containers[0].startupProbe.periodSeconds", 
docs[0])
+        assert ["sh", "-c", "echo", "wow such test"] == jmespath.search(
+            "spec.template.spec.containers[0].startupProbe.exec.command", 
docs[0]
+        )
+
     @pytest.mark.parametrize(
         "airflow_version, probe_command",
         [
@@ -375,6 +399,24 @@ class TestScheduler:
             in 
jmespath.search("spec.template.spec.containers[0].livenessProbe.exec.command", 
docs[0])[-1]
         )
 
+    @pytest.mark.parametrize(
+        "airflow_version, probe_command",
+        [
+            ("1.9.0", "from airflow.jobs.scheduler_job import SchedulerJob"),
+            ("2.1.0", "airflow jobs check --job-type SchedulerJob --hostname 
$(hostname)"),
+            ("2.5.0", "airflow jobs check --job-type SchedulerJob --local"),
+        ],
+    )
+    def test_startupprobe_command_depends_on_airflow_version(self, 
airflow_version, probe_command):
+        docs = render_chart(
+            values={"airflowVersion": f"{airflow_version}"},
+            show_only=["templates/scheduler/scheduler-deployment.yaml"],
+        )
+        assert (
+            probe_command
+            in 
jmespath.search("spec.template.spec.containers[0].startupProbe.exec.command", 
docs[0])[-1]
+        )
+
     @pytest.mark.parametrize(
         "log_persistence_values, expected_volume",
         [
diff --git a/helm_tests/webserver/test_webserver.py 
b/helm_tests/webserver/test_webserver.py
index d76cdb2eaa..cbb9b0a0fa 100644
--- a/helm_tests/webserver/test_webserver.py
+++ b/helm_tests/webserver/test_webserver.py
@@ -25,7 +25,7 @@ from tests.charts.helm_template_generator import render_chart
 class TestWebserverDeployment:
     """Tests webserver deployment."""
 
-    def test_should_add_host_header_to_liveness_and_readiness_probes(self):
+    def 
test_should_add_host_header_to_liveness_and_readiness_and_startup_probes(self):
         docs = render_chart(
             values={
                 "config": {
@@ -41,8 +41,11 @@ class TestWebserverDeployment:
         assert {"name": "Host", "value": "example.com"} in jmespath.search(
             
"spec.template.spec.containers[0].readinessProbe.httpGet.httpHeaders", docs[0]
         )
+        assert {"name": "Host", "value": "example.com"} in jmespath.search(
+            
"spec.template.spec.containers[0].startupProbe.httpGet.httpHeaders", docs[0]
+        )
 
-    def test_should_add_path_to_liveness_and_readiness_probes(self):
+    def 
test_should_add_path_to_liveness_and_readiness_and_startup_probes(self):
         docs = render_chart(
             values={
                 "config": {
@@ -60,6 +63,10 @@ class TestWebserverDeployment:
             
jmespath.search("spec.template.spec.containers[0].readinessProbe.httpGet.path", 
docs[0])
             == "/mypath/path/health"
         )
+        assert (
+            
jmespath.search("spec.template.spec.containers[0].startupProbe.httpGet.path", 
docs[0])
+            == "/mypath/path/health"
+        )
 
     @pytest.mark.parametrize(
         "revision_history_limit, global_revision_history_limit",
@@ -91,6 +98,10 @@ class TestWebserverDeployment:
             
jmespath.search("spec.template.spec.containers[0].readinessProbe.httpGet.httpHeaders",
 docs[0])
             is None
         )
+        assert (
+            
jmespath.search("spec.template.spec.containers[0].startupProbe.httpGet.httpHeaders",
 docs[0])
+            is None
+        )
 
     def test_should_use_templated_base_url_for_probes(self):
         docs = render_chart(
@@ -111,15 +122,20 @@ class TestWebserverDeployment:
         assert {"name": "Host", "value": "release-name.com"} in 
jmespath.search(
             "readinessProbe.httpGet.httpHeaders", container
         )
+        assert {"name": "Host", "value": "release-name.com"} in 
jmespath.search(
+            "startupProbe.httpGet.httpHeaders", container
+        )
         assert "/mypath/release-name/path/health" == 
jmespath.search("livenessProbe.httpGet.path", container)
         assert "/mypath/release-name/path/health" == 
jmespath.search("readinessProbe.httpGet.path", container)
+        assert "/mypath/release-name/path/health" == 
jmespath.search("startupProbe.httpGet.path", container)
 
-    def test_should_add_scheme_to_liveness_and_readiness_probes(self):
+    def 
test_should_add_scheme_to_liveness_and_readiness_and_startup_probes(self):
         docs = render_chart(
             values={
                 "webserver": {
                     "livenessProbe": {"scheme": "HTTPS"},
                     "readinessProbe": {"scheme": "HTTPS"},
+                    "startupProbe": {"scheme": "HTTPS"},
                 }
             },
             show_only=["templates/webserver/webserver-deployment.yaml"],
@@ -131,6 +147,9 @@ class TestWebserverDeployment:
         assert "HTTPS" in jmespath.search(
             "spec.template.spec.containers[0].readinessProbe.httpGet.scheme", 
docs[0]
         )
+        assert "HTTPS" in jmespath.search(
+            "spec.template.spec.containers[0].startupProbe.httpGet.scheme", 
docs[0]
+        )
 
     def 
test_should_add_volume_and_volume_mount_when_exist_webserver_config(self):
         docs = render_chart(

Reply via email to