jykae commented on code in PR #68074: URL: https://github.com/apache/airflow/pull/68074#discussion_r3371312530
########## chart/files/db_migrate.py: ########## @@ -0,0 +1,247 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Bidirectional Airflow metadata DB reconciliation for the helm chart. + +Decides at runtime whether the helm release wants a forward migrate, a +downgrade, or a no-op, and runs the right command: + +* target == current -> no-op (idempotent check) +* target > current -> ``airflow db migrate`` inside this job's container + (uses the TARGET image, which ships forward scripts). +* target < current -> ``airflow db downgrade --to-version <target>`` + executed inside the still-running api-server pod (the OLD image still + ships the reverse scripts), followed by scaling every DB-touching + workload (api-server, scheduler, triggerer, dag-processor, worker) to + zero so that no OLD pod keeps talking to the now-downgraded schema. Helm + then patches those workloads back to ``replicas: N`` with the TARGET + image as the upgrade proceeds, so the cluster comes back up cleanly on + the target version. This means a downgrade trades the otherwise-broken + rolling-update window for a brief outage (which is unavoidable when the + schema goes backwards). 
+ +Required env: + +* ``AIRFLOW_TARGET_VERSION`` - the version the chart is being upgraded/installed to. +* ``POD_NAMESPACE`` - release namespace, injected via downward API. +* ``RELEASE_NAME`` - the helm release name, used to scope the scale-down to + only the workloads owned by this release. + +Reference: https://github.com/apache/airflow/issues/68072 +""" + +from __future__ import annotations + +import os +import subprocess +import sys +import time + +from alembic.config import Config +from alembic.migration import MigrationContext +from alembic.script import ScriptDirectory +from kubernetes import client, config as k8s_config +from kubernetes.stream import stream +from sqlalchemy.exc import OperationalError + +import airflow +from airflow.settings import engine + +# NOTE: _REVISION_HEADS_MAP is a private symbol in airflow.utils.db. Tracked in +# #68072 to expose a public accessor; using the private name is the only way +# today to map a target version string to an alembic revision. +from airflow.utils.db import _REVISION_HEADS_MAP + + +def decide_action(target: str) -> str: + """Return one of ``noop``, ``forward``, ``downgrade``, ``fresh``.""" + target_rev = _REVISION_HEADS_MAP.get(target) + if target_rev is None: + # Unknown target version (e.g. dev build). Be conservative: forward only. + return "forward" Review Comment: Good catch. Added `_resolve_target_rev()` which falls back to the highest mapped version `<= target` (mirroring Airflow's own CLI behaviour), plus a regression test `test_decide_action_resolves_patch_version_via_nearest_lower` that exercises 3.2.2 → rev_320. ########## chart/templates/jobs/migrate-database-job.yaml: ########## @@ -105,6 +105,16 @@ spec: {{- end }} {{- if .Values.migrateDatabaseJob.args }} args: {{- tpl (toYaml .Values.migrateDatabaseJob.args) . | nindent 12 }} + {{- else }} Review Comment: Fixed. 
Switched to `{{- if not (kindIs "invalid" .Values.migrateDatabaseJob.args) }}` so an explicit `[]` is distinguished from `~` / unset and is no longer overridden by the embedded reconciler. ########## chart/tests/helm_tests/airflow_aux/test_db_migrate_script.py: ########## @@ -0,0 +1,305 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for the embedded bidirectional reconciler ``chart/files/db_migrate.py``. + +These exercise the runtime behaviour of the script itself (the helm template +tests only check that the script is embedded with the right env/args/RBAC). +The decide_action regression test exists because the first iteration walked +the target image's ScriptDirectory from base to ``current_rev``, which raises +``RevisionError`` in the actual downgrade case where ``current_rev`` is newer +than anything the target image knows about. See +https://github.com/apache/airflow/issues/68072. 
+""" + +from __future__ import annotations + +import importlib.util +import pathlib +import types +from unittest import mock + +import pytest +from sqlalchemy.exc import OperationalError + +DB_MIGRATE_PATH = pathlib.Path(__file__).resolve().parents[3] / "files" / "db_migrate.py" + + +@pytest.fixture(scope="module") +def db_migrate(): + """Load chart/files/db_migrate.py as a module. + + The file is normally fed to ``python3 -c`` by the helm job rather than + imported, so it lives outside the chart's Python package tree. + """ + spec = importlib.util.spec_from_file_location("chart_db_migrate", DB_MIGRATE_PATH) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +# -------------------------------------------------------------------------- +# decide_action +# -------------------------------------------------------------------------- + + +@pytest.fixture +def patched_revision_map(db_migrate, monkeypatch): + """Install a stable {version: revision} mapping for decide_action tests.""" + fake_map = { + "3.0.0": "rev_300", + "3.1.0": "rev_310", + "3.2.0": "rev_320", + } + monkeypatch.setattr(db_migrate, "_REVISION_HEADS_MAP", fake_map) + return fake_map + + +def _patch_engine_returning(db_migrate, monkeypatch, current_rev): + """Patch ``engine.connect()`` so MigrationContext.get_current_revision() returns *current_rev*.""" + + class _Ctx: + def get_current_revision(self): + return current_rev + + class _Conn: + def __enter__(self): + return self + + def __exit__(self, *_): + return False + + monkeypatch.setattr(db_migrate.engine, "connect", lambda: _Conn()) + monkeypatch.setattr( + db_migrate.MigrationContext, + "configure", + staticmethod(lambda _conn: _Ctx()), + ) + + +def _patch_script_dir(db_migrate, monkeypatch, ancestors_by_head): + """Patch ScriptDirectory.from_config so walk_revisions("base", X) yields ancestors_by_head[X].""" + + class _Rev: + def __init__(self, revision): + self.revision = revision + + class
_ScriptDir: + def walk_revisions(self, base, head): + assert base == "base" + return [_Rev(r) for r in ancestors_by_head[head]] + + monkeypatch.setattr(db_migrate.ScriptDirectory, "from_config", staticmethod(lambda _cfg: _ScriptDir())) + + +def test_decide_action_unknown_target_falls_back_to_forward(db_migrate, monkeypatch): + # Empty map: any target is unknown -> conservative forward migrate. + monkeypatch.setattr(db_migrate, "_REVISION_HEADS_MAP", {}) + assert db_migrate.decide_action("9.9.9") == "forward" + Review Comment: Added `test_decide_action_resolves_patch_version_via_nearest_lower` covering the 3.2.2 → 3.2.0-rev case. ########## chart/tests/helm_tests/airflow_aux/test_db_migrate_script.py: ########## @@ -0,0 +1,305 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for the embedded bidirectional reconciler ``chart/files/db_migrate.py``. + +These exercise the runtime behaviour of the script itself (the helm template +tests only check that the script is embedded with the right env/args/RBAC). 
+The decide_action regression test exists because the first iteration walked +the target image's ScriptDirectory from base to ``current_rev``, which raises +``RevisionError`` in the actual downgrade case where ``current_rev`` is newer +than anything the target image knows about. See +https://github.com/apache/airflow/issues/68072. +""" + +from __future__ import annotations + +import importlib.util +import pathlib +import types +from unittest import mock + +import pytest +from sqlalchemy.exc import OperationalError + +DB_MIGRATE_PATH = pathlib.Path(__file__).resolve().parents[3] / "files" / "db_migrate.py" + + +@pytest.fixture(scope="module") +def db_migrate(): + """Load chart/files/db_migrate.py as a module. + + The file is normally fed to ``python3 -c`` by the helm job rather than + imported, so it lives outside the chart's Python package tree. + """ + spec = importlib.util.spec_from_file_location("chart_db_migrate", DB_MIGRATE_PATH) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +# -------------------------------------------------------------------------- +# decide_action +# -------------------------------------------------------------------------- + + +@pytest.fixture +def patched_revision_map(db_migrate, monkeypatch): + """Install a stable {version: revision} mapping for decide_action tests.""" + fake_map = { + "3.0.0": "rev_300", + "3.1.0": "rev_310", + "3.2.0": "rev_320", + } + monkeypatch.setattr(db_migrate, "_REVISION_HEADS_MAP", fake_map) + return fake_map + + +def _patch_engine_returning(db_migrate, monkeypatch, current_rev): + """Patch ``engine.connect()`` so MigrationContext.get_current_revision() returns *current_rev*.""" + + class _Ctx: + def get_current_revision(self): + return current_rev + + class _Conn: + def __enter__(self): + return self + + def __exit__(self, *_): + return False + + monkeypatch.setattr(db_migrate.engine, "connect", lambda: _Conn()) + monkeypatch.setattr( +
db_migrate.MigrationContext, + "configure", + staticmethod(lambda _conn: _Ctx()), + ) + + +def _patch_script_dir(db_migrate, monkeypatch, ancestors_by_head): + """Patch ScriptDirectory.from_config so walk_revisions("base", X) yields ancestors_by_head[X].""" + + class _Rev: + def __init__(self, revision): + self.revision = revision + + class _ScriptDir: + def walk_revisions(self, base, head): + assert base == "base" + return [_Rev(r) for r in ancestors_by_head[head]] + + monkeypatch.setattr(db_migrate.ScriptDirectory, "from_config", staticmethod(lambda _cfg: _ScriptDir())) + + +def test_decide_action_unknown_target_falls_back_to_forward(db_migrate, monkeypatch): + # Empty map: any target is unknown -> conservative forward migrate. + monkeypatch.setattr(db_migrate, "_REVISION_HEADS_MAP", {}) + assert db_migrate.decide_action("9.9.9") == "forward" + + +def test_decide_action_fresh_when_db_unreachable(db_migrate, monkeypatch, patched_revision_map): + def _raise(*_a, **_kw): + raise OperationalError("SELECT 1", {}, Exception("unreachable")) + + monkeypatch.setattr(db_migrate.engine, "connect", _raise) + assert db_migrate.decide_action("3.1.0") == "fresh" + + +def test_decide_action_fresh_when_no_alembic_row(db_migrate, monkeypatch, patched_revision_map): + _patch_engine_returning(db_migrate, monkeypatch, current_rev=None) + assert db_migrate.decide_action("3.1.0") == "fresh" + + +def test_decide_action_noop_when_current_equals_target(db_migrate, monkeypatch, patched_revision_map): + _patch_engine_returning(db_migrate, monkeypatch, current_rev="rev_310") + assert db_migrate.decide_action("3.1.0") == "noop" + + +def test_decide_action_forward_when_current_is_ancestor_of_target( + db_migrate, monkeypatch, patched_revision_map +): + # current=3.0.0, target=3.1.0 -> rev_300 is in target's ancestor set. 
+ _patch_engine_returning(db_migrate, monkeypatch, current_rev="rev_300") + _patch_script_dir(db_migrate, monkeypatch, ancestors_by_head={"rev_310": ["rev_300", "rev_310"]}) + assert db_migrate.decide_action("3.1.0") == "forward" + + +def test_decide_action_downgrade_when_current_not_in_target_ancestors( + db_migrate, monkeypatch, patched_revision_map +): + """Regression test for the original blocker. + + On a real downgrade, the TARGET image's ScriptDirectory does NOT contain + ``current_rev`` (current is newer than target). The previous implementation + called ``walk_revisions("base", current_rev)`` which raised + ``RevisionError`` and never reached the downgrade branch. The fix walks to + ``target_rev`` (always present in target's scripts) and checks membership. + """ + # current=3.2.0 (newer), target=3.1.0 (older). rev_320 is unknown to target. + _patch_engine_returning(db_migrate, monkeypatch, current_rev="rev_320") + _patch_script_dir(db_migrate, monkeypatch, ancestors_by_head={"rev_310": ["rev_300", "rev_310"]}) + assert db_migrate.decide_action("3.1.0") == "downgrade" + + +# -------------------------------------------------------------------------- +# discover_api_server_pod +# -------------------------------------------------------------------------- + + +def _pod(name, ready=True): + pod = types.SimpleNamespace() + pod.metadata = types.SimpleNamespace(name=name) + pod.status = types.SimpleNamespace( + conditions=[types.SimpleNamespace(type="Ready", status="True" if ready else "False")] + ) + return pod + + +def test_discover_api_server_pod_prefers_ready(db_migrate, monkeypatch): + fake_api = mock.MagicMock() + fake_api.list_namespaced_pod.return_value.items = [ + _pod("api-server-old", ready=False), + _pod("api-server-new", ready=True), + ] + monkeypatch.setattr(db_migrate.k8s_config, "load_incluster_config", lambda: None) + monkeypatch.setattr(db_migrate.client, "CoreV1Api", lambda: fake_api) Review Comment: Tightened the kubernetes mocks with 
`spec=k8s_client.CoreV1Api` / `spec=k8s_client.AppsV1Api` where they're used as API stubs (the exec-stream tests keep an unspecced mock because the stubbed `stream` callable accepts the api method as an arg). ########## chart/newsfragments/68074.significant.rst: ########## @@ -0,0 +1,31 @@ +Helm chart now reconciles the Airflow metadata DB bidirectionally on ``helm upgrade`` + +The ``migrate-database-job`` is now run as a ``pre-install,pre-upgrade`` hook +(was ``post-install,post-upgrade``) and its default args run a reconciliation +script that picks one of three actions depending on the relationship between +the chart's ``airflowVersion`` and the alembic head currently in the database: + +* ``target == current`` — no-op. +* ``target > current`` — ``airflow db migrate`` (existing behaviour). +* ``target < current`` — ``airflow db downgrade --to-version <target>`` Review Comment: Updated. The newsfragment now lists four actions including the fresh-install path. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
