Copilot commented on code in PR #64012: URL: https://github.com/apache/airflow/pull/64012#discussion_r3069238962
########## scripts/tests/test_contributor_scenario.py: ########## @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test simulating a full human/agent contributor workflow (the 'Exam').""" + +from __future__ import annotations + +import os +from unittest.mock import patch + +import pytest +from ci.prek.breeze_context import BreezieContext + Review Comment: `BreezieContext` is imported here, but the test body uses `BreezieContext` (different spelling). That will raise `NameError` and break the scenario test; please use a single, consistent identifier (and/or rely on the provided alias in `breeze_context.py`). ########## scripts/tests/test_contributor_scenario.py: ########## @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test simulating a full human/agent contributor workflow (the 'Exam').""" + +from __future__ import annotations + +import os +from unittest.mock import patch + +import pytest +from ci.prek.breeze_context import BreezieContext + + +class TestContributorScenario: + """Verifies the standard workflow (git add -> prek -> pytest) models correctly.""" + + def test_end_to_end_host_contribution_workflow(self): + """Simulates an agent running the standard workflow entirely on the host.""" + with patch.dict(os.environ, {}, clear=True): + with patch("os.path.exists", return_value=False): + with patch("os.path.isdir", return_value=False): + assert BreezieContext.is_in_breeze() is False + + # Step 1: Stage changes + cmd1 = BreezieContext.get_command("stage-changes", path="airflow/models/dag.py") + assert cmd1 == "git add airflow/models/dag.py" + + # Step 2: Run static checks + cmd2 = BreezieContext.get_command("run-static-checks", module="airflow/models/dag.py") + assert cmd2 == "prek run airflow/models/dag.py" + + # Step 3: Run targeted unit tests + cmd3 = BreezieContext.get_command("run-unit-tests", test_path="tests/models/test_dag.py") + assert cmd3 == "uv run --project airflow pytest tests/models/test_dag.py" Review Comment: This test uses `test_path="tests/models/test_dag.py"`, but there is no top-level `tests/` directory in the repo; core tests live under `airflow-core/tests/...`. Updating this path will make the example workflow reflect real commands contributors can run. ########## scripts/ci/prek/breeze_context.py: ########## @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Breeze context detection and command selection. + +This module exists for agent workflows that must reliably choose between: + +- "host" execution (commands intended to run on the contributor machine) +- "breeze" execution (commands intended to run inside the Breeze container) + +Architecture +------------ +Agents call :meth:`BreezieContext.get_command` with a skill id and parameters. +The helper detects the current runtime context with :meth:`BreezieContext.is_in_breeze`, +optionally using a manual override for debugging/testing. + +Detection strategy (priority) +------------------------------ +1. ``BREEZE_HOME`` environment variable +2. ``/.dockerenv`` (Docker container marker) +3. ``/opt/airflow`` directory presence (Breeze install path) +4. Default to ``host`` + +Testing +------- +This module is covered by unit tests: +- `scripts/tests/test_breeze_context.py` +- `scripts/tests/test_edge_cases.py` + +For manual debugging you can run: +`python3 scripts/ci/prek/breeze_context.py --force-context host` +or +`python3 scripts/ci/prek/breeze_context.py --force-context breeze` +""" + +from __future__ import annotations + +import argparse +import logging +import os +import shlex +from typing import Any, TypedDict + +logger = logging.getLogger(__name__) + + +class SkillParam(TypedDict): + required: bool + + +class SkillDef(TypedDict): + host: str + breeze: str | None + preferred: str + params: dict[str, SkillParam] + + +class BreezeContext: + """Detect whether code is running in Breeze container or on host.""" + + @staticmethod + def is_in_breeze(force_context: str | None = None) -> bool: + """ + Detect whether the current process is running inside Breeze. + + Parameters + ---------- + force_context: + Optional manual override. + - ``"breeze"`` forces Breeze context. + - ``"host"`` forces host context. + - ``None`` uses auto-detection. + + Returns + ------- + bool + ``True`` when running in Breeze, otherwise ``False``. + + Raises + ------ + ValueError + If ``force_context`` is not one of ``"host"`` or ``"breeze"``. + """ + if force_context is not None: + normalized = force_context.strip().lower() + if normalized == "breeze": + logger.info("Context forced to breeze via --force-context") + return True + if normalized == "host": + logger.info("Context forced to host via --force-context") + return False + raise ValueError(f"Invalid force_context={force_context!r}. Expected 'host' or 'breeze'.") + + # 1) BREEZE_HOME environment variable (explicit marker) + try: + breeze_home = os.environ.get("BREEZE_HOME") + except Exception as e: # pragma: no cover (defensive) + logger.exception("Failed to read BREEZE_HOME from environment: %s", e) + breeze_home = None + if breeze_home: + logger.debug("Detected Breeze via BREEZE_HOME=%r", breeze_home) + return True + + dockerenv_marker = "/.dockerenv" + opt_airflow_dir = "/opt/airflow" + + # 2) /.dockerenv (Docker container marker) + try: + if os.path.exists(dockerenv_marker): + logger.debug("Detected Breeze via %s marker", dockerenv_marker) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", dockerenv_marker, e) + + # 3) /opt/airflow (Breeze standard installation path) + try: + if os.path.isdir(opt_airflow_dir): + logger.debug("Detected Breeze via %s directory", opt_airflow_dir) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", opt_airflow_dir, e) + + # Optional: log SSH hint without changing behavior. + try: + ssh_connection = os.environ.get("SSH_CONNECTION") + except Exception: # pragma: no cover (defensive) + ssh_connection = None + if ssh_connection: + logger.debug("SSH_CONNECTION detected; defaulting to host unless Breeze markers exist") + + # 5) Default: assume host + logger.debug("No Breeze markers detected; assuming host execution") + return False + + @staticmethod + def get_command(skill_id: str, *, force_context: str | None = None, **kwargs) -> str: + """ + Generate the appropriate command for the given skill and context. + + Args: + skill_id: Unique skill identifier (e.g., "run-unit-tests") + **kwargs: Skill-specific parameters + + Returns: + Command string to execute + + Raises: + ValueError: If ``skill_id`` is unknown or required parameters are missing. + """ + is_breeze = BreezeContext.is_in_breeze(force_context=force_context) + context = "breeze" if is_breeze else "host" + + # Define all available skills + skills: dict[str, Any] = { + "stage-changes": { + # git add operates on the host working tree — not inside the container. + # This is explicitly a host-only operation per the contributor workflow. + "host": "git add {path}", + "breeze": None, + "preferred": "host", + "params": {"path": {"required": True}}, + }, + "run-static-checks": { + # prek is available on both host and inside Breeze; same command in both contexts. + "host": "prek run {module}", + "breeze": "prek run {module}", + "preferred": "host", + "params": {"module": {"required": False}}, + }, + "run-unit-tests": { + "host": "uv run --project airflow pytest {test_path}", + "breeze": "breeze run pytest {test_path}", Review Comment: The host command hard-codes `uv run --project airflow ...`, but this repo’s uv projects are directories like `airflow-core`, `scripts`, `task-sdk`, etc. Please make the project selectable (e.g. add a `project` param or infer it from `test_path`) so the generated command is runnable. ########## .agents/skills/run-unit-tests/SKILL.md: ########## @@ -0,0 +1,110 @@ +<!-- SPDX-License-Identifier: Apache-2.0 + https://www.apache.org/licenses/LICENSE-2.0 --> + +<!-- markdownlint-disable MD022 --> +--- +name: run-unit-tests +description: Run Airflow unit tests with context-aware command selection. Uses uv on the host and breeze exec inside the container. Supports targeted test paths to avoid running the full suite. +--- +<!-- markdownlint-enable MD022 --> + +Run Unit Tests +============== + +Run a targeted subset of Airflow unit tests. The correct command differs depending +on whether you are running on the **host** or inside a **Breeze** container. + +Source of truth for test workflow semantics is `contributing-docs`; this skill maps +that guidance into context-aware host/Breeze command execution. + +Context Detection +----------------- + +Check for the Docker container marker before choosing a command: + +```bash +# You are inside Breeze if this file exists: +ls /.dockerenv +``` + +| Context | Detection | Command | +|---|---|---| +| **Host** | `/.dockerenv` absent | `uv run --project <PROJECT> pytest <path>` | +| **Breeze** | `/.dockerenv` present | `breeze exec pytest <path>` | + Review Comment: Inside-Breeze guidance uses `breeze exec pytest ...`, but the Airflow docs/CLI examples use `breeze run pytest ...` (and the new `BreezeContext.get_command` generates `breeze run`). Please align this skill doc so agents don’t learn a non-standard command. ########## scripts/ci/prek/breeze_context.py: ########## @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Breeze context detection and command selection. + +This module exists for agent workflows that must reliably choose between: + +- "host" execution (commands intended to run on the contributor machine) +- "breeze" execution (commands intended to run inside the Breeze container) + +Architecture +------------ +Agents call :meth:`BreezieContext.get_command` with a skill id and parameters. +The helper detects the current runtime context with :meth:`BreezieContext.is_in_breeze`, +optionally using a manual override for debugging/testing. + +Detection strategy (priority) +------------------------------ +1. ``BREEZE_HOME`` environment variable +2. ``/.dockerenv`` (Docker container marker) +3. ``/opt/airflow`` directory presence (Breeze install path) +4. Default to ``host`` + +Testing +------- +This module is covered by unit tests: +- `scripts/tests/test_breeze_context.py` +- `scripts/tests/test_edge_cases.py` + +For manual debugging you can run: +`python3 scripts/ci/prek/breeze_context.py --force-context host` +or +`python3 scripts/ci/prek/breeze_context.py --force-context breeze` +""" + +from __future__ import annotations + +import argparse +import logging +import os +import shlex +from typing import Any, TypedDict + +logger = logging.getLogger(__name__) + + +class SkillParam(TypedDict): + required: bool + + +class SkillDef(TypedDict): + host: str + breeze: str | None + preferred: str + params: dict[str, SkillParam] + + +class BreezeContext: + """Detect whether code is running in Breeze container or on host.""" + + @staticmethod + def is_in_breeze(force_context: str | None = None) -> bool: + """ + Detect whether the current process is running inside Breeze. + + Parameters + ---------- + force_context: + Optional manual override. + - ``"breeze"`` forces Breeze context. + - ``"host"`` forces host context. + - ``None`` uses auto-detection. + + Returns + ------- + bool + ``True`` when running in Breeze, otherwise ``False``. + + Raises + ------ + ValueError + If ``force_context`` is not one of ``"host"`` or ``"breeze"``. + """ + if force_context is not None: + normalized = force_context.strip().lower() + if normalized == "breeze": + logger.info("Context forced to breeze via --force-context") + return True + if normalized == "host": + logger.info("Context forced to host via --force-context") + return False + raise ValueError(f"Invalid force_context={force_context!r}. Expected 'host' or 'breeze'.") + + # 1) BREEZE_HOME environment variable (explicit marker) + try: + breeze_home = os.environ.get("BREEZE_HOME") + except Exception as e: # pragma: no cover (defensive) + logger.exception("Failed to read BREEZE_HOME from environment: %s", e) + breeze_home = None + if breeze_home: + logger.debug("Detected Breeze via BREEZE_HOME=%r", breeze_home) + return True + + dockerenv_marker = "/.dockerenv" + opt_airflow_dir = "/opt/airflow" + + # 2) /.dockerenv (Docker container marker) + try: + if os.path.exists(dockerenv_marker): + logger.debug("Detected Breeze via %s marker", dockerenv_marker) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", dockerenv_marker, e) + + # 3) /opt/airflow (Breeze standard installation path) + try: + if os.path.isdir(opt_airflow_dir): + logger.debug("Detected Breeze via %s directory", opt_airflow_dir) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", opt_airflow_dir, e) + + # Optional: log SSH hint without changing behavior. + try: + ssh_connection = os.environ.get("SSH_CONNECTION") + except Exception: # pragma: no cover (defensive) + ssh_connection = None + if ssh_connection: + logger.debug("SSH_CONNECTION detected; defaulting to host unless Breeze markers exist") + + # 5) Default: assume host + logger.debug("No Breeze markers detected; assuming host execution") + return False + + @staticmethod + def get_command(skill_id: str, *, force_context: str | None = None, **kwargs) -> str: + """ + Generate the appropriate command for the given skill and context. + + Args: + skill_id: Unique skill identifier (e.g., "run-unit-tests") + **kwargs: Skill-specific parameters + + Returns: + Command string to execute + + Raises: + ValueError: If ``skill_id`` is unknown or required parameters are missing. + """ + is_breeze = BreezeContext.is_in_breeze(force_context=force_context) + context = "breeze" if is_breeze else "host" + + # Define all available skills + skills: dict[str, Any] = { + "stage-changes": { + # git add operates on the host working tree — not inside the container. + # This is explicitly a host-only operation per the contributor workflow. + "host": "git add {path}", + "breeze": None, + "preferred": "host", + "params": {"path": {"required": True}}, Review Comment: Skills are defined both here (hard-coded command templates) and in `.agents/skills/*/SKILL.md`. Since the docs already diverge (`breeze exec` vs `breeze run`), consider making one the single source of truth or adding a drift check so agent-facing docs don’t go stale. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
