Copilot commented on code in PR #64008: URL: https://github.com/apache/airflow/pull/64008#discussion_r3025358363
########## scripts/ci/pre_commit/skill_graph.py: ########## @@ -0,0 +1,186 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Build dependency graph from skills.json (prereqs) and output tree + JSON graph.""" +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + + +def parse_prereq_skill_ids(prereqs_str: str, skill_ids: set[str]) -> list[str]: + """Return list of skill IDs that appear in prereqs string (comma-separated).""" + if not prereqs_str or not prereqs_str.strip(): + return [] + return [x.strip() for x in prereqs_str.split(",") if x.strip() in skill_ids] + + +def find_cycle(skill_ids: set[str], edges: list[tuple[str, str]]) -> list[str] | None: + """edges are (from_id, to_id) meaning from_id requires to_id. Return cycle as list or None.""" + # Build adjacency: node -> list of nodes it points to (dependencies) + adj: dict[str, list[str]] = {sid: [] for sid in skill_ids} + for from_id, to_id in edges: + adj[from_id].append(to_id) + path: list[str] = [] + path_set: set[str] = set() + in_stack: set[str] = set() + + def visit(n: str) -> list[str] | None: + path.append(n) + path_set.add(n) + in_stack.add(n) + for m in adj.get(n, []): + if m not in path_set: + cycle = visit(m) + if cycle is not None: + return cycle + elif m in in_stack: + # cycle: from m back to m + start = path.index(m) + return path[start:] + [m] + path.pop() + path_set.remove(n) + in_stack.remove(n) + return None + + for sid in skill_ids: + if sid not in path_set: + cycle = visit(sid) + if cycle is not None: + return cycle + return None + + +def build_graph(skills: list[dict]) -> tuple[list[dict], list[dict], dict[str, dict]]: + """ + Build nodes (for JSON), edges (for JSON), and id->skill for tree. + Edges: from skill_id to prereq_id (skill_id requires prereq_id). + """ + skill_ids = {s["id"] for s in skills} + id_to_skill = {s["id"]: s for s in skills} + nodes = [ + {"id": s["id"], "category": s.get("category", ""), "context": s.get("context", "")} + for s in skills + ] + edges = [] + for s in skills: + sid = s["id"] + prereqs = parse_prereq_skill_ids(s.get("prereqs", "") or "", skill_ids) + for p in prereqs: + edges.append({"from": sid, "to": p, "type": "requires"}) + return nodes, edges, id_to_skill + + +def build_tree_edges(id_to_skill: dict[str, dict], edges: list[dict]) -> dict[str, list[str]]: + """Given edges (from->to = requires), return for each node the list of children (dependents).""" + # edges: from A to B means A requires B. So B is dependency of A. Children of B = skills that require B. + children: dict[str, list[str]] = {sid: [] for sid in id_to_skill} + for e in edges: + child_id = e["from"] + parent_id = e["to"] + children[parent_id].append(child_id) + return children + + +def main() -> int: + parser = argparse.ArgumentParser(description="Build skill dependency graph from skills.json") + parser.add_argument( + "--skills-json", + default=None, + help="Path to skills.json (default: contributing-docs/agent_skills/skills.json under repo root)", + ) + parser.add_argument( + "--output", + default=None, + help="Path to write skill_graph.json (default: contributing-docs/agent_skills/skill_graph.json)", + ) + args = parser.parse_args() + root = Path(__file__).resolve().parents[3] + skills_path = Path(args.skills_json) if args.skills_json else root / "contributing-docs/agent_skills/skills.json" + if not skills_path.exists(): + print("skills.json not found", file=sys.stderr) + return 1 + out_path = Path(args.output) if args.output else root / "contributing-docs/agent_skills/skill_graph.json" + data = json.loads(skills_path.read_text(encoding="utf-8")) + skills = data.get("skills", []) + if not skills: Review Comment: `skill_graph.py` reads `skills.json` and proceeds even if the manifest contains invalid/duplicate skills; it only checks for cycles. Since this file is generated from docs, a malformed `skills.json` will cause confusing failures. Consider reusing the same validation logic as `extract_agent_skills.py`/`validate_skills.py` (or importing it) so graph generation fails with a clear validation error when the manifest is invalid. ########## scripts/ci/agent_skills/breeze_context.py: ########## @@ -0,0 +1,170 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Apache Airflow Agent Skills — Breeze Context Detection. + +Provides runtime host vs. container detection and command routing for +AI coding agents contributing to Apache Airflow. + +Detection priority chain: + 1. AIRFLOW_BREEZE_CONTAINER env var (explicit) + 2. /.dockerenv file (Docker marker) + 2b. /.containerenv file (Podman marker) + 3. /opt/airflow path (Breeze mount) + 4. Default: host +""" + +from __future__ import annotations + +import json +import os +import pathlib + +SKILLS_JSON = pathlib.Path("contributing-docs/agent_skills/skills.json") + + +def get_context(force: str | None = None) -> str: + """ + Return "breeze" if running inside Breeze container, else "host". + + Detection priority: + 1. AIRFLOW_BREEZE_CONTAINER env var + 2. /.dockerenv exists + 2b. /.containerenv exists (Podman) + 3. /opt/airflow exists + 4. default: host + + Args: + force: If set, bypass automatic detection and return this value + directly. Expected values: "host" or "breeze". + """ + if force is not None: + if force not in {"host", "breeze"}: + raise ValueError(f"force must be 'host' or 'breeze', got {force!r}") + return force + + if os.environ.get("AIRFLOW_BREEZE_CONTAINER"): + return "breeze" + if pathlib.Path("/.dockerenv").exists(): + return "breeze" + if pathlib.Path("/.containerenv").exists(): + return "breeze" + if pathlib.Path("/opt/airflow").exists(): + return "breeze" + return "host" + + +def load_skills() -> list[dict]: + """Load skills from the generated skills.json manifest.""" + if not SKILLS_JSON.exists(): + raise FileNotFoundError( + f"skills.json not found at {SKILLS_JSON}. Run extract_agent_skills.py first.", + ) + with SKILLS_JSON.open() as f: + data = json.load(f) + return data["skills"] + + +def get_skill(skill_id: str) -> dict: + """Return a skill definition by id.""" + skills = load_skills() + for skill in skills: + if skill.get("id") == skill_id: + return skill + raise KeyError(f"Skill '{skill_id}' not found. Available: {[s.get('id') for s in skills]}") Review Comment: `get_skill()` raises `KeyError` with a message that includes the full list of available skill IDs. As the skill set grows, this can produce very large error output and be noisy in CI/logs. Consider including only a count plus a short hint (e.g., suggest `query_skills.py`) or truncating the list to the first N IDs. ```suggestion available_ids = [s.get("id") for s in skills] total = len(available_ids) preview_limit = 10 preview_ids = available_ids[:preview_limit] preview_text = ", ".join(str(i) for i in preview_ids) if total > preview_limit: preview_text += ", ..." raise KeyError( f"Skill '{skill_id}' not found among {total} skills. " f"First {min(total, preview_limit)} IDs: [{preview_text}]. " "Use scripts/ci/agent_skills/query_skills.py or inspect skills.json for the full list.", ) ``` ########## scripts/ci/agent_skills/validate_skills.py: ########## @@ -0,0 +1,136 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Validate the Agent Skills manifest against a JSON-schema-like contract.""" + +from __future__ import annotations + +import argparse +import json +import pathlib +import re +import sys + +SKILLS_JSON = pathlib.Path("contributing-docs/agent_skills/skills.json") Review Comment: `SKILLS_JSON` is a relative path, so running `validate_skills.py` from outside the repo root will fail. Consider resolving the default path relative to the repo root (via `Path(__file__)`) and/or adding a `--skills-json` option to make the CLI usable from any working directory. ########## scripts/ci/pre_commit/extract_agent_skills.py: ########## @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Extract and validate .. agent-skill:: blocks from AGENTS.md and contributing-docs/*.rst into skills.json.""" +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path + + +def collect_source_texts(root: Path, docs_file: str, docs_dir: str) -> list[tuple[Path, str]]: + """Return list of (path, text) for AGENTS.md and all .rst under docs_dir.""" + result: list[tuple[Path, str]] = [] + agents_path = Path(docs_file) if Path(docs_file).is_absolute() else root / docs_file + if agents_path.exists(): + result.append((agents_path, agents_path.read_text(encoding="utf-8"))) + contrib_dir = Path(docs_dir) if Path(docs_dir).is_absolute() else root / docs_dir + if contrib_dir.is_dir(): + for rst_path in sorted(contrib_dir.rglob("*.rst")): + result.append((rst_path, rst_path.read_text(encoding="utf-8"))) + return result + +VALID_CONTEXTS = {"host", "breeze", "either"} +VALID_CATEGORIES = {"environment", "testing", "linting", "providers", "dags", "documentation"} +ID_RE = re.compile(r"^[a-z][a-z0-9-]*$") + + +def parse_skills(text: str) -> list[dict]: + """Extract agent-skill blocks from one file's text; return list of skill dicts.""" + blocks = re.split(r"\n\.\. agent-skill::\s*\n", text) + skills = [] + for block in blocks[1:]: # skip content before first block + skill = {} + # Options: :key: value (value can continue on next line if indented) + for m in re.finditer(r"^ :(\w+):\s*(.*?)(?=^ :\w+:|^ \.\.|\Z)", block, re.M | re.S): + key, val = m.group(1), m.group(2) + skill[key] = re.sub(r"\n\s+", " ", val).strip() + # code-block content + cb = re.search(r"\.\. code-block::\s*\w+\s*\n(.*?)(?=\n \.\.|\Z)", block, re.S) + skill["command"] = re.sub(r"^ ", "", cb.group(1).strip()).replace("\n ", "\n") if cb else "" + # expected output + eo = re.search(r"\.\. agent-skill-expected-output::\s*\n(.*?)(?=\n\.\. agent-skill|\Z)", block, re.S) + skill["expected_output"] = re.sub(r"^ ", "", eo.group(1).strip()).replace("\n ", "\n") if eo else "" + skills.append(skill) + return skills + + +def validate_skill(skill: dict, index: int) -> None: + """Raise ValueError if skill is invalid.""" + sid = skill.get("id", "") + if not sid: + raise ValueError(f"Skill at index {index}: missing required field 'id'") + if not ID_RE.match(sid): + raise ValueError(f"Skill '{sid}': id must match ^[a-z][a-z0-9-]*$") + ctx = skill.get("context", "") + if not ctx: + raise ValueError(f"Skill '{sid}': missing required field 'context'") + if ctx not in VALID_CONTEXTS: + raise ValueError(f"Skill '{sid}': context must be one of {sorted(VALID_CONTEXTS)}, got '{ctx}'") + cat = skill.get("category", "") + if not cat: + raise ValueError(f"Skill '{sid}': missing required field 'category'") + if cat not in VALID_CATEGORIES: + raise ValueError(f"Skill '{sid}': category must be one of {sorted(VALID_CATEGORIES)}, got '{cat}'") + desc = (skill.get("description") or "").strip() + if not desc: + raise ValueError(f"Skill '{sid}': missing or empty required field 'description'") + cmd = (skill.get("command") or "").strip() + if not cmd: + raise ValueError(f"Skill '{sid}': missing or empty required field 'command'") + + +def _skill_key_set(skill: dict) -> set[str]: + """Return set of keys that define skill identity and content for comparison.""" + return {"id", "context", "category", "prereqs", "validates", "description", "command", "expected_output"} + + +def _normalize_skill_for_compare(skill: dict) -> dict: + """Return a comparable dict with only the fields we care about, sorted keys.""" + keys = _skill_key_set(skill) + return {k: skill.get(k, "") for k in sorted(keys) if k in skill or k in keys} + Review Comment: Drift detection normalizes skills using a fixed key set that excludes fields like `fallback` / `fallback_condition`, even though those fields are present in the extracted output (see `contributing-docs/agent_skills/skills.json` for `run-single-test`). As a result, changes to these fields will not be detected by `--check`. Consider either including all schema-supported keys in `_skill_key_set()` (or comparing the full dict after normalizing formatting) so drift checking is complete for the manifest contract. ########## scripts/ci/pre_commit/tests/test_e2e.py: ########## @@ -0,0 +1,108 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""End-to-end tests for agent-skills extraction and command routing.""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +import pytest + +PRE_COMMIT_DIR = Path(__file__).resolve().parents[1] +CI_DIR = Path(__file__).resolve().parents[2] +REPO_ROOT = Path(__file__).resolve().parents[4] +sys.path.insert(0, str(CI_DIR)) + +from agent_skills import breeze_context # noqa: E402 + + +def _extract_skills_to_file(skills_file: Path) -> subprocess.CompletedProcess[str]: Review Comment: This test mutates `sys.path` to import `agent_skills` from `scripts/ci`. This can mask real import/package issues and makes behavior depend on how pytest is invoked. Prefer setting up tests so imports work via normal module resolution (e.g., add the repo root to `PYTHONPATH` in the test runner) rather than per-test `sys.path.insert`. ```suggestion REPO_ROOT = Path(__file__).resolve().parents[4] from scripts.ci.agent_skills import breeze_context # noqa: E402 def _extract_skills_to_file(skills_file: Path) -> subprocess.CompletedProcess[str]: def _extract_skills_to_file(skills_file: Path) -> subprocess.CompletedProcess[str]: ``` ########## scripts/ci/pre_commit/extract_agent_skills.py: ########## @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Extract and validate .. agent-skill:: blocks from AGENTS.md and contributing-docs/*.rst into skills.json.""" +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path + + +def collect_source_texts(root: Path, docs_file: str, docs_dir: str) -> list[tuple[Path, str]]: + """Return list of (path, text) for AGENTS.md and all .rst under docs_dir.""" + result: list[tuple[Path, str]] = [] + agents_path = Path(docs_file) if Path(docs_file).is_absolute() else root / docs_file + if agents_path.exists(): + result.append((agents_path, agents_path.read_text(encoding="utf-8"))) + contrib_dir = Path(docs_dir) if Path(docs_dir).is_absolute() else root / docs_dir + if contrib_dir.is_dir(): + for rst_path in sorted(contrib_dir.rglob("*.rst")): + result.append((rst_path, rst_path.read_text(encoding="utf-8"))) + return result + +VALID_CONTEXTS = {"host", "breeze", "either"} +VALID_CATEGORIES = {"environment", "testing", "linting", "providers", "dags", "documentation"} +ID_RE = re.compile(r"^[a-z][a-z0-9-]*$") + + +def parse_skills(text: str) -> list[dict]: + """Extract agent-skill blocks from one file's text; return list of skill dicts.""" + blocks = re.split(r"\n\.\. agent-skill::\s*\n", text) + skills = [] + for block in blocks[1:]: # skip content before first block + skill = {} + # Options: :key: value (value can continue on next line if indented) + for m in re.finditer(r"^ :(\w+):\s*(.*?)(?=^ :\w+:|^ \.\.|\Z)", block, re.M | re.S): + key, val = m.group(1), m.group(2) + skill[key] = re.sub(r"\n\s+", " ", val).strip() + # code-block content + cb = re.search(r"\.\. code-block::\s*\w+\s*\n(.*?)(?=\n \.\.|\Z)", block, re.S) + skill["command"] = re.sub(r"^ ", "", cb.group(1).strip()).replace("\n ", "\n") if cb else "" + # expected output + eo = re.search(r"\.\. agent-skill-expected-output::\s*\n(.*?)(?=\n\.\. agent-skill|\Z)", block, re.S) Review Comment: The `agent-skill-expected-output` capture regex is too broad: it stops only at the next top-level `.. agent-skill` or EOF, so for the last skill block in a file it will swallow the remainder of the document. This is already visible in the generated `contributing-docs/agent_skills/skills.json` where `run-static-checks-prek.expected_output` contains large unrelated sections. Please change the stop condition to end the expected-output block on dedent / next directive within the same skill block (e.g., first non-indented line) so only the intended expected output is captured. ```suggestion eo = re.search( r"\.\. agent-skill-expected-output::\s*\n(.*?)(?=\n \.\.|\n\S|\Z)", block, re.S, ) ``` ########## scripts/ci/pre_commit/tests/test_extract_agent_skills.py: ########## @@ -0,0 +1,474 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for extract_agent_skills.py.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +# Add parent so we can import extract_agent_skills +SCRIPT_DIR = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(SCRIPT_DIR)) + +from extract_agent_skills import extract_all_skills, parse_skills, validate_skill + +VALID_SKILL_RST = """ +.. agent-skill:: + :id: setup-breeze-environment + :context: host + :category: environment + :prereqs: docker, python>=3.9 + :validates: breeze-env-ready + :description: Start the Airflow Breeze development environment + + .. code-block:: bash + + breeze start-airflow + + .. agent-skill-expected-output:: + + Airflow webserver at http://localhost:28080 +""" + + +def test_valid_skill_parsed(): + """Sample RST string with one valid skill block, assert all fields extracted correctly.""" + skills = parse_skills(VALID_SKILL_RST) + assert len(skills) == 1 + skill = skills[0] + assert skill["id"] == "setup-breeze-environment" + assert skill["context"] == "host" + assert skill["category"] == "environment" + assert skill["prereqs"] == "docker, python>=3.9" + assert skill["validates"] == "breeze-env-ready" + assert "Breeze development environment" in skill["description"] + assert "breeze start-airflow" in skill["command"] + assert "localhost:28080" in skill["expected_output"] + validate_skill(skill, 0) + + +def test_invalid_context_fails(tmp_path): + """context: 'container' should raise SystemExit with code 1.""" + rst = """ +.. agent-skill:: + :id: bad-context + :context: container + :category: environment + :description: Bad context skill + + .. code-block:: bash + + echo ok + + .. agent-skill-expected-output:: + + ok +""" + docs_file = tmp_path / "AGENTS.md" + docs_file.write_text(rst) + out_file = tmp_path / "skills.json" + result = subprocess.run( + [ + sys.executable, + str(SCRIPT_DIR / "extract_agent_skills.py"), + "--docs-file", + str(docs_file), + "--output", + str(out_file), + ], + capture_output=True, + text=True, + ) Review Comment: Several tests invoke `subprocess.run(...)` without a timeout (e.g., this block and others below). If the script blocks for any reason, pytest can hang. Please add a small `timeout=` to these subprocess calls so failures are bounded and CI is more reliable. ########## contributing-docs/agent_skills/skills.json: ########## @@ -0,0 +1,100 @@ +{ + "version": "1.0", + "generated_by": "extract_agent_skills.py", + "generated_from": "AGENTS.md, contributing-docs/*.rst", + "skill_count": 9, + "skills": [ + { + "id": "setup-breeze-environment", + "context": "host", + "category": "environment", + "prereqs": "docker, python>=3.9", + "validates": "breeze-env-ready", + "description": "Start the Airflow Breeze development environment", + "command": "breeze start-airflow", + "expected_output": "Airflow webserver at http://localhost:28080\nLogin: admin / admin" + }, + { + "id": "run-single-test", + "context": "host", + "category": "testing", + "prereqs": "setup-breeze-environment", + "validates": "tests-pass", + "fallback": "breeze run pytest {test_path} -xvs", + "fallback_condition": "missing_system_deps", + "description": "Run a single test with uv, falling back to breeze if system deps are missing", + "command": "# Primary: uv (no Docker needed, faster)\nuv run --project {project} pytest {test_path} -xvs\n# Fallback: only when system deps missing\n# breeze run pytest {test_path} -xvs", + "expected_output": "PASSED" + }, + { + "id": "run-static-checks", + "context": "host", + "category": "linting", + "prereqs": "setup-breeze-environment", + "validates": "static-checks-pass", + "description": "Run fast static checks with prek", + "command": "prek run --from-ref main --stage pre-commit", + "expected_output": "All checks passed." + }, + { + "id": "run-manual-checks", + "context": "host", + "category": "linting", + "prereqs": "run-static-checks", + "validates": "manual-checks-pass", + "description": "Run slower manual checks with prek", + "command": "prek run --from-ref main --stage manual", + "expected_output": "All checks passed." + }, + { + "id": "build-docs", + "context": "host", + "category": "documentation", + "prereqs": "setup-breeze-environment", + "validates": "docs-build-clean", + "description": "Build Airflow documentation locally", + "command": "breeze build-docs", + "expected_output": "Build finished. The HTML pages are in _build/html." + }, + { + "id": "run-provider-tests", + "context": "host", + "category": "testing", + "prereqs": "setup-breeze-environment", + "validates": "provider-tests-pass", + "description": "Run complete test suite for a specific provider", + "command": "breeze testing providers-tests\n --test-type \"Providers[{provider}]\"", + "expected_output": "All tests passed." + }, + { + "id": "run-type-check", + "context": "host", + "category": "linting", + "prereqs": "setup-breeze-environment", + "validates": "type-check-pass", + "description": "Run mypy type checking on changed files", + "command": "breeze run mypy {path}", + "expected_output": "Success: no issues found" + }, + { + "id": "run-tests-uv-first", + "context": "host", + "category": "testing", + "prereqs": "setup-breeze-environment", + "validates": "tests-pass", + "description": "Run tests with uv, fall back to Breeze if system deps missing", + "command": "uv run --project {project} pytest {test_path} -xvs\n# fallback: breeze run pytest {test_path} -xvs", + "expected_output": "PASSED" + }, + { + "id": "run-static-checks-prek", + "context": "host", + "category": "linting", + "prereqs": "setup-breeze-environment", + "validates": "static-checks-pass", + "description": "Run fast pre-commit static checks on changed files using prek", + "command": "prek run --from-ref main --stage pre-commit", + "expected_output": "All checks passed.\n\n- Running Unit tests inside Breeze environment.\n\n Just run ``pytest filepath+filename`` to run the tests.\n\n.. code-block:: bash\n\n [Breeze:3.10.19] root@63528318c8b1:/opt/airflow# pytest tests/utils/test_dates.py\n ============================================================= test session starts ==============================================================\n platform linux -- Python 3.10.20, pytest-8.3.3, pluggy-1.5.0 -- /usr/python/bin/python\n cachedir: .pytest_cache\n rootdir: /opt/airflow\n configfile: pyproject.toml\n plugins: anyio-4.6.0, time-machine-2.15.0, icdiff-0.9, rerunfailures-14.0, instafail-0.5.0, custom-exit-code-0.3.0, xdist-3.6.1, mock-3.14.0, cov-5.0.0, asyncio-0.24.0, requests-mock-1.12.1, timeouts-1.2.1\n asyncio: mode=strict, default_loop_scope=None\n setup timeout: 0.0s, execution timeout: 0.0s, teardown timeout: 0.0s\n collected 4 items\n\n tests/utils/test_dates.py::TestDates::test _parse_execution_date PASSED [ 25%]\n tests/utils/test_dates.py::TestDates::test_round_time PASSED [ 50%]\n tests/utils/test_dates.py::TestDates::test_infer_time_unit PASSED [ 75%]\n tests/utils/test_dates.py::TestDates::test_scale_time_units PASSED [100%]\n\n ================================================================== 4 passed in 3.30s ===================================================================\n\n- Running All the tests with Breeze by specifying the required python version, backend, backend version\n\n.. code-block:: bash\n\n breeze --backend postgres --postgres-version 15 --python 3.10 --db-reset testing tests --test-type All\n\n- Running specifi c type of test\n\n .. code-block:: bash\n\n breeze --backend postgres --postgres-version 15 --python 3.10 --db-reset testing tests --test-type Core\n\n\n- Running Integration test for specific test type\n\n .. code-block:: bash\n\n breeze --backend postgres --postgres-version 15 --python 3.10 --db-reset testing tests --test-type All --integration mongo\n\n- For more information on Testing visit |09_testing.rst|\n\n .. |09_testing.rst| raw:: html\n\n <a href=\"https://github.com/apache/airflow/blob/main/contributing-docs/09_testing.rst\" target=\"_blank\">09_testing.rst</a>\n\n- Similarly to regular development, you can also debug while testing using your IDE, for more information, you may refer to\n\n |Local and Remote Debugging in IDE|\n\n .. |Local and Remote Debugging in IDE| raw:: html\n\n <a href=\"https://github.com/apache/airflow/blob/main/contributing-docs/07_local_virtualenv.rst#local-and-remote-debugging-in-ide\"\n target=\"_blank\">Local and Remote Debuggi ng in IDE</a>\n\nContribution guide\n##################\n\n- To know how to contribute to the project visit |README.rst|\n\n.. |README.rst| raw:: html\n\n <a href=\"https://github.com/apache/airflow/blob/main/contributing-docs/README.rst\" target=\"_blank\">README.rst</a>\n\n- Following are some of the important links of Contribution documentation\n\n - |Types of contributions|\n\n .. |Types of contributions| raw:: html\n\n <a href=\"https://github.com/apache/airflow/blob/main/contributing-docs/04_how_to_contribute.rst\" target=\"_blank\">\n Types of contributions</a>\n\n - |Roles of contributor|\n\n .. |Roles of contributor| raw:: html\n\n <a href=\"https://github.com/apache/airflow/blob/main/contributing-docs/01_roles_in_airflow_project.rst\" target=\"_blank\">Roles of\n contributor</a>\n\n\n - |Workflow for a contribution|\n\n .. |Workflow for a contribution| raw:: html\n\n <a href=\"https://github.com/apache/airflow/blob/main/contributing-docs/18_contribution_w orkflow.rst\" target=\"_blank\">\n Workflow for a contribution</a>\n\n\n\nRaising Pull Request\n--------------------\n\n1. Go to your GitHub account and open your fork project and click on Branches\n\n .. raw:: html\n\n <div align=\"center\" style=\"padding-bottom:10px\">\n<img src=\"images/quick_start/pr1.png\"\n alt=\"Go to fork and select branches\">\n </div>\n\n2. Click on ``New pull request`` button on branch from which you want to raise a pull request\n\n .. raw:: html\n\n<div align=\"center\" style=\"padding-bottom:10px\">\n <img src=\"images/quick_start/pr2.png\"\n alt=\"Accessing local airflow\">\n</div>\n\n3. Add title and description as per Contributing guidelines and click on ``Create pull request``\n\n .. raw:: html\n\n<div align=\"center\" style=\"padding-bottom:10px\">\n <img src=\"images/quick_start/pr3.png\"\n alt=\"Accessing local airflow\">\n</div>\n\n\nSyncing Fork and rebasing Pull request\n--------------------------------------\n \nOften it takes several days or weeks to discuss and iterate with the PR until it is ready to merge.\nIn the meantime new commits are merged, and you might run into conflicts, therefore you should periodically\nsynchronize main in your fork with the ``apache/airflow`` main and rebase your PR on top of it. Following\ndescribes how to do it.\n\n* `Update new changes made to apache:airflow project to your fork <10_working_with_git.rst#how-to-sync-your-fork>`__\n* `Rebasing pull request <10_working_with_git.rst#how-to-rebase-pr>`__\n\n\nUsing your IDE\n##############\n\nIf you are familiar with Python development and use your favourite editors, Airflow can be setup\nsimilarly to other projects of yours. However, if you need specific instructions for your IDE you\nwill find more detailed instructions here:\n\n* `Pycharm/IntelliJ <quick-start-ide/contributors_quick_start_pycharm.rst>`_\n* `Visual Studio Code <quick-start-ide/contributors_quick_start_vscode.rst>`_\n\n\nUsing Remote develo pment environments\n#####################################\n\nIn order to use remote development environment, you usually need a paid account, but you do not have to\nsetup local machine for development.\n\n* `GitPod <quick-start-ide/contributors_quick_start_gitpod.rst>`_\n* `GitHub Codespaces <quick-start-ide/contributors_quick_start_codespaces.rst>`_\n\n\n----------------\n\nOnce you have your environment set up, you can start contributing to Airflow. You can find more\nabout ways you can contribute in the `How to contribute <04_how_to_contribute.rst>`_ document." + } Review Comment: The committed generated `skills.json` contains an incorrect `expected_output` for `run-static-checks-prek` (it includes a large chunk of unrelated RST content after the intended 'All checks passed.'). This likely comes from the extractor’s expected-output parsing and makes the manifest unusable for consumers. Regenerate `skills.json` after fixing the extractor so that `expected_output` only contains the expected-output block contents. ########## scripts/ci/pre_commit/skill_graph.py: ########## @@ -0,0 +1,186 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Build dependency graph from skills.json (prereqs) and output tree + JSON graph.""" +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + + +def parse_prereq_skill_ids(prereqs_str: str, skill_ids: set[str]) -> list[str]: + """Return list of skill IDs that appear in prereqs string (comma-separated).""" + if not prereqs_str or not prereqs_str.strip(): + return [] + return [x.strip() for x in prereqs_str.split(",") if x.strip() in skill_ids] + + +def find_cycle(skill_ids: set[str], edges: list[tuple[str, str]]) -> list[str] | None: + """edges are (from_id, to_id) meaning from_id requires to_id. Return cycle as list or None.""" + # Build adjacency: node -> list of nodes it points to (dependencies) + adj: dict[str, list[str]] = {sid: [] for sid in skill_ids} + for from_id, to_id in edges: + adj[from_id].append(to_id) + path: list[str] = [] + path_set: set[str] = set() + in_stack: set[str] = set() + + def visit(n: str) -> list[str] | None: + path.append(n) + path_set.add(n) + in_stack.add(n) + for m in adj.get(n, []): + if m not in path_set: + cycle = visit(m) + if cycle is not None: + return cycle + elif m in in_stack: + # cycle: from m back to m + start = path.index(m) + return path[start:] + [m] + path.pop() + path_set.remove(n) + in_stack.remove(n) + return None + + for sid in skill_ids: + if sid not in path_set: + cycle = visit(sid) + if cycle is not None: + return cycle + return None + + +def build_graph(skills: list[dict]) -> tuple[list[dict], list[dict], dict[str, dict]]: + """ + Build nodes (for JSON), edges (for JSON), and id->skill for tree. + Edges: from skill_id to prereq_id (skill_id requires prereq_id). + """ Review Comment: `build_graph()` assumes every skill dict has an `id` key (`{s['id'] for s in skills}`), which will raise `KeyError` if the manifest contains an invalid entry. Consider validating entries (or skipping malformed ones with a clear error) before building sets/dicts so the CLI fails with a helpful message rather than a traceback. ```suggestion """ # Validate that every skill entry has an "id" key before building sets/dicts. for index, s in enumerate(skills): if not isinstance(s, dict): raise ValueError(f"Skill entry at index {index} is not a mapping: {s!r}") if "id" not in s: raise ValueError(f"Skill entry at index {index} is missing required 'id' field: {s!r}") ``` ########## contributing-docs/agent_skills/skill_graph.json: ########## @@ -0,0 +1,71 @@ +{ + "nodes": [ + { + "id": "setup-breeze-environment", + "category": "environment", + "context": "host" + }, + { + "id": "run-single-test", + "category": "testing", + "context": "host" + }, + { + "id": "run-static-checks", + "category": "linting", + "context": "host" + }, + { + "id": "run-manual-checks", + "category": "linting", + "context": "host" + }, + { + "id": "build-docs", + "category": "documentation", + "context": "host" + }, + { + "id": "run-provider-tests", + "category": "testing", + "context": "host" + }, + { + "id": "run-type-check", + "category": "linting", + "context": "host" + } + ], Review Comment: `skill_graph.json` is out of sync with the committed `skills.json`: it only includes 7 nodes (missing the 2 RST-defined skills `run-tests-uv-first` and `run-static-checks-prek`) while `skills.json` reports 9 skills. This suggests the graph was generated from a different input or is stale. Please regenerate `skill_graph.json` from the current `skills.json` (or adjust the workflow so this derived artifact cannot drift). ########## scripts/ci/pre_commit/extract_agent_skills.py: ########## @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Extract and validate .. agent-skill:: blocks from AGENTS.md and contributing-docs/*.rst into skills.json.""" +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path + + +def collect_source_texts(root: Path, docs_file: str, docs_dir: str) -> list[tuple[Path, str]]: + """Return list of (path, text) for AGENTS.md and all .rst under docs_dir.""" + result: list[tuple[Path, str]] = [] + agents_path = Path(docs_file) if Path(docs_file).is_absolute() else root / docs_file + if agents_path.exists(): + result.append((agents_path, agents_path.read_text(encoding="utf-8"))) + contrib_dir = Path(docs_dir) if Path(docs_dir).is_absolute() else root / docs_dir + if contrib_dir.is_dir(): + for rst_path in sorted(contrib_dir.rglob("*.rst")): + result.append((rst_path, rst_path.read_text(encoding="utf-8"))) + return result + +VALID_CONTEXTS = {"host", "breeze", "either"} +VALID_CATEGORIES = {"environment", "testing", "linting", "providers", "dags", "documentation"} +ID_RE = re.compile(r"^[a-z][a-z0-9-]*$") + + +def parse_skills(text: str) -> list[dict]: + """Extract agent-skill blocks from one file's text; return list of skill dicts.""" + blocks = re.split(r"\n\.\. agent-skill::\s*\n", text) + skills = [] + for block in blocks[1:]: # skip content before first block + skill = {} + # Options: :key: value (value can continue on next line if indented) + for m in re.finditer(r"^ :(\w+):\s*(.*?)(?=^ :\w+:|^ \.\.|\Z)", block, re.M | re.S): + key, val = m.group(1), m.group(2) + skill[key] = re.sub(r"\n\s+", " ", val).strip() + # code-block content + cb = re.search(r"\.\. code-block::\s*\w+\s*\n(.*?)(?=\n \.\.|\Z)", block, re.S) + skill["command"] = re.sub(r"^ ", "", cb.group(1).strip()).replace("\n ", "\n") if cb else "" + # expected output + eo = re.search(r"\.\. agent-skill-expected-output::\s*\n(.*?)(?=\n\.\. agent-skill|\Z)", block, re.S) + skill["expected_output"] = re.sub(r"^ ", "", eo.group(1).strip()).replace("\n ", "\n") if eo else "" + skills.append(skill) + return skills Review Comment: `parse_skills()` relies on regexes that are sensitive to exact indentation (e.g., options must start with exactly three spaces, and command/expected-output unindent assumes exactly six spaces). This is brittle for RST and will be hard to maintain as docs evolve. Consider parsing based on “indented block” rules (detect indentation level dynamically) or using docutils to parse directives, so extraction is robust to minor formatting changes. ########## scripts/ci/pre_commit/tests/test_validate_skills.py: ########## @@ -0,0 +1,130 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for validate_skills.py.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +SCRIPT_ROOT = Path(__file__).resolve().parents[2] +sys.path.insert(0, str(SCRIPT_ROOT)) + +from agent_skills import validate_skills # noqa: E402 + + Review Comment: These tests modify `sys.path` at runtime to import `agent_skills`. This can mask import/package issues and make tests order-dependent. Prefer importing via the repo’s normal Python packaging/test configuration (or use `PYTHONPATH` in the test runner) so imports work the same way in CI and locally. ```suggestion import importlib.util from pathlib import Path import pytest SCRIPT_ROOT = Path(__file__).resolve().parents[2] _VALIDATE_SKILLS_PATH = SCRIPT_ROOT / "agent_skills" / "validate_skills.py" _spec = importlib.util.spec_from_file_location("agent_skills.validate_skills", _VALIDATE_SKILLS_PATH) if _spec is None or _spec.loader is None: raise ImportError(f"Cannot load validate_skills from {_VALIDATE_SKILLS_PATH}") validate_skills = importlib.util.module_from_spec(_spec) _spec.loader.exec_module(validate_skills) # type: ignore[arg-type] ``` ########## scripts/ci/pre_commit/tests/test_extract_agent_skills.py: ########## @@ -0,0 +1,474 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for extract_agent_skills.py.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +# Add parent so we can import extract_agent_skills +SCRIPT_DIR = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(SCRIPT_DIR)) + +from extract_agent_skills import extract_all_skills, parse_skills, validate_skill + Review Comment: This test file mutates `sys.path` to import the script module directly. This makes imports dependent on execution location and can hide real packaging issues. Prefer importing via a proper module path (e.g., `scripts.ci.pre_commit...` if available) or configuring tests to include the repo root on `PYTHONPATH` rather than doing per-test `sys.path.insert`. ```suggestion import importlib.util import json import subprocess import sys from pathlib import Path # Load extract_agent_skills module directly from its file path without mutating sys.path SCRIPT_DIR = Path(__file__).resolve().parent.parent _extract_agent_skills_path = SCRIPT_DIR / "extract_agent_skills.py" _spec = importlib.util.spec_from_file_location("extract_agent_skills", _extract_agent_skills_path) _extract_agent_skills_module = importlib.util.module_from_spec(_spec) assert _spec is not None and _spec.loader is not None _spec.loader.exec_module(_extract_agent_skills_module) extract_all_skills = _extract_agent_skills_module.extract_all_skills parse_skills = _extract_agent_skills_module.parse_skills validate_skill = _extract_agent_skills_module.validate_skill ``` ########## scripts/ci/pre_commit/tests/test_e2e.py: ########## @@ -0,0 +1,108 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""End-to-end tests for agent-skills extraction and command routing.""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +import pytest + +PRE_COMMIT_DIR = Path(__file__).resolve().parents[1] +CI_DIR = Path(__file__).resolve().parents[2] +REPO_ROOT = Path(__file__).resolve().parents[4] +sys.path.insert(0, str(CI_DIR)) + +from agent_skills import breeze_context # noqa: E402 + + +def _extract_skills_to_file(skills_file: Path) -> subprocess.CompletedProcess[str]: + extract_script = PRE_COMMIT_DIR / "extract_agent_skills.py" + return subprocess.run( + [ + sys.executable, + str(extract_script), + "--docs-file", + str(REPO_ROOT / "AGENTS.md"), + "--docs-dir", + str(REPO_ROOT / "contributing-docs"), + "--output", + str(skills_file), + ], + capture_output=True, + text=True, + cwd=REPO_ROOT, + ) Review Comment: These end-to-end tests run the extractor/check scripts via `subprocess.run(...)` without any timeout. If the script blocks (e.g., due to unexpected parsing edge cases), pytest can hang. Please add a reasonable `timeout=` to these subprocess calls to make CI failures bounded. ########## scripts/ci/pre_commit/tests/test_skill_graph.py: ########## @@ -0,0 +1,98 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use it except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for skill_graph.py.""" +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +import pytest + +SCRIPT_DIR = Path(__file__).resolve().parent.parent + + +def _run_skill_graph(skills_json: Path, output_json: Path | None = None) -> subprocess.CompletedProcess: + cmd = [sys.executable, str(SCRIPT_DIR / "skill_graph.py"), "--skills-json", str(skills_json)] + if output_json is not None: + cmd.extend(["--output", str(output_json)]) + return subprocess.run(cmd, capture_output=True, text=True) Review Comment: `subprocess.run(..., capture_output=True, text=True)` here should set `check=False/True` explicitly and include a `timeout=` to prevent the test from hanging if the script blocks. Adding a timeout also makes CI failure modes clearer. ```suggestion return subprocess.run(cmd, capture_output=True, text=True, check=False, timeout=30) ``` ########## scripts/ci/agent_skills/query_skills.py: ########## @@ -0,0 +1,118 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Query the Agent Skills manifest for contributors and AI agents.""" + +from __future__ import annotations + +import argparse +import json +import pathlib +import sys + +SKILLS_JSON = pathlib.Path("contributing-docs/agent_skills/skills.json") +GRAPH_JSON = pathlib.Path("contributing-docs/agent_skills/skill_graph.json") + + +def load_skills() -> list[dict]: + with SKILLS_JSON.open() as f: + return json.load(f)["skills"] + + +def load_graph() -> dict: + with GRAPH_JSON.open() as f: + return json.load(f) Review Comment: `load_skills()` / `load_graph()` open `skills.json` / `skill_graph.json` relative to the current working directory. Invoking this CLI from outside the repo root will fail with FileNotFoundError. Consider resolving paths relative to the repo root (e.g., `Path(__file__).resolve().parents[...]`) or accepting a `--skills-json` / `--graph-json` argument (with current paths as defaults). ########## scripts/ci/agent_skills/breeze_context.py: ########## @@ -0,0 +1,170 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Apache Airflow Agent Skills — Breeze Context Detection. + +Provides runtime host vs. container detection and command routing for +AI coding agents contributing to Apache Airflow. + +Detection priority chain: + 1. AIRFLOW_BREEZE_CONTAINER env var (explicit) + 2. /.dockerenv file (Docker marker) + 2b. /.containerenv file (Podman marker) + 3. /opt/airflow path (Breeze mount) + 4. Default: host +""" + +from __future__ import annotations + +import json +import os +import pathlib + +SKILLS_JSON = pathlib.Path("contributing-docs/agent_skills/skills.json") Review Comment: `SKILLS_JSON` is a relative path, which means `breeze_context` will fail to find the manifest if imported/used from a working directory other than the repo root. Consider resolving the default path relative to the repo root (e.g., based on `__file__`) or providing an environment variable/parameter override so consumers can locate the manifest reliably. ```suggestion # Resolve skills manifest path relative to the repository root, not the CWD. # This makes imports/usage from arbitrary working directories reliable. _REPO_ROOT = pathlib.Path(__file__).resolve().parents[3] _DEFAULT_SKILLS_JSON = _REPO_ROOT / "contributing-docs/agent_skills/skills.json" _SKILLS_JSON_OVERRIDE = os.environ.get("AIRFLOW_AGENT_SKILLS_JSON") SKILLS_JSON = pathlib.Path(_SKILLS_JSON_OVERRIDE) if _SKILLS_JSON_OVERRIDE else _DEFAULT_SKILLS_JSON ``` ########## scripts/ci/pre_commit/tests/test_breeze_context.py: ########## @@ -0,0 +1,165 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for breeze_context.py.""" + +from __future__ import annotations + +import json +import pathlib +import sys +from pathlib import Path + +import pytest + +SCRIPT_ROOT = Path(__file__).resolve().parents[2] +sys.path.insert(0, str(SCRIPT_ROOT)) + +from agent_skills import breeze_context # noqa: E402 Review Comment: This test module modifies `sys.path` at runtime to import `agent_skills`. This can make tests order-dependent and hide packaging/import issues. Prefer configuring test execution so the repo root / relevant package is importable without per-test `sys.path.insert`. ```suggestion import importlib.util import json import pathlib from pathlib import Path import pytest SCRIPT_ROOT = Path(__file__).resolve().parents[2] BREEZE_CONTEXT_PATH = SCRIPT_ROOT / "agent_skills" / "breeze_context.py" _spec = importlib.util.spec_from_file_location("agent_skills.breeze_context", BREEZE_CONTEXT_PATH) breeze_context = importlib.util.module_from_spec(_spec) # type: ignore[assignment] assert _spec is not None and _spec.loader is not None _spec.loader.exec_module(breeze_context) # type: ignore[union-attr] ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
