This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new e9608967 feat(spec-validator): add check #9 — validate paths in
Validation blocks (#513)
e9608967 is described below
commit e9608967c7f5bd35e3e2431f414985618ef980f8
Author: Justin Mclean <[email protected]>
AuthorDate: Sun Jun 14 11:24:11 2026 +1000
feat(spec-validator): add check #9 — validate paths in Validation blocks
(#513)
Adds validate_validation_paths() which extracts filesystem paths from
shell patterns in ## Validation code blocks (--project, --directory,
bash -n, shellcheck, test -f) and checks each exists under the repo
root. Shell variables ($) and placeholder tokens (<) are silently
skipped. Catches stale paths after renames before they accumulate.
Also fixes a stale path in mentoring-mode.md (Validation block
referenced .claude/skills/good-first-issue-author/SKILL.md; the
installed name is magpie-good-first-issue-author).
Generated-by: Claude (Opus 4.7)
---
tools/spec-loop/specs/mentoring-mode.md | 2 +-
.../spec-validator/src/spec_validator/__init__.py | 112 +++++++++++++-
tools/spec-validator/tests/test_spec_validator.py | 161 ++++++++++++++++++++-
3 files changed, 268 insertions(+), 7 deletions(-)
diff --git a/tools/spec-loop/specs/mentoring-mode.md
b/tools/spec-loop/specs/mentoring-mode.md
index b5e5cffc..98e768d4 100644
--- a/tools/spec-loop/specs/mentoring-mode.md
+++ b/tools/spec-loop/specs/mentoring-mode.md
@@ -100,7 +100,7 @@ a project can offer a first-time contributor.
```bash
test -f docs/mentoring/spec.md
-test -f .claude/skills/good-first-issue-author/SKILL.md
+test -f .claude/skills/magpie-good-first-issue-author/SKILL.md
uv run --project tools/skill-and-tool-validator --group dev
skill-and-tool-validate
uv run --project tools/skill-evals skill-eval
tools/skill-evals/evals/good-first-issue-author/
```
diff --git a/tools/spec-validator/src/spec_validator/__init__.py
b/tools/spec-validator/src/spec_validator/__init__.py
index 809b324e..dd3b8d64 100644
--- a/tools/spec-validator/src/spec_validator/__init__.py
+++ b/tools/spec-validator/src/spec_validator/__init__.py
@@ -31,6 +31,10 @@ Checks every .md file that carries a YAML frontmatter block:
8. SPDX license header — every spec file must carry the Apache-2.0 SPDX
identifier (``<!-- SPDX-License-Identifier: Apache-2.0``) before the
opening ``---`` frontmatter delimiter.
+9. Validation-path existence — every filesystem path referenced by a
+ shell pattern (``--project``, ``--directory``, ``bash -n``,
+ ``shellcheck``, ``test -f``) in a Validation code block must exist
+ under the repository root. Catches stale paths after renames.
Files without frontmatter (README.md, overview.md) are skipped silently.
@@ -77,6 +81,23 @@ _HTML_COMMENT_RE = re.compile(r"<!--[\s\S]*?-->")
_FENCED_CODE_RE = re.compile(r"^ {0,3}```[\s\S]*?^ {0,3}```", re.MULTILINE)
_YAML_BLOCK_SCALAR_HEADERS: frozenset[str] = frozenset({"|", ">", "|-", "|+",
">-", ">+"})
+# ---------------------------------------------------------------------------
+# Validation-path check constants (check #8)
+# ---------------------------------------------------------------------------
+
+# Patterns that extract filesystem paths from shell validation commands.
+# Each tuple is (compiled regex with one capturing group, human-readable
label).
+_VALIDATION_PATH_EXTRACTORS: list[tuple[re.Pattern[str], str]] = [
+ (re.compile(r"--(?:project|directory)\s+(\S+)"), "uv
--project/--directory"),
+ (re.compile(r"\bbash\s+-n\s+(\S+)"), "bash -n"),
+ (re.compile(r"\bshellcheck\s+(\S+)"), "shellcheck"),
+ (re.compile(r"\btest\s+-f\s+(\S+)"), "test -f"),
+]
+
+# Characters that mark a captured path as a shell variable or placeholder token
+# and should be silently skipped by the path-existence check.
+_PATH_SKIP_CHARS: tuple[str, ...] = ("$", "<")
+
# ---------------------------------------------------------------------------
# Data structures
@@ -293,7 +314,7 @@ def validate_body(path: Path, text: str) -> list[Violation]:
# ---------------------------------------------------------------------------
-# SPDX header validation
+# SPDX header validation (check #8)
# ---------------------------------------------------------------------------
@@ -321,17 +342,97 @@ def validate_spdx_header(path: Path, text: str) ->
list[Violation]:
return []
+# ---------------------------------------------------------------------------
+# Validation-path existence check (check #9)
+# ---------------------------------------------------------------------------
+
+
+def _join_line_continuations(text: str) -> str:
+ r"""Join shell line-continuations (trailing ``\``) into single logical
lines."""
+ return re.sub(r"\\\n\s*", " ", text)
+
+
+def find_repo_root(start: Path | None = None) -> Path:
+ """Walk up from *start* (or CWD) to find the repository root.
+
+ Uses the presence of a ``tools/`` directory as the canonical marker.
+ Falls back to the starting path when no such ancestor is found.
+ """
+ cur = (start or Path.cwd()).resolve()
+ for candidate in (cur, *cur.parents):
+ if (candidate / "tools").is_dir():
+ return candidate
+ return cur
+
+
+def validate_validation_paths(path: Path, text: str, repo_root: Path | None =
None) -> list[Violation]:
+ """Check that filesystem paths in ## Validation code blocks exist on disk.
+
+ Extracts path arguments from these shell patterns inside fenced code
+ blocks of the ``## Validation`` section:
+
+ - ``uv run --project <path>`` and ``uv run --directory <path>``
+ - ``bash -n <file>``
+ - ``shellcheck <file>``
+ - ``test -f <file>``
+
+ Paths containing ``$`` (shell variables) or ``<`` (placeholder tokens)
+ are silently skipped. Bare tokens without a ``/`` are also skipped.
+ Relative paths are resolved against the repository root.
+ """
+ if parse_frontmatter(text) is None:
+ return [] # Not a spec file
+
+ section_body = get_section_body(text, "Validation")
+ if not section_body:
+ return []
+
+ root = repo_root or find_repo_root(path.parent)
+ violations: list[Violation] = []
+
+ for block_match in _FENCED_CODE_RE.finditer(section_body):
+ block_text = _join_line_continuations(block_match.group())
+ for pattern, label in _VALIDATION_PATH_EXTRACTORS:
+ for m in pattern.finditer(block_text):
+ raw = m.group(1).rstrip(";\\|&")
+ if not raw:
+ continue
+ # Skip shell variables and placeholder tokens
+ if any(skip in raw for skip in _PATH_SKIP_CHARS):
+ continue
+ # Skip bare command names without a path separator
+ if "/" not in raw:
+ continue
+ target = Path(raw) if Path(raw).is_absolute() else root / raw
+ if not target.exists():
+ violations.append(
+ Violation(
+ path,
+ None,
+ f"validation command references missing path:
{raw!r} "
+ f"(pattern: {label}; resolved to {target})",
+ )
+ )
+
+ return violations
+
+
# ---------------------------------------------------------------------------
# Orchestrator
# ---------------------------------------------------------------------------
-def validate_file(path: Path) -> list[Violation]:
+def validate_file(path: Path, repo_root: Path | None = None) ->
list[Violation]:
try:
text = path.read_text(encoding="utf-8")
except OSError as exc:
return [Violation(path, None, f"cannot read file: {exc}")]
- return validate_spdx_header(path, text) + validate_frontmatter(path, text)
+ validate_body(path, text)
+ return (
+ validate_spdx_header(path, text)
+ + validate_frontmatter(path, text)
+ + validate_body(path, text)
+ + validate_validation_paths(path, text, repo_root=repo_root)
+ )
def collect_spec_files(target: Path) -> list[Path]:
@@ -341,10 +442,11 @@ def collect_spec_files(target: Path) -> list[Path]:
return sorted(target.rglob("*.md"))
-def run_validation(target: Path) -> list[Violation]:
+def run_validation(target: Path, repo_root: Path | None = None) ->
list[Violation]:
+ root = repo_root or find_repo_root(target)
violations: list[Violation] = []
for path in collect_spec_files(target):
- violations.extend(validate_file(path))
+ violations.extend(validate_file(path, repo_root=root))
return violations
diff --git a/tools/spec-validator/tests/test_spec_validator.py
b/tools/spec-validator/tests/test_spec_validator.py
index 6b2137de..d33c6491 100644
--- a/tools/spec-validator/tests/test_spec_validator.py
+++ b/tools/spec-validator/tests/test_spec_validator.py
@@ -31,6 +31,7 @@ from spec_validator import (
REQUIRED_SECTIONS,
SPDX_MARKER,
extract_section_headings,
+ find_repo_root,
get_section_body,
has_acceptance_items,
main,
@@ -39,6 +40,7 @@ from spec_validator import (
validate_body,
validate_frontmatter,
validate_spdx_header,
+ validate_validation_paths,
validation_has_code_block,
)
@@ -85,7 +87,7 @@ _VALID_SPEC = textwrap.dedent("""\
## Validation
```bash
- uv run --project tools/example --group dev pytest
+ pytest
```
## Known gaps
@@ -129,6 +131,25 @@ def _make_spec(*, status: str = "stable", spdx: bool =
True, **overrides: str) -
return f"{header}---\n{fm}\n---\n\n# Test spec\n\n{body_sections}\n"
+def _make_spec_with_validation(cmd: str) -> str:
+ """Build a minimal valid spec with a custom Validation command."""
+ body_sections = "\n\n".join(f"## {s}\n\nContent." for s in
REQUIRED_SECTIONS)
+ body_sections = body_sections.replace(
+ "## Validation\n\nContent.",
+ f"## Validation\n\n```bash\n{cmd}\n```",
+ )
+ fm = (
+ "title: Test spec\n"
+ "status: stable\n"
+ "kind: feature\n"
+ "mode: Triage\n"
+ "source: MISSION.md\n"
+ "acceptance:\n"
+ " - One criterion.\n"
+ )
+ return f"---\n{fm}---\n\n# Test spec\n\n{body_sections}\n"
+
+
# ---------------------------------------------------------------------------
# Frontmatter parsing
# ---------------------------------------------------------------------------
@@ -446,6 +467,144 @@ class TestRunValidation:
assert "violation" in captured.out
+# ---------------------------------------------------------------------------
+# Validation-path check (check #8)
+# ---------------------------------------------------------------------------
+
+
+class TestValidateValidationPaths:
+ """Tests for check #8: paths in ## Validation code blocks must exist."""
+
+ def test_existing_project_path_no_violation(self, tmp_path: Path) -> None:
+ (tmp_path / "tools" / "my-tool").mkdir(parents=True)
+ spec = _make_spec_with_validation("uv run --project tools/my-tool
--group dev pytest")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_missing_project_path_violation(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation("uv run --project tools/nonexistent
--group dev pytest")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert len(violations) == 1
+ assert "tools/nonexistent" in violations[0].message
+
+ def test_shell_variable_skipped(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation(
+ "for t in a b; do uv run --project tools/$t --group dev pytest;
done"
+ )
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_placeholder_token_skipped(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation("uv run --project tools/<tool-name>
--group dev pytest")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_bare_command_skipped(self, tmp_path: Path) -> None:
+ # "pytest" alone has no "/" — not a path
+ spec = _make_spec_with_validation("pytest")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_bash_n_existing_file_no_violation(self, tmp_path: Path) -> None:
+ script = tmp_path / "tools" / "run.sh"
+ script.parent.mkdir(parents=True)
+ script.touch()
+ spec = _make_spec_with_validation("bash -n tools/run.sh")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_bash_n_missing_file_violation(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation("bash -n tools/run.sh")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert len(violations) == 1
+ assert "tools/run.sh" in violations[0].message
+ assert "bash -n" in violations[0].message
+
+ def test_shellcheck_missing_file_violation(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation("shellcheck tools/run.sh")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert len(violations) == 1
+ assert "tools/run.sh" in violations[0].message
+
+ def test_shellcheck_existing_file_no_violation(self, tmp_path: Path) ->
None:
+ script = tmp_path / "tools" / "run.sh"
+ script.parent.mkdir(parents=True)
+ script.touch()
+ spec = _make_spec_with_validation("shellcheck tools/run.sh")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_test_f_existing_file_no_violation(self, tmp_path: Path) -> None:
+ readme = tmp_path / "docs" / "setup" / "README.md"
+ readme.parent.mkdir(parents=True)
+ readme.touch()
+ spec = _make_spec_with_validation("test -f docs/setup/README.md")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_test_f_missing_file_violation(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation("test -f docs/setup/README.md")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert len(violations) == 1
+ assert "docs/setup/README.md" in violations[0].message
+
+ def test_no_frontmatter_skipped(self, tmp_path: Path) -> None:
+ text = "# No frontmatter\n\n## Validation\n\n```bash\nuv run --project
tools/nonexistent\n```\n"
+ violations = validate_validation_paths(Path("test.md"), text,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_uv_directory_pattern(self, tmp_path: Path) -> None:
+ (tmp_path / "tools" / "my-tool").mkdir(parents=True)
+ spec = _make_spec_with_validation("uv run --directory tools/my-tool
pytest")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_uv_directory_missing_violation(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation("uv run --directory tools/gone
pytest")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert len(violations) == 1
+ assert "tools/gone" in violations[0].message
+
+ def test_line_continuation_joined(self, tmp_path: Path) -> None:
+ (tmp_path / "tools" / "my-tool").mkdir(parents=True)
+ spec = _make_spec_with_validation("uv run --project tools/my-tool \\\n
--group dev pytest")
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert violations == []
+
+ def test_multiple_missing_paths(self, tmp_path: Path) -> None:
+ spec = _make_spec_with_validation(
+ "uv run --project tools/foo --group dev pytest\nbash -n
tools/foo/run.sh"
+ )
+ violations = validate_validation_paths(Path("specs/test.md"), spec,
repo_root=tmp_path)
+ assert len(violations) == 2
+
+
+# ---------------------------------------------------------------------------
+# find_repo_root
+# ---------------------------------------------------------------------------
+
+
+class TestFindRepoRoot:
+ def test_finds_tools_dir(self, tmp_path: Path) -> None:
+ (tmp_path / "tools").mkdir()
+ result = find_repo_root(tmp_path)
+ assert result == tmp_path
+
+ def test_walks_up_to_tools_dir(self, tmp_path: Path) -> None:
+ (tmp_path / "tools").mkdir()
+ child = tmp_path / "tools" / "spec-loop" / "specs"
+ child.mkdir(parents=True)
+ result = find_repo_root(child)
+ assert result == tmp_path
+
+ def test_falls_back_to_start_when_no_tools(self, tmp_path: Path) -> None:
+ isolated = tmp_path / "isolated"
+ isolated.mkdir()
+ result = find_repo_root(isolated)
+ # Should fall back — does not raise; returns some ancestor
+ assert isinstance(result, Path)
+
+
# ---------------------------------------------------------------------------
# Live specs (smoke test)
# ---------------------------------------------------------------------------