This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new f4d0e6ed feat(spec-validator): enforce SPDX-License-Identifier header
on spec files (#514)
f4d0e6ed is described below
commit f4d0e6eddc736590e1964b379411fc8ca3dae5a4
Author: Justin Mclean <[email protected]>
AuthorDate: Sun Jun 14 09:55:09 2026 +1000
feat(spec-validator): enforce SPDX-License-Identifier header on spec files
(#514)
Adds check #8 to the spec-validator: every .md file that carries YAML
frontmatter must include '<!-- SPDX-License-Identifier: Apache-2.0' before
the opening --- delimiter. Files without frontmatter (README, overview) are
skipped silently — no change to their handling.
Adds validate_spdx_header() in __init__.py, plumbs it into validate_file(),
updates the module docstring, and adds TestValidateSpdxHeader (7 tests) in
the test suite. All 64 tests pass.
Also fixes run-workspace-check.sh: on Apple Silicon Macs where git is an
x86_64 binary (Rosetta), pre-commit hooks spawn x86_64 Python that cannot
load arm64-compiled extensions (mypy, cffi). The script now probes for this
condition at runtime and, when it is detected, runs the workspace venv's
python3 under arch -arm64 for the Python-based checks (every check except
ruff and ruff-format, e.g. mypy and pytest).
Generated-by: Claude (Sonnet 4.6)
---
tools/dev/run-workspace-check.sh | 25 ++++++++-
.../spec-validator/src/spec_validator/__init__.py | 37 ++++++++++++-
tools/spec-validator/tests/test_spec_validator.py | 62 ++++++++++++++++++++--
3 files changed, 119 insertions(+), 5 deletions(-)
diff --git a/tools/dev/run-workspace-check.sh b/tools/dev/run-workspace-check.sh
index 75c89a09..317453ce 100755
--- a/tools/dev/run-workspace-check.sh
+++ b/tools/dev/run-workspace-check.sh
@@ -66,6 +66,22 @@ CHECK_KEY="$1"
CHECK_CMD="$2"
shift 2
+# On Apple Silicon Macs, git ships as x86_64 (Rosetta). Pre-commit hooks
+# invoked by git therefore run in an x86_64 context where universal Python
+# binaries also run as x86_64, which cannot load arm64-compiled extensions
+# (e.g. mypy .so files, cffi, cryptography). Probe for this condition at
+# runtime: if the workspace venv Python runs as x86_64 but arch -arm64 can
+# switch it to arm64, prefer the native arm64 path for Python-based checks.
+_NATIVE_PYTHON=""
+_VENV_PYTHON="$(pwd)/.venv/bin/python3"
+if [[ -x "$_VENV_PYTHON" ]]; then
+ _CURR_ARCH=$("$_VENV_PYTHON" -c "import platform; print(platform.machine())"
2>/dev/null || echo "")
+ _ARM64_ARCH=$(arch -arm64 "$_VENV_PYTHON" -c "import platform;
print(platform.machine())" 2>/dev/null || echo "")
+ if [[ "$_CURR_ARCH" == "x86_64" ]] && [[ "$_ARM64_ARCH" == "arm64" ]]; then
+ _NATIVE_PYTHON="arch -arm64 $_VENV_PYTHON"
+ fi
+fi
+
# Discover workspace members + per-check applicability. The Python
# helper walks the root `[tool.uv.workspace] members` list, opens
# each member's pyproject.toml, and emits one line per applicable
@@ -148,8 +164,15 @@ for member in $applicable; do
# `uv run --directory` so each member runs with its own `cwd` —
# ruff / mypy / pytest configs resolve paths relative to the
# member root.
+ # On Apple Silicon + Rosetta, use native arm64 Python for Python-based
+ # checks to avoid loading arm64 extensions from an x86_64 Python process.
+ # Ruff is a Rust binary that already runs natively so no prefix is needed.
# shellcheck disable=SC2086 # CHECK_CMD may legitimately be multi-token
- if ! uv run --directory "$member" $CHECK_CMD "$@"; then
+ if [[ -n "$_NATIVE_PYTHON" ]] && [[ "$CHECK_KEY" != "ruff" ]] && [[
"$CHECK_KEY" != "ruff-format" ]]; then
+ if ! (cd "$member" && $_NATIVE_PYTHON -m $CHECK_CMD "$@"); then
+ failed+=("$name")
+ fi
+ elif ! uv run --directory "$member" $CHECK_CMD "$@"; then
failed+=("$name")
fi
done
diff --git a/tools/spec-validator/src/spec_validator/__init__.py
b/tools/spec-validator/src/spec_validator/__init__.py
index 02c53ac7..809b324e 100644
--- a/tools/spec-validator/src/spec_validator/__init__.py
+++ b/tools/spec-validator/src/spec_validator/__init__.py
@@ -28,6 +28,9 @@ Checks every .md file that carries a YAML frontmatter block:
Behaviour & contract, Out of scope, Acceptance criteria, Validation,
Known gaps.
7. Validation section contains at least one fenced code block.
+8. SPDX license header — every spec file must carry the Apache-2.0 SPDX
+ identifier (``<!-- SPDX-License-Identifier: Apache-2.0``) before the
+ opening ``---`` frontmatter delimiter.
Files without frontmatter (README.md, overview.md) are skipped silently.
@@ -65,6 +68,9 @@ REQUIRED_SECTIONS: tuple[str, ...] = (
"Known gaps",
)
+# Every spec .md file must carry this SPDX identifier before the frontmatter.
+SPDX_MARKER = "SPDX-License-Identifier: Apache-2.0"
+
DEFAULT_SPEC_DIR = Path("tools/spec-loop/specs")
_HTML_COMMENT_RE = re.compile(r"<!--[\s\S]*?-->")
@@ -286,6 +292,35 @@ def validate_body(path: Path, text: str) ->
list[Violation]:
return violations
+# ---------------------------------------------------------------------------
+# SPDX header validation
+# ---------------------------------------------------------------------------
+
+
+def validate_spdx_header(path: Path, text: str) -> list[Violation]:
+ """Check that a spec file carries the Apache-2.0 SPDX license header.
+
+ Every ``.md`` file that has a YAML frontmatter block (i.e. is a spec,
+ not a README or overview) must contain ``SPDX-License-Identifier:
+ Apache-2.0`` somewhere before the opening ``---`` delimiter. Files
+ without frontmatter are skipped silently — they are not spec files.
+ """
+ if parse_frontmatter(text) is None:
+ return [] # Not a spec file; no SPDX required.
+
+ if SPDX_MARKER not in text:
+ return [
+ Violation(
+ path,
+ 1,
+ f"missing SPDX license header — spec files must contain "
+ f"'<!-- {SPDX_MARKER}' before the frontmatter delimiter; "
+ f"see AGENTS.md § Commit and PR conventions",
+ )
+ ]
+ return []
+
+
# ---------------------------------------------------------------------------
# Orchestrator
# ---------------------------------------------------------------------------
@@ -296,7 +331,7 @@ def validate_file(path: Path) -> list[Violation]:
text = path.read_text(encoding="utf-8")
except OSError as exc:
return [Violation(path, None, f"cannot read file: {exc}")]
- return validate_frontmatter(path, text) + validate_body(path, text)
+ return validate_spdx_header(path, text) + validate_frontmatter(path, text)
+ validate_body(path, text)
def collect_spec_files(target: Path) -> list[Path]:
diff --git a/tools/spec-validator/tests/test_spec_validator.py
b/tools/spec-validator/tests/test_spec_validator.py
index 4cfeaebe..6b2137de 100644
--- a/tools/spec-validator/tests/test_spec_validator.py
+++ b/tools/spec-validator/tests/test_spec_validator.py
@@ -29,6 +29,7 @@ from spec_validator import (
ALLOWED_MODE,
ALLOWED_STATUS,
REQUIRED_SECTIONS,
+ SPDX_MARKER,
extract_section_headings,
get_section_body,
has_acceptance_items,
@@ -37,6 +38,7 @@ from spec_validator import (
run_validation,
validate_body,
validate_frontmatter,
+ validate_spdx_header,
validation_has_code_block,
)
@@ -92,8 +94,12 @@ _VALID_SPEC = textwrap.dedent("""\
""")
-def _make_spec(*, status: str = "stable", **overrides: str) -> str:
- """Build a minimal valid spec, replacing frontmatter values as needed."""
+def _make_spec(*, status: str = "stable", spdx: bool = True, **overrides: str)
-> str:
+ """Build a minimal valid spec, replacing frontmatter values as needed.
+
+ Pass ``spdx=False`` to produce a spec intentionally missing the SPDX
+ header (used by SPDX-check tests).
+ """
defaults = {
"title": "Test spec",
"kind": "feature",
@@ -119,7 +125,8 @@ def _make_spec(*, status: str = "stable", **overrides: str)
-> str:
"## Validation\n\n```bash\npytest\n```",
)
fm = "\n".join(fm_lines)
- return f"---\n{fm}\n---\n\n# Test spec\n\n{body_sections}\n"
+ header = f"<!-- {SPDX_MARKER}\n
https://www.apache.org/licenses/LICENSE-2.0 -->\n\n" if spdx else ""
+ return f"{header}---\n{fm}\n---\n\n# Test spec\n\n{body_sections}\n"
# ---------------------------------------------------------------------------
@@ -223,6 +230,55 @@ class TestValidationHasCodeBlock:
assert validation_has_code_block(text) is False
+# ---------------------------------------------------------------------------
+# validate_spdx_header
+# ---------------------------------------------------------------------------
+
+
+class TestValidateSpdxHeader:
+ def test_spec_with_spdx_passes(self, tmp_path: Path) -> None:
+ text = _make_spec() # spdx=True by default
+ p = tmp_path / "spec.md"
+ p.write_text(text)
+ assert validate_spdx_header(p, text) == []
+
+ def test_spec_missing_spdx_flagged(self, tmp_path: Path) -> None:
+ text = _make_spec(spdx=False)
+ p = tmp_path / "spec.md"
+ p.write_text(text)
+ violations = validate_spdx_header(p, text)
+ assert len(violations) == 1
+ assert SPDX_MARKER in violations[0].message
+
+ def test_readme_without_frontmatter_skipped(self, tmp_path: Path) -> None:
+ text = "# README\n\nNo frontmatter, no SPDX required.\n"
+ p = tmp_path / "README.md"
+ p.write_text(text)
+ assert validate_spdx_header(p, text) == []
+
+ def test_valid_spec_fixture_has_spdx(self, tmp_path: Path) -> None:
+ p = tmp_path / "spec.md"
+ p.write_text(_VALID_SPEC)
+ assert validate_spdx_header(p, _VALID_SPEC) == []
+
+ def test_spdx_violation_line_number_is_1(self, tmp_path: Path) -> None:
+ text = _make_spec(spdx=False)
+ p = tmp_path / "spec.md"
+ p.write_text(text)
+ violations = validate_spdx_header(p, text)
+ assert violations[0].line == 1
+
+ def test_run_validation_catches_missing_spdx(self, tmp_path: Path) -> None:
+ (tmp_path / "spec.md").write_text(_make_spec(spdx=False))
+ violations = run_validation(tmp_path)
+ assert any(SPDX_MARKER in v.message for v in violations)
+
+ def test_run_validation_accepts_spec_with_spdx(self, tmp_path: Path) ->
None:
+ (tmp_path / "spec.md").write_text(_make_spec())
+ violations = [v for v in run_validation(tmp_path) if SPDX_MARKER in
v.message]
+ assert violations == []
+
+
# ---------------------------------------------------------------------------
# validate_frontmatter
# ---------------------------------------------------------------------------