This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git


The following commit(s) were added to refs/heads/main by this push:
     new f4d0e6ed feat(spec-validator): enforce SPDX-License-Identifier header on spec files (#514)
f4d0e6ed is described below

commit f4d0e6eddc736590e1964b379411fc8ca3dae5a4
Author: Justin Mclean <[email protected]>
AuthorDate: Sun Jun 14 09:55:09 2026 +1000

    feat(spec-validator): enforce SPDX-License-Identifier header on spec files (#514)
    
    Adds check #8 to the spec-validator: every .md file that carries YAML
    frontmatter must include '<!-- SPDX-License-Identifier: Apache-2.0' before
    the opening --- delimiter. Files without frontmatter (README, overview) are
    skipped silently — no change to their handling.
    
    Adds validate_spdx_header() in __init__.py, plumbs it into validate_file(),
    updates the module docstring, and adds TestValidateSpdxHeader (7 tests) in
    the test suite. All 64 tests pass.
    
    Also fixes run-workspace-check.sh: on Apple Silicon Macs where git is an
    x86_64 binary (Rosetta), pre-commit hooks spawn x86_64 Python that cannot
    load arm64-compiled extensions (mypy, cffi). The script now probes for this
    condition at runtime and falls back to arch -arm64 python3 for the mypy and
    pytest checks.
    
    Generated-by: Claude (Sonnet 4.6)
---
 tools/dev/run-workspace-check.sh                   | 25 ++++++++-
 .../spec-validator/src/spec_validator/__init__.py  | 37 ++++++++++++-
 tools/spec-validator/tests/test_spec_validator.py  | 62 ++++++++++++++++++++--
 3 files changed, 119 insertions(+), 5 deletions(-)

diff --git a/tools/dev/run-workspace-check.sh b/tools/dev/run-workspace-check.sh
index 75c89a09..317453ce 100755
--- a/tools/dev/run-workspace-check.sh
+++ b/tools/dev/run-workspace-check.sh
@@ -66,6 +66,22 @@ CHECK_KEY="$1"
 CHECK_CMD="$2"
 shift 2
 
+# On Apple Silicon Macs, git ships as x86_64 (Rosetta). Pre-commit hooks
+# invoked by git therefore run in an x86_64 context where universal Python
+# binaries also run as x86_64, which cannot load arm64-compiled extensions
+# (e.g. mypy .so files, cffi, cryptography). Probe for this condition at
+# runtime: if the workspace venv Python runs as x86_64 but arch -arm64 can
+# switch it to arm64, prefer the native arm64 path for Python-based checks.
+_NATIVE_PYTHON=""
+_VENV_PYTHON="$(pwd)/.venv/bin/python3"
+if [[ -x "$_VENV_PYTHON" ]]; then
+  _CURR_ARCH=$("$_VENV_PYTHON" -c "import platform; print(platform.machine())" 2>/dev/null || echo "")
+  _ARM64_ARCH=$(arch -arm64 "$_VENV_PYTHON" -c "import platform; print(platform.machine())" 2>/dev/null || echo "")
+  if [[ "$_CURR_ARCH" == "x86_64" ]] && [[ "$_ARM64_ARCH" == "arm64" ]]; then
+    _NATIVE_PYTHON="arch -arm64 $_VENV_PYTHON"
+  fi
+fi
+
 # Discover workspace members + per-check applicability. The Python
 # helper walks the root `[tool.uv.workspace] members` list, opens
 # each member's pyproject.toml, and emits one line per applicable
@@ -148,8 +164,15 @@ for member in $applicable; do
   # `uv run --directory` so each member runs with its own `cwd` —
   # ruff / mypy / pytest configs resolve paths relative to the
   # member root.
+  # On Apple Silicon + Rosetta, use native arm64 Python for Python-based
+  # checks to avoid loading arm64 extensions from an x86_64 Python process.
+  # Ruff is a Rust binary that already runs natively so no prefix is needed.
   # shellcheck disable=SC2086 # CHECK_CMD may legitimately be multi-token
-  if ! uv run --directory "$member" $CHECK_CMD "$@"; then
+  if [[ -n "$_NATIVE_PYTHON" ]] && [[ "$CHECK_KEY" != "ruff" ]] && [[ "$CHECK_KEY" != "ruff-format" ]]; then
+    if ! (cd "$member" && $_NATIVE_PYTHON -m $CHECK_CMD "$@"); then
+      failed+=("$name")
+    fi
+  elif ! uv run --directory "$member" $CHECK_CMD "$@"; then
     failed+=("$name")
   fi
 done
diff --git a/tools/spec-validator/src/spec_validator/__init__.py b/tools/spec-validator/src/spec_validator/__init__.py
index 02c53ac7..809b324e 100644
--- a/tools/spec-validator/src/spec_validator/__init__.py
+++ b/tools/spec-validator/src/spec_validator/__init__.py
@@ -28,6 +28,9 @@ Checks every .md file that carries a YAML frontmatter block:
    Behaviour & contract, Out of scope, Acceptance criteria, Validation,
    Known gaps.
 7. Validation section contains at least one fenced code block.
+8. SPDX license header — every spec file must carry the Apache-2.0 SPDX
+   identifier (``<!-- SPDX-License-Identifier: Apache-2.0``) before the
+   opening ``---`` frontmatter delimiter.
 
 Files without frontmatter (README.md, overview.md) are skipped silently.
 
@@ -65,6 +68,9 @@ REQUIRED_SECTIONS: tuple[str, ...] = (
     "Known gaps",
 )
 
+# Every spec .md file must carry this SPDX identifier before the frontmatter.
+SPDX_MARKER = "SPDX-License-Identifier: Apache-2.0"
+
 DEFAULT_SPEC_DIR = Path("tools/spec-loop/specs")
 
 _HTML_COMMENT_RE = re.compile(r"<!--[\s\S]*?-->")
@@ -286,6 +292,35 @@ def validate_body(path: Path, text: str) -> list[Violation]:
     return violations
 
 
+# ---------------------------------------------------------------------------
+# SPDX header validation
+# ---------------------------------------------------------------------------
+
+
+def validate_spdx_header(path: Path, text: str) -> list[Violation]:
+    """Check that a spec file carries the Apache-2.0 SPDX license header.
+
+    Every ``.md`` file that has a YAML frontmatter block (i.e. is a spec,
+    not a README or overview) must contain ``SPDX-License-Identifier:
+    Apache-2.0`` somewhere before the opening ``---`` delimiter.  Files
+    without frontmatter are skipped silently — they are not spec files.
+    """
+    if parse_frontmatter(text) is None:
+        return []  # Not a spec file; no SPDX required.
+
+    if SPDX_MARKER not in text:
+        return [
+            Violation(
+                path,
+                1,
+                f"missing SPDX license header — spec files must contain "
+                f"'<!-- {SPDX_MARKER}' before the frontmatter delimiter; "
+                f"see AGENTS.md § Commit and PR conventions",
+            )
+        ]
+    return []
+
+
 # ---------------------------------------------------------------------------
 # Orchestrator
 # ---------------------------------------------------------------------------
@@ -296,7 +331,7 @@ def validate_file(path: Path) -> list[Violation]:
         text = path.read_text(encoding="utf-8")
     except OSError as exc:
         return [Violation(path, None, f"cannot read file: {exc}")]
-    return validate_frontmatter(path, text) + validate_body(path, text)
+    return validate_spdx_header(path, text) + validate_frontmatter(path, text) + validate_body(path, text)
 
 
 def collect_spec_files(target: Path) -> list[Path]:
diff --git a/tools/spec-validator/tests/test_spec_validator.py b/tools/spec-validator/tests/test_spec_validator.py
index 4cfeaebe..6b2137de 100644
--- a/tools/spec-validator/tests/test_spec_validator.py
+++ b/tools/spec-validator/tests/test_spec_validator.py
@@ -29,6 +29,7 @@ from spec_validator import (
     ALLOWED_MODE,
     ALLOWED_STATUS,
     REQUIRED_SECTIONS,
+    SPDX_MARKER,
     extract_section_headings,
     get_section_body,
     has_acceptance_items,
@@ -37,6 +38,7 @@ from spec_validator import (
     run_validation,
     validate_body,
     validate_frontmatter,
+    validate_spdx_header,
     validation_has_code_block,
 )
 
@@ -92,8 +94,12 @@ _VALID_SPEC = textwrap.dedent("""\
     """)
 
 
-def _make_spec(*, status: str = "stable", **overrides: str) -> str:
-    """Build a minimal valid spec, replacing frontmatter values as needed."""
+def _make_spec(*, status: str = "stable", spdx: bool = True, **overrides: str) -> str:
+    """Build a minimal valid spec, replacing frontmatter values as needed.
+
+    Pass ``spdx=False`` to produce a spec intentionally missing the SPDX
+    header (used by SPDX-check tests).
+    """
     defaults = {
         "title": "Test spec",
         "kind": "feature",
@@ -119,7 +125,8 @@ def _make_spec(*, status: str = "stable", **overrides: str) -> str:
         "## Validation\n\n```bash\npytest\n```",
     )
     fm = "\n".join(fm_lines)
-    return f"---\n{fm}\n---\n\n# Test spec\n\n{body_sections}\n"
+    header = f"<!-- {SPDX_MARKER}\n     https://www.apache.org/licenses/LICENSE-2.0 -->\n\n" if spdx else ""
+    return f"{header}---\n{fm}\n---\n\n# Test spec\n\n{body_sections}\n"
 
 
 # ---------------------------------------------------------------------------
@@ -223,6 +230,55 @@ class TestValidationHasCodeBlock:
         assert validation_has_code_block(text) is False
 
 
+# ---------------------------------------------------------------------------
+# validate_spdx_header
+# ---------------------------------------------------------------------------
+
+
+class TestValidateSpdxHeader:
+    def test_spec_with_spdx_passes(self, tmp_path: Path) -> None:
+        text = _make_spec()  # spdx=True by default
+        p = tmp_path / "spec.md"
+        p.write_text(text)
+        assert validate_spdx_header(p, text) == []
+
+    def test_spec_missing_spdx_flagged(self, tmp_path: Path) -> None:
+        text = _make_spec(spdx=False)
+        p = tmp_path / "spec.md"
+        p.write_text(text)
+        violations = validate_spdx_header(p, text)
+        assert len(violations) == 1
+        assert SPDX_MARKER in violations[0].message
+
+    def test_readme_without_frontmatter_skipped(self, tmp_path: Path) -> None:
+        text = "# README\n\nNo frontmatter, no SPDX required.\n"
+        p = tmp_path / "README.md"
+        p.write_text(text)
+        assert validate_spdx_header(p, text) == []
+
+    def test_valid_spec_fixture_has_spdx(self, tmp_path: Path) -> None:
+        p = tmp_path / "spec.md"
+        p.write_text(_VALID_SPEC)
+        assert validate_spdx_header(p, _VALID_SPEC) == []
+
+    def test_spdx_violation_line_number_is_1(self, tmp_path: Path) -> None:
+        text = _make_spec(spdx=False)
+        p = tmp_path / "spec.md"
+        p.write_text(text)
+        violations = validate_spdx_header(p, text)
+        assert violations[0].line == 1
+
+    def test_run_validation_catches_missing_spdx(self, tmp_path: Path) -> None:
+        (tmp_path / "spec.md").write_text(_make_spec(spdx=False))
+        violations = run_validation(tmp_path)
+        assert any(SPDX_MARKER in v.message for v in violations)
+
+    def test_run_validation_accepts_spec_with_spdx(self, tmp_path: Path) -> None:
+        (tmp_path / "spec.md").write_text(_make_spec())
+        violations = [v for v in run_validation(tmp_path) if SPDX_MARKER in v.message]
+        assert violations == []
+
+
 # ---------------------------------------------------------------------------
 # validate_frontmatter
 # ---------------------------------------------------------------------------

Reply via email to