This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new b02edf52 lint to make sure skills are ASF-agnostic (#493)
b02edf52 is described below
commit b02edf5215f9d7f5f4e5402f6a86590f2f84e1fd
Author: Justin Mclean <[email protected]>
AuthorDate: Thu Jun 11 22:27:38 2026 +1000
lint to make sure skills are ASF-agnostic (#493)
---
.../src/skill_and_tool_validator/__init__.py | 133 +++++++++++++++++-
.../tests/test_validator.py | 150 +++++++++++++++++++++
2 files changed, 282 insertions(+), 1 deletion(-)
diff --git a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
index 4b617d71..e6c72cd5 100644
--- a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
+++ b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
@@ -17,7 +17,7 @@
"""Validate framework skill definitions.
-This module validates nine aspects of every skill under
+This module validates ten aspects of every skill under
skills/:
1. YAML frontmatter — every SKILL.md must have a valid frontmatter
@@ -54,6 +54,12 @@ skills/:
``tools/skill-evals/evals/<slug>/``. Missing suites are
advisories so in-flight eval PRs do not block the gate while
their branches are pending review.
+10. ASF-coupling advisory lint (SOFT) — flags ASF-coupled tokens in
+ skill bodies (e.g. svn commands, announce@apache.org, Vulnogram
+ URLs, bare PMC/ICLA/incubator) that a non-ASF adopter cannot
+ satisfy without editing the skill. Each hit is tagged with a
+ remedy class (placeholder / adapter / capability-flag). Never
+ fails the run — advisory only.
SOFT categories surface as advisory warnings (stderr) without
failing the run unless ``--strict`` is passed.
@@ -258,6 +264,10 @@ NAME_CONVENTION_CATEGORY = "name_convention"
# License-header check: every skill .md and non-trivial tool Python file must
# carry the Apache-2.0 SPDX identifier or the full ASF preamble.
LICENSE_HEADER_CATEGORY = "license_header"
+# SOFT advisory: ASF-coupled tokens that a non-ASF adopter cannot satisfy without
+# editing the skill body. Each hit is tagged with a remedy class so maintainers
+# know how to generalise it. Never fails the run.
+ASF_COUPLING_CATEGORY = "asf_coupling"
# The `magpie-` namespace prefix every installed framework skill carries.
SKILL_NAME_PREFIX = "magpie-"
@@ -271,6 +281,7 @@ SOFT_CATEGORIES: frozenset[str] = frozenset(
PRIVACY_CATEGORY,
LOWERCASE_F_FIELD_CATEGORY,
EVAL_COVERAGE_CATEGORY,
+ ASF_COUPLING_CATEGORY,
}
)
HARD_CATEGORIES: frozenset[str] = frozenset(
@@ -1687,6 +1698,124 @@ def collect_skill_dirs(root: Path | None = None) -> set[Path]:
return {p.resolve() for p in base.iterdir() if p.is_dir()}
+# ---------------------------------------------------------------------------
+# ASF-coupling advisory lint (project-agnosticism check)
+# ---------------------------------------------------------------------------
+
+# Tiered ASF-coupled token patterns. Each entry is:
+# (compiled regex, confidence level, remedy class, advisory note)
+# Two tiers:
+# high — almost never legitimate in a non-ASF adopter's workflow.
+# low — common in ASF prose but may appear in examples or config docs.
+_ASF_COUPLING_PATTERNS: list[tuple[re.Pattern[str], str, str, str]] = [
+ # High-confidence: very unlikely to appear legitimately outside ASF
workflows
+ (
+ re.compile(r"\bsvn\s+(?:mv|commit|co|checkout|add|delete|rm)\b"),
+ "high",
+ "adapter",
+ "svn command — use release-dist-backend capability flag or a
distribution adapter",
+ ),
+ (
+ re.compile(r"\bannounce@apache\.org\b"),
+ "high",
+ "capability-flag",
+ "hardcoded [email protected] — use <announce-list> placeholder or
release-announce-backend flag",
+ ),
+ (
+ re.compile(r"\bdist/(?:dev|release)/"),
+ "high",
+ "capability-flag",
+ "ASF dist tree path — use release-dist-backend capability flag",
+ ),
+ (
+ re.compile(r"https?://vulnogram\.github\.io"),
+ "high",
+ "capability-flag",
+ "Vulnogram URL — use <cve-tool-url> placeholder or cve-tool capability
flag",
+ ),
+ # Low-confidence: may appear legitimately in ASF-default prose, examples,
+ # or in lines that already carry a placeholder/flag guard.
+ (
+ re.compile(r"\bPMC\b"),
+ "low",
+ "placeholder",
+ "bare 'PMC' — consider <governance-body> placeholder for non-ASF
adopters",
+ ),
+ (
+ re.compile(r"\bICLA\b"),
+ "low",
+ "capability-flag",
+ "ICLA mention — use contributor-intake-mechanism flag (ICLA vs DCO vs
none)",
+ ),
+ (
+ re.compile(r"\bincubator\b", re.IGNORECASE),
+ "low",
+ "placeholder",
+ "incubator mention — use <project-stage> placeholder or lifecycle
capability flag",
+ ),
+]
+
+# Inline markers that indicate a line already names or guards the ASF coupling,
+# so it should not be flagged again. Applied in addition to INLINE_ALLOW_MARKERS.
+_ASF_COUPLING_ALLOW_MARKERS: tuple[str, ...] = (
+ # Existing capability-flag names that already generalise the coupling
+ "release-dist-backend",
+ "release_dist_backend",
+ "release-announce-backend",
+ "release_announce_backend",
+ "release_approval_mechanism",
+ "release-approval-mechanism",
+ "contributor-intake-mechanism",
+ "contributor_intake_mechanism",
+ "cve-tool",
+ # Placeholder forms that already generalise the coupling
+ "<announce-list>",
+ "<governance-body>",
+ "<project-stage>",
+ "<cve-tool",
+ # Phrases that explicitly name the ASF default profile context
+ "ASF default",
+ "ASF profile",
+ "ASF adopter",
+ "asf-default",
+)
+
+
+def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]:
+ """Flag ASF-coupled tokens in skill bodies as advisory hints.
+
+ SOFT — advisory only; surfaces on stderr, never fails the run. Each
+ hit is tagged with a confidence level and a remedy class (placeholder /
+ adapter / capability-flag) so maintainers know how to generalise the
+ coupling without regressing the ASF default profile.
+
+ Reuses the existing ALLOWLIST_PATHS and INLINE_ALLOW_MARKERS machinery
+ from validate_placeholders. Additional _ASF_COUPLING_ALLOW_MARKERS
+ cover lines that already name the generalisation mechanism.
+ """
+ if is_path_allowlisted(path):
+ return
+
+ lines = text.splitlines()
+ for line_no, line in enumerate(lines, start=1):
+ # Shared allowlist markers (e.g., "e.g.", "example:") already cover
+ # intentional explanatory mentions.
+ if line_has_inline_allow_marker(line):
+ continue
+ # ASF-coupling-specific markers: line already names the guard mechanism.
+ if any(marker in line for marker in _ASF_COUPLING_ALLOW_MARKERS):
+ continue
+ for pattern, confidence, remedy, note in _ASF_COUPLING_PATTERNS:
+ m = pattern.search(line)
+ if m:
+ yield Violation(
+ path,
+ line_no,
+ f"asf-coupling [{confidence}] remedy:{remedy} — {note}
(matched: {m.group()!r})",
+ category=ASF_COUPLING_CATEGORY,
+ )
+
+
# ---------------------------------------------------------------------------
# gh list --limit check
# ---------------------------------------------------------------------------
@@ -1794,6 +1923,7 @@ def run_validation(root: Path | None = None) ->
list[Violation]:
violations.extend(validate_principle_compliance(path, text))
violations.extend(validate_privacy_patterns(path, text))
violations.extend(validate_trigger_preservation(path, text,
repo_root=repo_root))
+ violations.extend(validate_asf_coupling(path, text))
# All skill files get link + placeholder + security-pattern checks
violations.extend(validate_links(path, text, skill_dirs, doc_files))
@@ -1885,6 +2015,7 @@ def main(argv: list[str] | None = None) -> int:
_SOFT_RULE_PREFIXES: tuple[str, ...] = (
"action-inventory",
+ "asf-coupling",
"chain-handoff",
"criteria-source",
"distinct-from",
diff --git a/tools/skill-and-tool-validator/tests/test_validator.py
b/tools/skill-and-tool-validator/tests/test_validator.py
index 3fda038f..2f9b9725 100644
--- a/tools/skill-and-tool-validator/tests/test_validator.py
+++ b/tools/skill-and-tool-validator/tests/test_validator.py
@@ -30,6 +30,7 @@ from skill_and_tool_validator import (
_PRIVACY_EXTERNAL_CONTENT_MODES,
ALL_CATEGORIES,
ALLOWED_MODES,
+ ASF_COUPLING_CATEGORY,
EVAL_COVERAGE_CATEGORY,
FORBIDDEN_PATTERNS,
GH_LIST_CATEGORY,
@@ -61,6 +62,7 @@ from skill_and_tool_validator import (
resolve_link,
run_validation,
slugify,
+ validate_asf_coupling,
validate_capability_sync,
validate_eval_coverage,
validate_frontmatter,
@@ -2195,6 +2197,154 @@ def _make_tools_root(tmp_path: Path) -> Path:
return root
+class TestValidateAsfCoupling:
+ """Tests for the SOFT ASF-coupling advisory lint."""
+
+ def _skill(self, body: str) -> str:
+ """Wrap body in a minimal valid SKILL.md."""
+ return (
+ "---\n"
+ "name: magpie-test\n"
+ "description: Test skill.\n"
+ "license: Apache-2.0\n"
+ "capability: capability:triage\n"
+ "---\n" + body
+ )
+
+ # --- High-confidence patterns ---
+
+ def test_svn_commit_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("Run `svn
commit -m 'release'`\n")))
+ assert any(v.category == ASF_COUPLING_CATEGORY for v in violations)
+ assert any("svn" in v.message for v in violations)
+
+ def test_svn_mv_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("Run `svn mv
dev/ release/`\n")))
+ assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message
for v in violations)
+
+ def test_announce_at_apache_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("Send mail
to [email protected]\n")))
+ assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message
for v in violations)
+
+ def test_dist_dev_path_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("Upload to
dist/dev/myproject\n")))
+ assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message
for v in violations)
+
+ def test_vulnogram_url_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(
+ validate_asf_coupling(path, self._skill("Open
https://vulnogram.github.io to file the CVE.\n"))
+ )
+ assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message
for v in violations)
+
+ # --- Low-confidence patterns ---
+
+ def test_bare_pmc_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("The PMC
votes on this release.\n")))
+ assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message
for v in violations)
+
+ def test_icla_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("Contributor
must sign the ICLA first.\n")))
+ assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message
for v in violations)
+
+ def test_incubator_flagged(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("This
project is in the Incubator.\n")))
+ assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message
for v in violations)
+
+ # --- Remedy classes are reported ---
+
+ def test_remedy_class_in_message(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(validate_asf_coupling(path, self._skill("Run `svn co
https://...\n")))
+ coupling = [v for v in violations if v.category ==
ASF_COUPLING_CATEGORY]
+ assert coupling
+ assert "remedy:" in coupling[0].message
+
+ # --- Allowlisted paths are skipped ---
+
+ def test_allowlisted_path_skipped(self, tmp_path: Path) -> None:
+ """Files under projects/_template/ must not be flagged."""
+ template_dir = tmp_path / "projects" / "_template"
+ template_dir.mkdir(parents=True)
+ path = template_dir / "release-management-config.md"
+ violations = list(validate_asf_coupling(path, "Upload to
dist/dev/myproject\n"))
+ assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+ # --- Inline allow markers suppress the hit ---
+
+ def test_eg_marker_suppresses(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ # "e.g." is an INLINE_ALLOW_MARKER — the line should be skipped.
+ violations = list(
+ validate_asf_coupling(
+ path,
+ self._skill("e.g. for ASF use [email protected] as the
announce list\n"),
+ )
+ )
+ assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+ def test_example_marker_suppresses(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(
+ validate_asf_coupling(
+ path,
+ self._skill("example: PMC votes on this — replace with
<governance-body>\n"),
+ )
+ )
+ assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+ # --- ASF-coupling-specific allow markers suppress the hit ---
+
+ def test_capability_flag_allow_marker_suppresses(self, tmp_path: Path) ->
None:
+ """A line that already names release-dist-backend should not be
re-flagged."""
+ path = tmp_path / "SKILL.md"
+ violations = list(
+ validate_asf_coupling(
+ path,
+ self._skill("If release-dist-backend is 'svn', run `svn
commit` to publish.\n"),
+ )
+ )
+ assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+ def test_asf_default_allow_marker_suppresses(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ violations = list(
+ validate_asf_coupling(
+ path,
+ self._skill("ASF default: send mail to
[email protected].\n"),
+ )
+ )
+ assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+ # --- Category membership ---
+
+ def test_category_is_soft(self) -> None:
+ assert ASF_COUPLING_CATEGORY in SOFT_CATEGORIES
+
+ def test_category_in_all_categories(self) -> None:
+ assert ASF_COUPLING_CATEGORY in ALL_CATEGORIES
+
+ # --- Clean skill produces no violations ---
+
+ def test_clean_skill_no_violations(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ clean_body = (
+ "## Workflow\n\n"
+ "1. Propose release to the <governance-body>.\n"
+ "2. Upload artifacts to <dist-path>.\n"
+ "3. Send the vote email to <announce-list>.\n"
+ )
+ violations = list(validate_asf_coupling(path, self._skill(clean_body)))
+ assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+
class TestValidateTools:
def test_tool_with_valid_readme(self, tmp_path: Path) -> None:
root = _make_tools_root(tmp_path)