This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git


The following commit(s) were added to refs/heads/main by this push:
     new b02edf52 lint to make sure skills are ASF-agnostic (#493)
b02edf52 is described below

commit b02edf5215f9d7f5f4e5402f6a86590f2f84e1fd
Author: Justin Mclean <[email protected]>
AuthorDate: Thu Jun 11 22:27:38 2026 +1000

    lint to make sure skills are ASF-agnostic (#493)
---
 .../src/skill_and_tool_validator/__init__.py       | 133 +++++++++++++++++-
 .../tests/test_validator.py                        | 150 +++++++++++++++++++++
 2 files changed, 282 insertions(+), 1 deletion(-)

diff --git 
a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py 
b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
index 4b617d71..e6c72cd5 100644
--- a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
+++ b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
@@ -17,7 +17,7 @@
 
 """Validate framework skill definitions.
 
-This module validates nine aspects of every skill under
+This module validates ten aspects of every skill under
 skills/:
 
 1. YAML frontmatter — every SKILL.md must have a valid frontmatter
@@ -54,6 +54,12 @@ skills/:
    ``tools/skill-evals/evals/<slug>/``.  Missing suites are
    advisories so in-flight eval PRs do not block the gate while
    their branches are pending review.
+10. ASF-coupling advisory lint (SOFT) — flags ASF-coupled tokens in
+    skill bodies (e.g. svn commands, announce@apache.org, Vulnogram
+    URLs, bare PMC/ICLA/incubator) that a non-ASF adopter cannot
+    satisfy without editing the skill.  Each hit is tagged with a
+    remedy class (placeholder / adapter / capability-flag).  Never
+    fails the run — advisory only.
 
 SOFT categories surface as advisory warnings (stderr) without
 failing the run unless ``--strict`` is passed.
@@ -258,6 +264,10 @@ NAME_CONVENTION_CATEGORY = "name_convention"
 # License-header check: every skill .md and non-trivial tool Python file must
 # carry the Apache-2.0 SPDX identifier or the full ASF preamble.
 LICENSE_HEADER_CATEGORY = "license_header"
+# SOFT advisory: ASF-coupled tokens that a non-ASF adopter cannot satisfy without
+# editing the skill body.  Each hit is tagged with a remedy class so maintainers
+# know how to generalise it.  Never fails the run.
+ASF_COUPLING_CATEGORY = "asf_coupling"
 
 # The `magpie-` namespace prefix every installed framework skill carries.
 SKILL_NAME_PREFIX = "magpie-"
@@ -271,6 +281,7 @@ SOFT_CATEGORIES: frozenset[str] = frozenset(
         PRIVACY_CATEGORY,
         LOWERCASE_F_FIELD_CATEGORY,
         EVAL_COVERAGE_CATEGORY,
+        ASF_COUPLING_CATEGORY,
     }
 )
 HARD_CATEGORIES: frozenset[str] = frozenset(
@@ -1687,6 +1698,124 @@ def collect_skill_dirs(root: Path | None = None) -> 
set[Path]:
     return {p.resolve() for p in base.iterdir() if p.is_dir()}
 
 
+# ---------------------------------------------------------------------------
+# ASF-coupling advisory lint (project-agnosticism check)
+# ---------------------------------------------------------------------------
+
+# Tiered ASF-coupled token patterns.  Each entry is:
+#   (compiled regex, confidence level, remedy class, advisory note)
+# Two tiers:
+#   high — almost never legitimate in a non-ASF adopter's workflow.
+#   low  — common in ASF prose but may appear in examples or config docs.
+_ASF_COUPLING_PATTERNS: list[tuple[re.Pattern[str], str, str, str]] = [
+    # High-confidence: very unlikely to appear legitimately outside ASF 
workflows
+    (
+        re.compile(r"\bsvn\s+(?:mv|commit|co|checkout|add|delete|rm)\b"),
+        "high",
+        "adapter",
+        "svn command — use release-dist-backend capability flag or a 
distribution adapter",
+    ),
+    (
+        re.compile(r"\bannounce@apache\.org\b"),
+        "high",
+        "capability-flag",
+        "hardcoded announce@apache.org — use <announce-list> placeholder or release-announce-backend flag",
+    ),
+    (
+        re.compile(r"\bdist/(?:dev|release)/"),
+        "high",
+        "capability-flag",
+        "ASF dist tree path — use release-dist-backend capability flag",
+    ),
+    (
+        re.compile(r"https?://vulnogram\.github\.io"),
+        "high",
+        "capability-flag",
+        "Vulnogram URL — use <cve-tool-url> placeholder or cve-tool capability 
flag",
+    ),
+    # Low-confidence: may appear legitimately in ASF-default prose, examples,
+    # or in lines that already carry a placeholder/flag guard.
+    (
+        re.compile(r"\bPMC\b"),
+        "low",
+        "placeholder",
+        "bare 'PMC' — consider <governance-body> placeholder for non-ASF 
adopters",
+    ),
+    (
+        re.compile(r"\bICLA\b"),
+        "low",
+        "capability-flag",
+        "ICLA mention — use contributor-intake-mechanism flag (ICLA vs DCO vs 
none)",
+    ),
+    (
+        re.compile(r"\bincubator\b", re.IGNORECASE),
+        "low",
+        "placeholder",
+        "incubator mention — use <project-stage> placeholder or lifecycle 
capability flag",
+    ),
+]
+
+# Inline markers that indicate a line already names or guards the ASF coupling,
+# so it should not be flagged again.  Applied in addition to 
INLINE_ALLOW_MARKERS.
+_ASF_COUPLING_ALLOW_MARKERS: tuple[str, ...] = (
+    # Existing capability-flag names that already generalise the coupling
+    "release-dist-backend",
+    "release_dist_backend",
+    "release-announce-backend",
+    "release_announce_backend",
+    "release_approval_mechanism",
+    "release-approval-mechanism",
+    "contributor-intake-mechanism",
+    "contributor_intake_mechanism",
+    "cve-tool",
+    # Placeholder forms that already generalise the coupling
+    "<announce-list>",
+    "<governance-body>",
+    "<project-stage>",
+    "<cve-tool",
+    # Phrases that explicitly name the ASF default profile context
+    "ASF default",
+    "ASF profile",
+    "ASF adopter",
+    "asf-default",
+)
+
+
+def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]:
+    """Flag ASF-coupled tokens in skill bodies as advisory hints.
+
+    SOFT — advisory only; surfaces on stderr, never fails the run.  Each
+    hit is tagged with a confidence level and a remedy class (placeholder /
+    adapter / capability-flag) so maintainers know how to generalise the
+    coupling without regressing the ASF default profile.
+
+    Reuses the existing ALLOWLIST_PATHS and INLINE_ALLOW_MARKERS machinery
+    from validate_placeholders.  Additional _ASF_COUPLING_ALLOW_MARKERS
+    cover lines that already name the generalisation mechanism.
+    """
+    if is_path_allowlisted(path):
+        return
+
+    lines = text.splitlines()
+    for line_no, line in enumerate(lines, start=1):
+        # Shared allowlist markers (e.g., "e.g.", "example:") already cover
+        # intentional explanatory mentions.
+        if line_has_inline_allow_marker(line):
+            continue
+        # ASF-coupling-specific markers: line already names the guard 
mechanism.
+        if any(marker in line for marker in _ASF_COUPLING_ALLOW_MARKERS):
+            continue
+        for pattern, confidence, remedy, note in _ASF_COUPLING_PATTERNS:
+            m = pattern.search(line)
+            if m:
+                yield Violation(
+                    path,
+                    line_no,
+                    f"asf-coupling [{confidence}] remedy:{remedy} — {note} 
(matched: {m.group()!r})",
+                    category=ASF_COUPLING_CATEGORY,
+                )
+
+
 # ---------------------------------------------------------------------------
 # gh list --limit check
 # ---------------------------------------------------------------------------
@@ -1794,6 +1923,7 @@ def run_validation(root: Path | None = None) -> 
list[Violation]:
             violations.extend(validate_principle_compliance(path, text))
             violations.extend(validate_privacy_patterns(path, text))
             violations.extend(validate_trigger_preservation(path, text, 
repo_root=repo_root))
+            violations.extend(validate_asf_coupling(path, text))
 
         # All skill files get link + placeholder + security-pattern checks
         violations.extend(validate_links(path, text, skill_dirs, doc_files))
@@ -1885,6 +2015,7 @@ def main(argv: list[str] | None = None) -> int:
 
 _SOFT_RULE_PREFIXES: tuple[str, ...] = (
     "action-inventory",
+    "asf-coupling",
     "chain-handoff",
     "criteria-source",
     "distinct-from",
diff --git a/tools/skill-and-tool-validator/tests/test_validator.py 
b/tools/skill-and-tool-validator/tests/test_validator.py
index 3fda038f..2f9b9725 100644
--- a/tools/skill-and-tool-validator/tests/test_validator.py
+++ b/tools/skill-and-tool-validator/tests/test_validator.py
@@ -30,6 +30,7 @@ from skill_and_tool_validator import (
     _PRIVACY_EXTERNAL_CONTENT_MODES,
     ALL_CATEGORIES,
     ALLOWED_MODES,
+    ASF_COUPLING_CATEGORY,
     EVAL_COVERAGE_CATEGORY,
     FORBIDDEN_PATTERNS,
     GH_LIST_CATEGORY,
@@ -61,6 +62,7 @@ from skill_and_tool_validator import (
     resolve_link,
     run_validation,
     slugify,
+    validate_asf_coupling,
     validate_capability_sync,
     validate_eval_coverage,
     validate_frontmatter,
@@ -2195,6 +2197,154 @@ def _make_tools_root(tmp_path: Path) -> Path:
     return root
 
 
+class TestValidateAsfCoupling:
+    """Tests for the SOFT ASF-coupling advisory lint."""
+
+    def _skill(self, body: str) -> str:
+        """Wrap body in a minimal valid SKILL.md."""
+        return (
+            "---\n"
+            "name: magpie-test\n"
+            "description: Test skill.\n"
+            "license: Apache-2.0\n"
+            "capability: capability:triage\n"
+            "---\n" + body
+        )
+
+    # --- High-confidence patterns ---
+
+    def test_svn_commit_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("Run `svn 
commit -m 'release'`\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY for v in violations)
+        assert any("svn" in v.message for v in violations)
+
+    def test_svn_mv_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("Run `svn mv 
dev/ release/`\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message 
for v in violations)
+
+    def test_announce_at_apache_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("Send mail to announce@apache.org\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message 
for v in violations)
+
+    def test_dist_dev_path_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("Upload to 
dist/dev/myproject\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message 
for v in violations)
+
+    def test_vulnogram_url_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(path, self._skill("Open 
https://vulnogram.github.io to file the CVE.\n"))
+        )
+        assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message 
for v in violations)
+
+    # --- Low-confidence patterns ---
+
+    def test_bare_pmc_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("The PMC 
votes on this release.\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message 
for v in violations)
+
+    def test_icla_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("Contributor 
must sign the ICLA first.\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message 
for v in violations)
+
+    def test_incubator_flagged(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("This 
project is in the Incubator.\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message 
for v in violations)
+
+    # --- Remedy classes are reported ---
+
+    def test_remedy_class_in_message(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("Run `svn co https://...\n")))
+        coupling = [v for v in violations if v.category == 
ASF_COUPLING_CATEGORY]
+        assert coupling
+        assert "remedy:" in coupling[0].message
+
+    # --- Allowlisted paths are skipped ---
+
+    def test_allowlisted_path_skipped(self, tmp_path: Path) -> None:
+        """Files under projects/_template/ must not be flagged."""
+        template_dir = tmp_path / "projects" / "_template"
+        template_dir.mkdir(parents=True)
+        path = template_dir / "release-management-config.md"
+        violations = list(validate_asf_coupling(path, "Upload to 
dist/dev/myproject\n"))
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    # --- Inline allow markers suppress the hit ---
+
+    def test_eg_marker_suppresses(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        # "e.g." is an INLINE_ALLOW_MARKER — the line should be skipped.
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill("e.g. for ASF use announce@apache.org as the announce list\n"),
+            )
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    def test_example_marker_suppresses(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill("example: PMC votes on this — replace with 
<governance-body>\n"),
+            )
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    # --- ASF-coupling-specific allow markers suppress the hit ---
+
+    def test_capability_flag_allow_marker_suppresses(self, tmp_path: Path) -> 
None:
+        """A line that already names release-dist-backend should not be 
re-flagged."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill("If release-dist-backend is 'svn', run `svn 
commit` to publish.\n"),
+            )
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    def test_asf_default_allow_marker_suppresses(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill("ASF default: send mail to announce@apache.org.\n"),
+            )
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    # --- Category membership ---
+
+    def test_category_is_soft(self) -> None:
+        assert ASF_COUPLING_CATEGORY in SOFT_CATEGORIES
+
+    def test_category_in_all_categories(self) -> None:
+        assert ASF_COUPLING_CATEGORY in ALL_CATEGORIES
+
+    # --- Clean skill produces no violations ---
+
+    def test_clean_skill_no_violations(self, tmp_path: Path) -> None:
+        path = tmp_path / "SKILL.md"
+        clean_body = (
+            "## Workflow\n\n"
+            "1. Propose release to the <governance-body>.\n"
+            "2. Upload artifacts to <dist-path>.\n"
+            "3. Send the vote email to <announce-list>.\n"
+        )
+        violations = list(validate_asf_coupling(path, self._skill(clean_body)))
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+
 class TestValidateTools:
     def test_tool_with_valid_readme(self, tmp_path: Path) -> None:
         root = _make_tools_root(tmp_path)

Reply via email to