This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new b9258c3 fix(skill-validator): match GitHub anchor algorithm for
repeated whitespace (#65)
b9258c3 is described below
commit b9258c3ea24664fb4ae2d392c1fca6e0ae254347
Author: André Ahlert <[email protected]>
AuthorDate: Wed May 6 12:07:24 2026 -0300
fix(skill-validator): match GitHub anchor algorithm for repeated whitespace
(#65)
`slugify` was using `re.sub(r"[\s]+", "-", text)` which collapses runs of
whitespace into a single dash. GitHub's anchor renderer and doctoc both
replace each whitespace character one-for-one, so headings whose text
contains an em-dash (which strips to "" between two surrounding spaces)
yield double-dash anchors. Concretely, "Mode B — conversational
mentoring" must slugify to `mode-b--conversational-mentoring`, matching
the doctoc-generated TOC anchor in `docs/modes.md`.
The validator was therefore producing single-dash slugs and reporting ~38
false-positive "anchor not found" violations against the real repo.
Drop the `+` quantifier so each whitespace becomes its own dash, and
update the existing `test_multiple_spaces` expectation to match the
actual GitHub algorithm. Add `test_em_dash_in_heading` to pin the
canonical case.
---
tools/skill-validator/src/skill_validator/__init__.py | 2 +-
tools/skill-validator/tests/test_validator.py | 12 +++++++++++-
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/tools/skill-validator/src/skill_validator/__init__.py
b/tools/skill-validator/src/skill_validator/__init__.py
index fe93b32..cb4b768 100644
--- a/tools/skill-validator/src/skill_validator/__init__.py
+++ b/tools/skill-validator/src/skill_validator/__init__.py
@@ -111,7 +111,7 @@ LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
# Anchor slug generation — mirrors doctoc/GitHub logic loosely.
ANCHOR_PATTERN = re.compile(r"[^\w\s-]+")
-ANCHOR_SPACE_PATTERN = re.compile(r"[\s]+")
+ANCHOR_SPACE_PATTERN = re.compile(r"\s")
# ---------------------------------------------------------------------------
diff --git a/tools/skill-validator/tests/test_validator.py
b/tools/skill-validator/tests/test_validator.py
index b346903..3b991f8 100644
--- a/tools/skill-validator/tests/test_validator.py
+++ b/tools/skill-validator/tests/test_validator.py
@@ -159,7 +159,17 @@ class TestSlugify:
assert slugify("What's new?") == "whats-new"
def test_multiple_spaces(self) -> None:
- assert slugify("A  B   C") == "a-b-c"
+ # GitHub's anchor algorithm replaces each whitespace character with
+ # a dash one-for-one rather than collapsing runs. Doctoc and the
+ # GitHub renderer agree on this; the canonical case is em-dash
+ # headings, which strip to "" and leave two adjacent spaces.
+ assert slugify("A  B   C") == "a--b---c"
+
+ def test_em_dash_in_heading(self) -> None:
+ assert (
+ slugify("Mode B — conversational mentoring")
+ == "mode-b--conversational-mentoring"
+ )
class TestExtractHeadings: