This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new 59e1930 catch `--body=` equals-sign variant and fix corpus hits
(#217)
59e1930 is described below
commit 59e1930e3545823b5d943b14a14330603fdb57f3
Author: Justin Mclean <[email protected]>
AuthorDate: Tue May 19 01:59:14 2026 +0800
catch `--body=` equals-sign variant and fix corpus hits (#217)
* catch `--body=` equals-sign variant and fix corpus hits
* fix fomatting
---
.../skills/pr-management-code-review/posting.md | 15 ++-
.claude/skills/security-issue-fix/SKILL.md | 2 +-
.claude/skills/security-issue-triage/SKILL.md | 3 +-
.claude/skills/setup-override-upstream/SKILL.md | 9 +-
.claude/skills/write-skill/security-checklist.md | 3 +-
.../src/skill_validator/__init__.py | 80 +++++++++++++++-
tools/skill-validator/tests/test_validator.py | 103 +++++++++++++++++++++
7 files changed, 197 insertions(+), 18 deletions(-)
diff --git a/.claude/skills/pr-management-code-review/posting.md
b/.claude/skills/pr-management-code-review/posting.md
index 04cf67d..c00d65b 100644
--- a/.claude/skills/pr-management-code-review/posting.md
+++ b/.claude/skills/pr-management-code-review/posting.md
@@ -35,32 +35,31 @@ Golden rule 8 downgrades any auto-`APPROVE` if CI is
failing.
### Approve
```bash
-gh pr review <N> --repo <repo> --approve --body "$(cat <<'EOF'
+cat > /tmp/review-body.md << 'EOF'
[review body here]
EOF
-)"
+gh pr review <N> --repo <repo> --approve --body-file /tmp/review-body.md
```
### Request changes
```bash
-gh pr review <N> --repo <repo> --request-changes --body "$(cat <<'EOF'
+cat > /tmp/review-body.md << 'EOF'
[review body here]
EOF
-)"
+gh pr review <N> --repo <repo> --request-changes --body-file
/tmp/review-body.md
```
### Comment
```bash
-gh pr review <N> --repo <repo> --comment --body "$(cat <<'EOF'
+cat > /tmp/review-body.md << 'EOF'
[review body here]
EOF
-)"
+gh pr review <N> --repo <repo> --comment --body-file /tmp/review-body.md
```
-The skill always uses **here-doc body passing** (never `--body
-"$STRING"` with quotes) to avoid shell-escape mishaps with PR
+The skill always uses **body-file passing** (never `--body "$STRING"` with
quotes) to avoid shell-escape mishaps with PR
content that may contain backticks, dollar signs, or quotes.
### Self-review guard
diff --git a/.claude/skills/security-issue-fix/SKILL.md
b/.claude/skills/security-issue-fix/SKILL.md
index 6ca4949..5d8dbef 100644
--- a/.claude/skills/security-issue-fix/SKILL.md
+++ b/.claude/skills/security-issue-fix/SKILL.md
@@ -565,7 +565,7 @@ browser before actually submitting the PR — matching the
rule in
```bash
gh pr create --web --repo <upstream> --base <base-branch> \
--title "<neutral title>" \
- --body "$(cat /tmp/pr-body-<issue>.md)"
+ --body-file /tmp/pr-body-<issue>.md
```
If a backport label is needed, apply it via `gh` after the PR is
diff --git a/.claude/skills/security-issue-triage/SKILL.md
b/.claude/skills/security-issue-triage/SKILL.md
index a946df6..c62df06 100644
--- a/.claude/skills/security-issue-triage/SKILL.md
+++ b/.claude/skills/security-issue-triage/SKILL.md
@@ -769,8 +769,7 @@ gh issue comment <N> --repo <tracker> --body-file <tmpfile>
Use the
[`tools/github/issue-template.md`](../../../tools/github/issue-template.md)
-file-via-Write-tool pattern for the body — `gh issue comment
---body '<x>'` permits shell expansion of `$(...)` inside double
+file-via-Write-tool pattern for the body — `gh issue comment --body '<x>'`
permits shell expansion of `$(...)` inside double
quotes, and the comment body inevitably contains user-supplied
text from the tracker (which crossed a trust boundary at
import time). Write the body to `/tmp/triage-<N>.md` via the
diff --git a/.claude/skills/setup-override-upstream/SKILL.md
b/.claude/skills/setup-override-upstream/SKILL.md
index 6868fbc..499f62d 100644
--- a/.claude/skills/setup-override-upstream/SKILL.md
+++ b/.claude/skills/setup-override-upstream/SKILL.md
@@ -276,8 +276,13 @@ In `<framework-clone>`:
3. **Confirm with the user before posting**. Show the
exact title + body. Wait for "OK to post" / "yes" /
"send" / similar before running `gh pr create`.
-4. `gh pr create --repo apache/airflow-steward --base
- main --head <user>:<branch> --title "..." --body "..."`
+4. Write the PR body to a temp file, then post:
+
+ ```bash
+ gh pr create --repo apache/airflow-steward --base main \
+ --head <user>:<branch> --title "..." \
+ --body-file /tmp/pr-body.md
+ ```
### Step 7 — Post-PR cleanup pointer
diff --git a/.claude/skills/write-skill/security-checklist.md
b/.claude/skills/write-skill/security-checklist.md
index ddb20d0..234c736 100644
--- a/.claude/skills/write-skill/security-checklist.md
+++ b/.claude/skills/write-skill/security-checklist.md
@@ -211,8 +211,7 @@ re-reads in fresh agent contexts would otherwise see.
## Pattern 9 — `--body-file <path>`, never `--body "..."`
Use `gh issue create --body-file <path>` and `gh issue comment
---body-file <path>` exclusively. The string-form `--body
-"$(cat …)"` re-introduces shell expansion of the file's content
+--body-file <path>` exclusively. The string-form `--body "$(cat …)"`
re-introduces shell expansion of the file's content
through the outer double-quoted argument, defeating the point of
moving the content to a file. The `--body-file` form reads the
file directly, no expansion.
diff --git a/tools/skill-validator/src/skill_validator/__init__.py
b/tools/skill-validator/src/skill_validator/__init__.py
index b3ae2e4..22238d5 100644
--- a/tools/skill-validator/src/skill_validator/__init__.py
+++ b/tools/skill-validator/src/skill_validator/__init__.py
@@ -130,8 +130,9 @@ MAX_METADATA_CHARS = 1536
PRINCIPLE_CATEGORY = "principle_compliance"
TRIGGER_PRESERVATION_CATEGORY = "trigger_preservation"
+BODY_INLINE_CATEGORY = "body_inline"
SOFT_CATEGORIES: frozenset[str] = frozenset(
- {PRINCIPLE_CATEGORY, TRIGGER_PRESERVATION_CATEGORY},
+ {PRINCIPLE_CATEGORY, TRIGGER_PRESERVATION_CATEGORY, BODY_INLINE_CATEGORY},
)
ACTION_INVENTORY_COMMA_THRESHOLD = 5
@@ -325,6 +326,13 @@ def extract_headings(text: str) -> set[str]:
return slugs
+# Matches ``--body "..."`` / ``--body '...'`` / ``--body="..."`` /
``--body='...'``.
+# The ``[\s=]`` character class covers both the space-separated form (common in
+# multi-line shell scripts) and the equals-sign form (common in one-liners).
+# Using ``--body-file`` instead avoids shell-injection risk from unquoted
+# or attacker-controlled content.
+_BODY_INLINE_RE = re.compile(r'--body[\s=]["\']')
+
_FENCED_CODE_RE = re.compile(r"^```[\s\S]*?^```", re.MULTILINE)
_DOUBLE_BACKTICK_RE = re.compile(r"``[\s\S]+?``")
_SINGLE_BACKTICK_RE = re.compile(r"`[^`\n]+`")
@@ -675,6 +683,70 @@ def collect_skill_dirs(root: Path | None = None) ->
set[Path]:
return {p.resolve() for p in base.iterdir() if p.is_dir()}
+# ---------------------------------------------------------------------------
+# --body inline check (Pattern 9)
+# ---------------------------------------------------------------------------
+
+# Files that intentionally document the bad --body "..." pattern and must not
+# be flagged. The security checklist uses nested 4- and 5-backtick fences for
+# embedded code-block demos; those confuse _FENCED_CODE_RE /
_DOUBLE_BACKTICK_RE
+# and leave prose ``--body "..."`` mentions outside any detected code span.
+_BODY_INLINE_SKIP_SUFFIXES: tuple[str, ...] =
("write-skill/security-checklist.md",)
+
+
+def _inline_only_code_spans(text: str) -> list[tuple[int, int]]:
+ """Return (start, end) spans for *inline* backtick code only.
+
+ Fenced code blocks are excluded so that security-pattern checks can
+ inspect fenced-block content (real agent commands) while skipping
+ inline backtick snippets that appear in instructional prose
+ (e.g. ``never use --body "..."``).
+
+ Uses position-based exclusion: any span fully contained within a
+ fenced block is dropped, regardless of the exact tuple values returned
+ by ``_code_spans`` (which can produce partially-overlapping spans for
+ the opening backticks of a fenced block).
+ """
+ fenced_spans = [m.span() for m in _FENCED_CODE_RE.finditer(text)]
+ return [
+ (start, end)
+ for start, end in _code_spans(text)
+ if not any(fs <= start and end <= fe for fs, fe in fenced_spans)
+ ]
+
+
+def validate_body_inline(path: Path, text: str) -> Iterable[Violation]:
+ """Flag ``--body "..."`` / ``--body '...'`` / ``--body=...`` in fenced
blocks.
+
+ Passing a body as an inline shell argument is a shell-injection vector:
+ the value may contain attacker-controlled content (PR titles, issue
+ bodies, commit messages) that can break the quoting and inject
+ arbitrary shell commands. ``--body-file <path>`` writes the content
+ to a temp file first and sidesteps the problem entirely.
+
+ Both the space-separated form (``--body "text"``) and the equals-sign
+ form (``--body="text"``) are caught. Inline backtick mentions in
+ prose (e.g. "avoid ``--body '...'``") are skipped.
+
+ All violations are **SOFT** — advisory only.
+ """
+ if any(str(path).endswith(suffix) for suffix in
_BODY_INLINE_SKIP_SUFFIXES):
+ return
+ inline_spans = _inline_only_code_spans(text)
+ for m in _BODY_INLINE_RE.finditer(text):
+ if any(s <= m.start() < e for s, e in inline_spans):
+ continue
+ line_no = text[: m.start()].count("\n") + 1
+ yield Violation(
+ path,
+ line_no,
+ f"body-inline: {m.group().strip()!r} passes a body as an inline
shell "
+ f"argument — use '--body-file <path>' instead to avoid "
+ f"shell-injection risk (see write-skill/security-checklist.md §
Pattern 9)",
+ category=BODY_INLINE_CATEGORY,
+ )
+
+
def collect_doc_files(root: Path | None = None) -> set[Path]:
"""Return every .md file under docs/ and projects/_template/."""
repo_root = root or find_repo_root()
@@ -710,6 +782,7 @@ def run_validation(root: Path | None = None) ->
list[Violation]:
# All skill files get link + placeholder validation
violations.extend(validate_links(path, text, skill_dirs, doc_files))
violations.extend(validate_placeholders(path, text))
+ violations.extend(validate_body_inline(path, text))
return violations
@@ -765,10 +838,11 @@ def main(argv: list[str] | None = None) -> int:
_SOFT_RULE_PREFIXES: tuple[str, ...] = (
"action-inventory",
- "distinct-from",
+ "body-inline",
"chain-handoff",
- "parenthetical rationale",
"criteria-source",
+ "distinct-from",
+ "parenthetical rationale",
"trigger phrase",
)
diff --git a/tools/skill-validator/tests/test_validator.py
b/tools/skill-validator/tests/test_validator.py
index c62bcbf..9a3ddee 100644
--- a/tools/skill-validator/tests/test_validator.py
+++ b/tools/skill-validator/tests/test_validator.py
@@ -24,6 +24,7 @@ from pathlib import Path
import pytest
from skill_validator import (
+ BODY_INLINE_CATEGORY,
FORBIDDEN_PATTERNS,
MAX_METADATA_CHARS,
PRINCIPLE_CATEGORY,
@@ -35,6 +36,7 @@ from skill_validator import (
resolve_link,
run_validation,
slugify,
+ validate_body_inline,
validate_frontmatter,
validate_links,
validate_placeholders,
@@ -612,6 +614,106 @@ class TestTriggerPreservation:
assert "'beta'" in violations[0].message
+# ---------------------------------------------------------------------------
+# body-inline check (Pattern 9 extension)
+# ---------------------------------------------------------------------------
+
+
+def _fenced_skill(cmd: str) -> str:
+ """Wrap *cmd* in a minimal SKILL.md with a fenced bash block."""
+ frontmatter = "---\nname: test\ndescription: test\nlicense:
Apache-2.0\n---\n\n"
+ return frontmatter + f"```bash\n{cmd}\n```\n"
+
+
+class TestBodyInline:
+ def test_no_body_arg_silent(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill("gh issue create --title 'Bug' --body-file
/tmp/body.txt")
+ violations = list(validate_body_inline(path, text))
+ assert violations == []
+
+ def test_body_space_double_quote_fenced_flagged(self, tmp_path: Path) ->
None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill('gh issue create --title "T" --body "some text"')
+ violations = list(validate_body_inline(path, text))
+ assert len(violations) == 1
+ assert violations[0].category == BODY_INLINE_CATEGORY
+ assert "body-inline" in violations[0].message
+
+ def test_body_space_single_quote_fenced_flagged(self, tmp_path: Path) ->
None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill("gh issue create --title T --body 'some text'")
+ violations = list(validate_body_inline(path, text))
+ assert len(violations) == 1
+ assert violations[0].category == BODY_INLINE_CATEGORY
+
+ def test_body_equals_double_quote_fenced_flagged(self, tmp_path: Path) ->
None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill('gh issue create --body="some text"')
+ violations = list(validate_body_inline(path, text))
+ assert len(violations) == 1
+ assert violations[0].category == BODY_INLINE_CATEGORY
+
+ def test_body_equals_single_quote_fenced_flagged(self, tmp_path: Path) ->
None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill("gh issue create --body='some text'")
+ violations = list(validate_body_inline(path, text))
+ assert len(violations) == 1
+ assert violations[0].category == BODY_INLINE_CATEGORY
+
+ def test_inline_backtick_mention_skipped(self, tmp_path: Path) -> None:
+ """Prose like ``never use --body "..."`` should not fire."""
+ path = tmp_path / "SKILL.md"
+ text = (
+ "---\nname: test\ndescription: test\nlicense: Apache-2.0\n---\n\n"
+ 'Do not use `--body "text"` — prefer `--body-file` instead.\n'
+ )
+ violations = list(validate_body_inline(path, text))
+ assert violations == []
+
+ def test_body_file_not_flagged(self, tmp_path: Path) -> None:
+ """``--body-file`` must never be flagged — it is the correct form."""
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill("gh issue create --title T --body-file
/tmp/b.txt")
+ violations = list(validate_body_inline(path, text))
+ assert violations == []
+
+ def test_violation_line_number_correct(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ # _fenced_skill layout (1-indexed):
+ # 1: ---
+ # 2: name: test
+ # 3: description: test
+ # 4: license: Apache-2.0
+ # 5: ---
+ # 6: (blank)
+ # 7: ```bash
+ # 8: gh issue create --body "text" ← violation here
+ # 9: ```
+ text = _fenced_skill('gh issue create --body "text"')
+ violations = list(validate_body_inline(path, text))
+ assert len(violations) == 1
+ assert violations[0].line == 8
+
+ def test_body_inline_is_soft(self) -> None:
+ assert BODY_INLINE_CATEGORY in SOFT_CATEGORIES
+
+ def test_message_references_body_file(self, tmp_path: Path) -> None:
+ path = tmp_path / "SKILL.md"
+ text = _fenced_skill('gh pr create --body "description"')
+ violations = list(validate_body_inline(path, text))
+ assert len(violations) == 1
+ assert "--body-file" in violations[0].message
+
+ def test_security_checklist_skipped(self, tmp_path: Path) -> None:
+ """security-checklist.md documents bad patterns intentionally — must
not fire."""
+ path = tmp_path / "write-skill" / "security-checklist.md"
+ path.parent.mkdir(parents=True)
+ text = _fenced_skill('gh issue create --body "bad pattern documented
here"')
+ violations = list(validate_body_inline(path, text))
+ assert violations == []
+
+
# ---------------------------------------------------------------------------
# SOFT category exposure
# ---------------------------------------------------------------------------
@@ -621,3 +723,4 @@ class TestSoftCategories:
def test_soft_categories_set(self) -> None:
assert PRINCIPLE_CATEGORY in SOFT_CATEGORIES
assert TRIGGER_PRESERVATION_CATEGORY in SOFT_CATEGORIES
+ assert BODY_INLINE_CATEGORY in SOFT_CATEGORIES