This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new 7b41597 added --list-tags to print case tags with counts (#404)
7b41597 is described below
commit 7b4159720c45a389651c67e12799cd34e5910227
Author: Vandit Gupta <[email protected]>
AuthorDate: Sun May 31 09:53:27 2026 -0400
added --list-tags to print case tags with counts (#404)
---
tools/skill-evals/src/skill_evals/runner.py | 27 ++++++++++++++++++++
tools/skill-evals/tests/test_runner.py | 38 +++++++++++++++++++++++++++++
2 files changed, 65 insertions(+)
diff --git a/tools/skill-evals/src/skill_evals/runner.py b/tools/skill-evals/src/skill_evals/runner.py
index ed80837..ad2abf7 100644
--- a/tools/skill-evals/src/skill_evals/runner.py
+++ b/tools/skill-evals/src/skill_evals/runner.py
@@ -689,6 +689,15 @@ def find_cases(path: Path) -> list[tuple[Path, Path]]:
return results
+def collect_tag_counts(cases: list[tuple[Path, Path]]) -> dict[str, int]:
+    """Return how many discovered cases carry each tag.
+
+    Args:
+        cases: ``(case_dir, fixtures_dir)`` pairs, e.g. the result of
+            ``find_cases``. Only the case directory is consulted.
+
+    Returns:
+        A mapping from tag to the number of cases declaring it. Keys are in
+        first-seen order; sort them if a stable listing is needed.
+    """
+    counts: dict[str, int] = {}
+    for case_dir, _fixtures_dir in cases:
+        # Count cases, not occurrences: dict.fromkeys() drops repeats while
+        # keeping order, so a tag reported more than once for a single case
+        # still contributes exactly one to that tag's total.
+        for tag in dict.fromkeys(load_case_tags(case_dir)):
+            counts[tag] = counts.get(tag, 0) + 1
+    return counts
+
+
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(
description=(
@@ -776,6 +785,15 @@ def main(argv: list[str] | None = None) -> int:
"times; a case is included if it has all requested tags."
),
)
+ parser.add_argument(
+ "--list-tags",
+ action="store_true",
+ help=(
+ "Print every distinct tag declared in case-meta.json under path, "
+ "with the number of cases carrying each tag, and exit without "
+ "running prompts."
+ ),
+ )
args = parser.parse_args(argv)
grader_explicit = args.grader_cli != DEFAULT_GRADER_CLI
@@ -783,6 +801,15 @@ def main(argv: list[str] | None = None) -> int:
parser.error("--grader-cli and --exact require --cli")
cases = find_cases(args.path)
+ if args.list_tags:
+ tag_counts = collect_tag_counts(cases)
+ if not tag_counts:
+ print("no tags found")
+ return 0
+ for tag in sorted(tag_counts):
+ print(f"{tag} {tag_counts[tag]}")
+ return 0
+
if args.tag:
requested_tags = set(args.tag)
cases = [
diff --git a/tools/skill-evals/tests/test_runner.py b/tools/skill-evals/tests/test_runner.py
index f29960e..77519e1 100644
--- a/tools/skill-evals/tests/test_runner.py
+++ b/tools/skill-evals/tests/test_runner.py
@@ -31,6 +31,7 @@ from skill_evals.runner import (
build_corpus_text,
build_roster_text,
collect_diffs,
+ collect_tag_counts,
compare_outputs,
compare_with_grader,
extract_json_from_output,
@@ -945,6 +946,43 @@ def test_tag_filter_runs_only_matching_cases(tmp_path: Path, capsys: pytest.Capt
assert "case-2-untagged" not in stdout
+def test_list_tags_prints_counts(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
+    """--list-tags prints one "<tag> <count>" line per distinct tag, in sorted tag order."""
+    root = tmp_path / "fixtures"
+    root.mkdir()
+    # Create both case directories first, then tag them: "llama" is shared by
+    # both cases while "qwen" is declared by only one.
+    llama_only, qwen_and_llama = (_make_case(root, name) for name in ("case-1", "case-2"))
+    for case_dir, tags in ((llama_only, ["llama"]), (qwen_and_llama, ["qwen", "llama"])):
+        (case_dir / "case-meta.json").write_text(json.dumps({"tags": tags}))
+
+    exit_code, out, err = _run_main(capsys, [str(root), "--list-tags"])
+    assert exit_code == 0
+    assert err == ""
+    assert out.strip().splitlines() == ["llama 2", "qwen 1"]
+
+
+def test_list_tags_no_tags_found(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
+    """When no case declares a tag, --list-tags prints "no tags found" and returns 0."""
+    root = tmp_path / "fixtures"
+    # This test writes no tags for the case it creates.
+    _make_case(root, "case-1")
+
+    argv = [str(root), "--list-tags"]
+    exit_code, out, err = _run_main(capsys, argv)
+    assert exit_code == 0
+    assert err == ""
+    assert out.strip() == "no tags found"
+
+
+def test_collect_tag_counts(tmp_path: Path):
+    """collect_tag_counts maps each tag to the number of cases declaring it."""
+    root = tmp_path / "fixtures"
+    # "alpha" is declared by both cases, "beta" by the second one only.
+    alpha_only, alpha_and_beta = (_make_case(root, name) for name in ("case-1", "case-2"))
+    for case_dir, tags in ((alpha_only, ["alpha"]), (alpha_and_beta, ["alpha", "beta"])):
+        (case_dir / "case-meta.json").write_text(json.dumps({"tags": tags}))
+
+    discovered = find_cases(root)
+    assert collect_tag_counts(discovered) == {"alpha": 2, "beta": 1}
+
+
# ---------------------------------------------------------------------------
# load_grading_schema
# ---------------------------------------------------------------------------