This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new ee27b294c50 Improve auto-triage TUI: vault fallback, check TTL,
search, review questions (#64969)
ee27b294c50 is described below
commit ee27b294c5001b01b30220de174d36de735a9778
Author: André Ahlert <[email protected]>
AuthorDate: Thu Apr 9 18:31:22 2026 -0300
Improve auto-triage TUI: vault fallback, check TTL, search, review
questions (#64969)
- Try the vault first for the single-PR fetch at startup, falling back to
the GraphQL call only when the cached data is missing or expired (4h TTL).
- Add 4h TTL to check vault so re-run results on the same commit
are picked up instead of serving stale cached data.
- Extend TUI search (/) to match by title and author in addition
to PR number.
- Pass diff_text to the LLM assessment during on-demand re-evaluation so
review questions are included in the prompt.
Signed-off-by: André Ahlert <[email protected]>
---
.../src/airflow_breeze/commands/pr_commands.py | 55 +++++++++++++++++++---
dev/breeze/src/airflow_breeze/utils/pr_vault.py | 5 +-
dev/breeze/src/airflow_breeze/utils/tui_display.py | 26 ++++++----
dev/breeze/tests/test_pr_vault.py | 15 ++++--
4 files changed, 82 insertions(+), 19 deletions(-)
diff --git a/dev/breeze/src/airflow_breeze/commands/pr_commands.py
b/dev/breeze/src/airflow_breeze/commands/pr_commands.py
index 78cd46e95bf..5eddc266a7d 100644
--- a/dev/breeze/src/airflow_breeze/commands/pr_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/pr_commands.py
@@ -248,9 +248,9 @@ def _cached_assess_pr(
return result
# Generate directed review questions from the diff if available.
- # Note: diff_text is not yet passed by the background thread-pool
submissions
- # (the diff may not be fetched at LLM submission time). Review questions
are
- # active when diff_text is provided explicitly (e.g. sequential review
mode).
+ # In the TUI, diff_text is passed when the diff has been fetched by the
+ # background executor before the LLM submission. In the non-TUI flow,
+ # it is passed explicitly during sequential review.
review_questions: list[str] | None = None
if diff_text:
from airflow_breeze.utils.pr_vault import generate_review_questions
@@ -1859,6 +1859,42 @@ def _fetch_single_pr_graphql(token: str,
github_repository: str, pr_number: int)
)
+def _load_pr_from_vault(github_repository: str, pr_number: int) -> PRData |
None:
+ """Try to load a PR from the vault. Returns None on miss or expired TTL.
+
+ The returned PRData has ``unresolved_threads=[]``, ``review_decisions=[]``,
+ and ``has_collaborator_review=False``. These are backfilled by
+ ``_enrich_candidate_details`` which runs during triage/review regardless
+ of whether the PR came from vault or the API.
+ """
+ from airflow_breeze.utils.pr_vault import load_pr
+
+ data = load_pr(github_repository, pr_number)
+ if data is None:
+ return None
+ return PRData(
+ number=data["number"],
+ title=data["title"],
+ body=data.get("body", ""),
+ url=data["url"],
+ created_at=data["created_at"],
+ updated_at=data["updated_at"],
+ node_id=data.get("node_id", ""),
+ author_login=data["author_login"],
+ author_association=data.get("author_association", "NONE"),
+ head_sha=data["head_sha"],
+ base_ref=data.get("base_ref", "main"),
+ check_summary=data.get("check_summary", ""),
+ checks_state=data.get("checks_state", "UNKNOWN"),
+ failed_checks=data.get("failed_checks", []),
+ commits_behind=data.get("commits_behind", 0),
+ is_draft=data.get("is_draft", False),
+ mergeable=data.get("mergeable", "UNKNOWN"),
+ labels=data.get("labels", []),
+ unresolved_threads=[],
+ )
+
+
_author_profile_cache: dict[str, dict] = {}
_author_profile_lock = threading.Lock()
@@ -5322,6 +5358,7 @@ def _run_tui_triage(
pr_body=cur_pr.body,
check_status_summary=cur_pr.check_summary,
llm_model=llm_model,
+ diff_text=diff_cache.get(cur_pr.number),
)
ctx.llm_future_to_pr[fut] = cur_pr
# Keep as PASSING with LLM in progress
@@ -10041,9 +10078,15 @@ def _fetch_initial_prs(
_initial_review_requested_user: str | None = None if review_mode else
review_requested_user
if pr_number:
- if not quiet:
- console_print(f"[info]Fetching PR #{pr_number} via GraphQL...[/]")
- all_prs = [_fetch_single_pr_graphql(token, github_repository,
pr_number)]
+ cached = _load_pr_from_vault(github_repository, pr_number)
+ if cached is not None:
+ if not quiet:
+ console_print(f"[info]Loaded PR #{pr_number} from vault
cache.[/]")
+ all_prs = [cached]
+ else:
+ if not quiet:
+ console_print(f"[info]Fetching PR #{pr_number} via
GraphQL...[/]")
+ all_prs = [_fetch_single_pr_graphql(token, github_repository,
pr_number)]
total_matching_prs = 1
elif len(review_requested_users) > 1 and not review_mode:
if not quiet:
diff --git a/dev/breeze/src/airflow_breeze/utils/pr_vault.py
b/dev/breeze/src/airflow_breeze/utils/pr_vault.py
index 7cfab560f4f..00208ec249e 100644
--- a/dev/breeze/src/airflow_breeze/utils/pr_vault.py
+++ b/dev/breeze/src/airflow_breeze/utils/pr_vault.py
@@ -74,8 +74,9 @@ def save_prs_batch(github_repository: str, prs) -> int:
# ── Check status vault ───────────────────────────────────────────
# Keyed by head_sha. Only caches fully-completed check results (no
-# IN_PROGRESS or QUEUED). Completed results never change for the same SHA.
-_check_vault = CacheStore("check_vault")
+# IN_PROGRESS or QUEUED). Uses a 4-hour TTL because checks can be
+# re-run on the same commit without a force push.
+_check_vault = CacheStore("check_vault", ttl_seconds=4 * 3600)
# Statuses that indicate checks are still running
_INCOMPLETE_STATUSES = {"IN_PROGRESS", "QUEUED", "PENDING"}
diff --git a/dev/breeze/src/airflow_breeze/utils/tui_display.py
b/dev/breeze/src/airflow_breeze/utils/tui_display.py
index eb408bfae4a..159f1661e8b 100644
--- a/dev/breeze/src/airflow_breeze/utils/tui_display.py
+++ b/dev/breeze/src/airflow_breeze/utils/tui_display.py
@@ -1753,7 +1753,7 @@ class TriageTUI:
matching entry. Pressing Escape cancels. Returns True if the cursor
moved.
"""
width, height = _get_terminal_size()
- prompt = "/ Jump to PR #: "
+ prompt = "/ Search (PR#, title, author): "
query = ""
while True:
@@ -1786,18 +1786,28 @@ class TriageTUI:
if not query:
return False
- # Match by PR number only
+ # Try exact PR number match first
+ stripped = query.lstrip("#")
try:
- target_num = int(query.lstrip("#"))
+ target_num = int(stripped)
except ValueError:
- return False
-
+ target_num = None
+
+ if target_num is not None:
+ for idx, entry in enumerate(self.entries):
+ if entry.pr.number == target_num:
+ self.cursor = idx
+ self.scroll_offset = idx
+ self._focus = _FocusPanel.PR_LIST
+ return True
+
+ # Fall back to text search on title/author (also for numeric queries
+ # that didn't match any PR number)
+ query_lower = query.lower()
for idx, entry in enumerate(self.entries):
- if entry.pr.number == target_num:
+ if query_lower in entry.pr.title.lower() or query_lower in
entry.pr.author_login.lower():
self.cursor = idx
- # Put the matched entry at the top of the visible list
self.scroll_offset = idx
- # Switch focus to PR list so the selection is highlighted
self._focus = _FocusPanel.PR_LIST
return True
diff --git a/dev/breeze/tests/test_pr_vault.py
b/dev/breeze/tests/test_pr_vault.py
index 3ab0c2eb4e6..4565f0a15d9 100644
--- a/dev/breeze/tests/test_pr_vault.py
+++ b/dev/breeze/tests/test_pr_vault.py
@@ -152,13 +152,22 @@ class TestCheckStatusVault:
save_check_status("apache/airflow", "sha_abc", {"SUCCESS": 1})
assert load_check_status("apache/airflow", "sha_different") is None
- def test_no_ttl_for_same_sha(self, _fake_cache_dir):
- """Check vault has no TTL — same SHA always returns same results."""
+ def test_ttl_expires_stale_results(self, _fake_cache_dir):
+ """Check vault uses 4h TTL so re-run results are picked up."""
save_check_status("apache/airflow", "sha_abc", {"SUCCESS": 1})
- # Even with old timestamp, should still return (no TTL)
loaded = load_check_status("apache/airflow", "sha_abc")
assert loaded is not None
+ # Simulate expiry by backdating cached_at
+ import json
+
+ cache_file = _fake_cache_dir / "checks_sha_abc.json"
+ data = json.loads(cache_file.read_text())
+ data["cached_at"] = data["cached_at"] - 5 * 3600 # 5 hours ago
+ cache_file.write_text(json.dumps(data))
+
+ assert load_check_status("apache/airflow", "sha_abc") is None
+
class TestWorkflowRunsVault:
def test_save_and_load(self, _fake_cache_dir):