Copilot commented on code in PR #12852:
URL: https://github.com/apache/trafficserver/pull/12852#discussion_r2765957720
##########
include/proxy/http/HttpTransact.h:
##########
@@ -703,6 +703,15 @@ class HttpTransact
/// configuration.
bool is_cacheable_due_to_negative_caching_configuration = false;
+ /// Set when stale content is served due to cache write lock failure.
+ /// Used to correctly attribute statistics and VIA strings.
+ bool serving_stale_due_to_write_lock = false;
+
+ /// Set when CACHE_LOOKUP_COMPLETE hook is deferred for action 5/6.
+ /// The hook will fire later with the final result once we know if
+ /// stale content will be served or if we're going to origin.
+ bool cache_lookup_complete_deferred = false;
+
MgmtByte cache_open_write_fail_action = 0;
Review Comment:
The new per-transaction flags `serving_stale_due_to_write_lock` and
`cache_lookup_complete_deferred` are members of `HttpTransact::State`, but
there’s no per-request reset path in `State::init()` (it only resets
`dns_info`) and the code only ever sets these flags to `true` / clears them in
a few cache paths. Because `HttpSM` reuses `t_state` across keep-alive
requests, these flags can leak into subsequent transactions and cause incorrect
cache lookup results / hook deferral. Reset both flags (and any related
cache-open-write-fail state) at the start of each new transaction and/or at the
start of each cache lookup attempt (e.g., where `cache_lookup_result` is reset)
so they cannot persist across requests.
```suggestion
/// Per-transaction cache open/write failure action.
  /// This must be reset at the start of each new transaction or cache lookup.
MgmtByte cache_open_write_fail_action = 0;
/// Reset per-transaction cache lookup / write-lock related flags.
///
/// This helper is intended to be called at the start of each new
/// transaction and whenever a new cache lookup attempt begins, to
/// ensure that stale values do not leak across transactions when
/// HttpTransact::State instances are reused (e.g., on keep-alive).
void
reset_cache_transaction_flags()
{
serving_stale_due_to_write_lock = false;
cache_lookup_complete_deferred = false;
cache_open_write_fail_action = 0;
}
```
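
As a further illustration of the keep-alive reuse concern, here is a minimal standalone sketch; the `State` struct below is a stand-in for `HttpTransact::State`, and wiring the helper into `init()` is an assumption for illustration, not the PR's current behavior:

```cpp
// Minimal standalone sketch of the flag-leak scenario and the proposed
// reset helper. The State struct is a stand-in, not the real
// HttpTransact::State; calling the helper from init() is an assumption.
#include <cassert>

struct State {
  bool serving_stale_due_to_write_lock = false;
  bool cache_lookup_complete_deferred  = false;
  int  cache_open_write_fail_action    = 0;

  void reset_cache_transaction_flags() {
    serving_stale_due_to_write_lock = false;
    cache_lookup_complete_deferred  = false;
    cache_open_write_fail_action    = 0;
  }

  // Stand-in for State::init(), run for each request on a reused
  // keep-alive session.
  void init() { reset_cache_transaction_flags(); }
};

int main() {
  State t_state;                                    // reused across requests, as HttpSM reuses t_state
  t_state.serving_stale_due_to_write_lock = true;   // set while handling request #1
  t_state.cache_lookup_complete_deferred  = true;
  t_state.init();                                   // request #2 begins on the same session
  assert(!t_state.serving_stale_due_to_write_lock); // flags do not leak into request #2
  assert(!t_state.cache_lookup_complete_deferred);
  return 0;
}
```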
##########
src/proxy/http/HttpTransact.cc:
##########
@@ -3270,21 +3363,55 @@ HttpTransact::HandleCacheOpenReadMiss(State *s)
s->cache_info.action = CacheAction_t::NO_ACTION;
} else if (s->api_server_response_no_store) { // plugin may have decided not to cache the response
s->cache_info.action = CacheAction_t::NO_ACTION;
- } else if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY) {
+ } else if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY && !s->serving_stale_due_to_write_lock) {
// We've looped back around due to failing to read during READ_RETRY mode.
// Don't attempt another cache write - just proxy to origin without caching.
TxnDbg(dbg_ctl_http_trans, "READ_RETRY cache read failed, bypassing cache");
s->cache_info.action = CacheAction_t::NO_ACTION;
+ } else if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY) {
+ // READ_RETRY with serving_stale_due_to_write_lock = true means we found stale content
+ // but can't serve it (e.g., MUST_PROXY auth). Hook already fired, just go to origin.
+ TxnDbg(dbg_ctl_http_trans, "READ_RETRY stale found but requires origin, bypassing cache");
Review Comment:
The comment and branching here imply a case where
`serving_stale_due_to_write_lock == true` but stale still “can’t be served
(e.g., MUST_PROXY auth)”. However, the only place this flag is set is
`what_is_document_freshness()`, which gates it on
`is_stale_cache_response_returnable()`—and that function already returns false
when authorization is required. As written, this second READ_RETRY branch looks
unreachable / misleading. Consider removing it or adjusting the flag-setting
logic so the states are consistent and the log message reflects a reachable
condition.
```suggestion
// READ_RETRY with serving_stale_due_to_write_lock = true means we're already serving
// stale content due to a previous write-lock failure. The hook (if any) has fired;
// just go to origin without attempting another cache write.
TxnDbg(dbg_ctl_http_trans, "READ_RETRY serving stale due to write lock, bypassing cache and going to origin");
```
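
To make the unreachability argument concrete, here is a small self-contained sketch of the gating described above; the function bodies are stand-ins mirroring the review text, not the actual `what_is_document_freshness()` / `is_stale_cache_response_returnable()` implementations:

```cpp
// Self-contained sketch of why the second READ_RETRY branch cannot fire:
// the only setter of serving_stale_due_to_write_lock is gated on a check
// that already fails when authorization (e.g., MUST_PROXY) is required.
// All names and bodies here are illustrative stand-ins.
#include <cstdio>

struct State {
  bool auth_required                   = false; // e.g., MUST_PROXY case
  bool serving_stale_due_to_write_lock = false;
};

// Stand-in for is_stale_cache_response_returnable(): per the review, it
// returns false when authorization is required.
static bool is_stale_cache_response_returnable(const State &s) {
  return !s.auth_required;
}

// Stand-in for the flag-setting site in what_is_document_freshness().
static void maybe_mark_serving_stale(State &s) {
  if (is_stale_cache_response_returnable(s)) {
    s.serving_stale_due_to_write_lock = true;
  }
}

int main() {
  State s;
  s.auth_required = true; // the "requires origin" case the branch claims to handle
  maybe_mark_serving_stale(s);
  // Prints 0: with auth required the flag stays false, so
  // "READ_RETRY && serving_stale_due_to_write_lock" never selects that branch.
  std::printf("%d\n", static_cast<int>(s.serving_stale_due_to_write_lock));
  return 0;
}
```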
##########
src/proxy/http/HttpTransact.cc:
##########
@@ -3207,21 +3233,88 @@ HttpTransact::handle_cache_write_lock(State *s)
}
if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY) {
- TxnDbg(dbg_ctl_http_error, "calling hdr_info.server_request.destroy");
+ TxnDbg(dbg_ctl_http_trans, "READ_RETRY: destroying server_request before cache hit handling");
s->hdr_info.server_request.destroy();
- HandleCacheOpenReadHitFreshness(s);
- } else {
- StateMachineAction_t next;
- next = how_to_open_connection(s);
- if (next == StateMachineAction_t::ORIGIN_SERVER_OPEN || next == StateMachineAction_t::ORIGIN_SERVER_RAW_OPEN) {
- s->next_action = next;
- TRANSACT_RETURN(next, nullptr);
+
+ // For READ_RETRY with cached object, determine if we can serve stale.
+ // If the initial lookup was a MISS, cache_lookup_complete_deferred is true and we
+ // need to fire the hook here with the final result. If the initial lookup was a
+ // HIT_STALE (revalidation case), the hook already fired and deferred is false.
+ CacheHTTPInfo *obj = s->cache_info.object_read;
+ if (obj != nullptr) {
+ // Call what_is_document_freshness to check if stale can be served.
+ // For action 6 (READ_RETRY_STALE_ON_REVALIDATE), this sets serving_stale_due_to_write_lock = true.
+ // For action 5 (READ_RETRY), serving_stale_due_to_write_lock remains false because
+ // the bitwise check (action & STALE_ON_REVALIDATE) fails: 0x05 & 0x02 = 0.
+ what_is_document_freshness(s, &s->hdr_info.client_request, obj->response_get());
+
+ if (s->serving_stale_due_to_write_lock) {
+ // Action 6: Serve stale content
+ TxnDbg(dbg_ctl_http_trans, "READ_RETRY: serving stale content (action 6)");
+ s->cache_lookup_result = CacheLookupResult_t::HIT_STALE;
+
+ // Fire the deferred CACHE_LOOKUP_COMPLETE hook with HIT_STALE if needed
+ if (s->cache_lookup_complete_deferred) {
+ s->cache_lookup_complete_deferred = false;
+ TRANSACT_RETURN(StateMachineAction_t::API_CACHE_LOOKUP_COMPLETE, HandleCacheOpenReadHit);
+ }
+ HandleCacheOpenReadHit(s);
+ } else {
+ // Action 5: Cannot serve stale, go to origin without caching.
+ // Set NO_ACTION to prevent infinite revalidation loop.
+ TxnDbg(dbg_ctl_http_trans, "READ_RETRY: stale found but not serving (action 5), going to origin");
+ s->cache_info.action = CacheAction_t::NO_ACTION;
+ handle_cache_write_lock_go_to_origin(s);
Review Comment:
In the `READ_RETRY` write-lock state, the new logic no longer calls
`HandleCacheOpenReadHitFreshness()` when a cached object is found after the
retry. For action 5 (READ_RETRY) this means you’ll always go to origin
(NO_ACTION) even if the retry actually found a valid fresh cached object (the
main point of request collapsing). For action 6, the decision is currently
based on `is_stale_cache_response_returnable()`, which doesn’t check whether
the object is actually stale, so a fresh object can be misclassified as
“serving stale due to write lock” and counted/handled as stale. This should
continue serving genuine cache hits (fresh/warning) after a successful read
retry for both actions 5 and 6, and only take the special “serve stale without
revalidation” path when the object is truly stale and stale-serving is allowed.
```suggestion
// Call what_is_document_freshness to check if stale can be served and/or if the
// object is a normal fresh cache hit.
// For action 6 (READ_RETRY_STALE_ON_REVALIDATE), this sets serving_stale_due_to_write_lock = true
// when the object is stale and stale-serving is allowed.
what_is_document_freshness(s, &s->hdr_info.client_request, obj->response_get());
if (s->serving_stale_due_to_write_lock) {
// Action 6: Serve stale content without revalidation.
TxnDbg(dbg_ctl_http_trans, "READ_RETRY: serving stale content (action 6)");
s->cache_lookup_result = CacheLookupResult_t::HIT_STALE;
// Fire the deferred CACHE_LOOKUP_COMPLETE hook with HIT_STALE if needed.
if (s->cache_lookup_complete_deferred) {
s->cache_lookup_complete_deferred = false;
TRANSACT_RETURN(StateMachineAction_t::API_CACHE_LOOKUP_COMPLETE, HandleCacheOpenReadHit);
}
HandleCacheOpenReadHit(s);
} else {
// For both action 5 (READ_RETRY) and action 6 with a fresh object, treat this
// as a normal cache hit after READ_RETRY and apply the standard freshness logic.
TxnDbg(dbg_ctl_http_trans, "READ_RETRY: cache hit after retry, using normal freshness handling");
// Fire the deferred CACHE_LOOKUP_COMPLETE hook if this originated from an
// initial MISS (request collapsing case).
if (s->cache_lookup_complete_deferred) {
s->cache_lookup_complete_deferred = false;
TRANSACT_RETURN(StateMachineAction_t::API_CACHE_LOOKUP_COMPLETE, HandleCacheOpenReadHitFreshness);
}
HandleCacheOpenReadHitFreshness(s);
```
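
For the stale-vs-fresh distinction the suggestion relies on, a small standalone sketch of the intended decision; the enum and helper names below are illustrative stand-ins, not the real freshness handling:

```cpp
// Standalone sketch of the decision the review asks for: take the
// "serve stale without revalidation" shortcut only when the object is
// actually stale AND stale-serving is allowed; a fresh object after a
// successful read retry should go through the normal freshness path.
// All names here are illustrative stand-ins.
#include <cstdio>

enum class Freshness { FRESH, WARNING, STALE };

static const char *decide(Freshness freshness, bool stale_serving_allowed) {
  if (freshness == Freshness::STALE && stale_serving_allowed) {
    return "HIT_STALE: serve stale without revalidation (action 6)";
  }
  // Fresh/warning object, or stale-serving not allowed: fall through to
  // the normal HandleCacheOpenReadHitFreshness() handling.
  return "normal freshness handling after READ_RETRY";
}

int main() {
  std::printf("%s\n", decide(Freshness::FRESH, true));  // genuine fresh hit is not misclassified as stale
  std::printf("%s\n", decide(Freshness::STALE, true));  // truly stale and allowed -> stale shortcut
  std::printf("%s\n", decide(Freshness::STALE, false)); // stale but not allowed -> normal path
  return 0;
}
```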
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]