This is an automated email from the ASF dual-hosted git repository.
eze pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 4a7c5f77e9 Add fixes around fail_action 5 (#12845)
4a7c5f77e9 is described below
commit 4a7c5f77e975b3f10c610d3af1340e6505a559fa
Author: Evan Zelkowitz <[email protected]>
AuthorDate: Mon Feb 2 11:12:48 2026 -0700
Add fixes around fail_action 5 (#12845)
Add autest for fail action 5 cache read retry mode
add fixes around read_retry write fail action 5
---
src/proxy/http/HttpTransact.cc | 14 ++
.../gold_tests/cache/cache-read-retry-mode.test.py | 30 +++
.../cache/replay/cache-read-retry.replay.yaml | 234 +++++++++++++++++++++
3 files changed, 278 insertions(+)
diff --git a/src/proxy/http/HttpTransact.cc b/src/proxy/http/HttpTransact.cc
index 496699d183..4c2e67cd9d 100644
--- a/src/proxy/http/HttpTransact.cc
+++ b/src/proxy/http/HttpTransact.cc
@@ -3270,6 +3270,11 @@ HttpTransact::HandleCacheOpenReadMiss(State *s)
s->cache_info.action = CacheAction_t::NO_ACTION;
} else if (s->api_server_response_no_store) { // plugin may have decided not
to cache the response
s->cache_info.action = CacheAction_t::NO_ACTION;
+ } else if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY) {
+ // We've looped back around due to failing to read during READ_RETRY mode.
+ // Don't attempt another cache write - just proxy to origin without
caching.
+ TxnDbg(dbg_ctl_http_trans, "READ_RETRY cache read failed, bypassing
cache");
+ s->cache_info.action = CacheAction_t::NO_ACTION;
} else {
HttpTransact::set_cache_prepare_write_action_for_new_request(s);
}
@@ -3342,6 +3347,15 @@
HttpTransact::set_cache_prepare_write_action_for_new_request(State *s)
// don't have a state for that.
ink_release_assert(s->redirect_info.redirect_in_process);
s->cache_info.action = CacheAction_t::WRITE;
+ } else if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY &&
+ (!s->redirect_info.redirect_in_process ||
s->txn_conf->redirect_use_orig_cache_key)) {
+ // Defensive: Should not reach here if HandleCacheOpenReadMiss check is
working.
+ // If we somehow get here in READ_RETRY state, bypass cache unless we're
in a redirect
+ // that uses a different cache key (redirect_use_orig_cache_key == 0).
+ // When redirect_use_orig_cache_key is enabled, the redirect uses the same
cache key
+ // as the original request, so we'd hit the same write lock contention.
+ Error("set_cache_prepare_write_action_for_new_request called with
READ_RETRY state");
+ s->cache_info.action = CacheAction_t::NO_ACTION;
} else {
s->cache_info.action = CacheAction_t::PREPARE_TO_WRITE;
s->cache_info.write_lock_state = HttpTransact::CacheWriteLock_t::INIT;
diff --git a/tests/gold_tests/cache/cache-read-retry-mode.test.py
b/tests/gold_tests/cache/cache-read-retry-mode.test.py
new file mode 100644
index 0000000000..117a7b7bae
--- /dev/null
+++ b/tests/gold_tests/cache/cache-read-retry-mode.test.py
@@ -0,0 +1,30 @@
+'''
+Test cache_open_write_fail_action = 5 (READ_RETRY mode)
+'''
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Test.Summary = '''
+Test cache_open_write_fail_action = 5 (READ_RETRY mode) to verify:
+1. Basic read-while-writer behavior with fail_action=5
+2. READ_RETRY mode configuration is accepted and functional
+3. System does not crash under write lock contention
+4. Requests are served correctly when read retries are exhausted
+'''
+
+Test.ContinueOnFail = True
+
+Test.ATSReplayTest(replay_file="replay/cache-read-retry.replay.yaml")
diff --git a/tests/gold_tests/cache/replay/cache-read-retry.replay.yaml
b/tests/gold_tests/cache/replay/cache-read-retry.replay.yaml
new file mode 100644
index 0000000000..49e46e887c
--- /dev/null
+++ b/tests/gold_tests/cache/replay/cache-read-retry.replay.yaml
@@ -0,0 +1,234 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY)
+# with concurrent requests and slow origin responses.
+#
+# Scenario:
+# - Multiple concurrent requests arrive for the same uncached URL
+# - Request 1 gets write lock, connects to origin (slow, 3 second response)
+# - Request 2-3 try to read, retry reads, eventually get read-while-writer
access
+# - All requests should succeed with 200 responses from read-while-writer
+# - Only ONE origin connection should be made (request collapsing works)
+# - System should NOT crash (validates stability with fail_action=5 enabled)
+#
+# Expected Behavior:
+# 1. Request collapsing works correctly (all requests get first-origin
response)
+# 2. Read-while-writer functions properly
+# 3. No crashes occur
+# 4. Content is cached properly
+#
+
+meta:
+ version: "1.0"
+
+autest:
+ description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow
origin and concurrent requests to verify request collapsing and stability'
+ dns:
+ name: 'dns-read-retry-exhausted'
+
+ server:
+ name: 'origin-read-retry-exhausted'
+
+ client:
+ name: 'client-read-retry-exhausted'
+
+ ats:
+ name: 'ts-read-retry-exhausted'
+ process_config:
+ enable_cache: true
+
+ records_config:
+ proxy.config.diags.debug.enabled: 1
+ proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans'
+ # Enable READ_RETRY mode
+ proxy.config.http.cache.open_write_fail_action: 5
+ # Configure retry parameters to exhaust quickly
+ proxy.config.http.cache.max_open_write_retries: 1
+ proxy.config.http.cache.max_open_write_retry_timeout: 0
+ # Only 5 read retries @ 100ms = 500ms total (not enough for 3s origin)
+ proxy.config.http.cache.max_open_read_retries: 5
+ proxy.config.http.cache.open_read_retry_time: 100
+ proxy.config.cache.enable_read_while_writer: 1
+
+ remap_config:
+ - from: "http://example.com/"
+ to: "http://backend.example.com:{SERVER_HTTP_PORT}/"
+
+ log_validation:
+ traffic_out:
+ # Should NOT contain crash indicators
+ excludes:
+ - expression: "FATAL|ALERT|Emergency|ink_release_assert|ink_abort"
+ description: "Verify ATS does not crash with READ_RETRY mode
enabled"
+ # Should contain cache read retries (shows the system is working)
+ contains:
+ - expression: "retrying cache open read|read while write"
+ description: "Verify cache retry mechanism is active"
+
+sessions:
+ #############################################################################
+ # First session: Request that gets the write lock (VERY slow origin)
+ #############################################################################
+ - transactions:
+ - client-request:
+ method: "GET"
+ version: "1.1"
+ url: /slowfile
+ headers:
+ fields:
+ - [uuid, first-request-slow-origin]
+ - [Host, example.com]
+ - [X-Request, first-slow]
+
+ proxy-request:
+ headers:
+ fields:
+ - [X-Request, {value: 'first-slow', as: equal}]
+
+ server-response:
+ # Slow response (3 seconds) to allow second request to retry reads
+ # Must be less than client timeout (5s) but long enough to trigger
retries
+ delay: 3s
+ status: 200
+ reason: OK
+ headers:
+ fields:
+ - [Content-Length, 200]
+ - [Cache-Control, "max-age=300"]
+ - [X-Response, first-origin]
+ - [X-Origin-Request, first]
+
+ proxy-response:
+ status: 200
+ headers:
+ fields:
+ - [X-Response, {value: 'first-origin', as: equal}]
+
+ #############################################################################
+ # Second session: Concurrent request that exhausts READ_RETRY attempts
+ # This runs in parallel and triggers the bug path
+ #############################################################################
+ - transactions:
+ - client-request:
+ # Small delay to ensure first request gets write lock first
+ delay: 100ms
+ method: "GET"
+ version: "1.1"
+ url: /slowfile
+ headers:
+ fields:
+ - [uuid, second-request-exhausted]
+ - [Host, example.com]
+ - [X-Request, second-exhausted]
+
+ proxy-request:
+ headers:
+ fields:
+ - [X-Request, {value: 'second-exhausted', as: equal}]
+
+ # This request should NOT reach the server if read-while-writer works
+ # It should be served from Request 1's response
+ server-response:
+ status: 400
+ reason: OK
+ headers:
+ fields:
+ - [Content-Length, 200]
+ - [Cache-Control, "max-age=300"]
+ - [X-Response, second-origin]
+ - [X-Origin-Request, second]
+
+ # Proxy should respond with 200 from read-while-writer (first
request's response)
+ proxy-response:
+ status: 200
+ headers:
+ fields:
+ # Should get first-origin via read-while-writer (request
collapsing works)
+ - [X-Response, {value: 'first-origin', as: equal}]
+
+ #############################################################################
+ # Third session: Another concurrent request to stress test the system
+ #############################################################################
+ - transactions:
+ - client-request:
+ # Arrives after second request starts but before first completes
+ delay: 300ms
+ method: "GET"
+ version: "1.1"
+ url: /slowfile
+ headers:
+ fields:
+ - [uuid, third-request-concurrent]
+ - [Host, example.com]
+ - [X-Request, third-concurrent]
+
+ proxy-request:
+ headers:
+ fields:
+ - [X-Request, {value: 'third-concurrent', as: equal}]
+
+ # This request will also be served via read-while-writer
+ server-response:
+ status: 400
+ reason: OK
+ headers:
+ fields:
+ - [Content-Length, 200]
+ - [Cache-Control, "max-age=300"]
+ - [X-Response, third-origin]
+ - [X-Origin-Request, third]
+
+ # Should get a response from read-while-writer
+ proxy-response:
+ status: 200
+ headers:
+ fields:
+ # Should get first-origin via read-while-writer
+ - [X-Response, {value: 'first-origin', as: equal}]
+
+ #############################################################################
+ # Fourth session: Verify cache state after all the chaos
+ #############################################################################
+ - transactions:
+ - client-request:
+ # Wait for all previous transactions to complete (3s origin + buffer)
+ delay: 5s
+ method: "GET"
+ version: "1.1"
+ url: /slowfile
+ headers:
+ fields:
+ - [uuid, fourth-request-verify]
+ - [Host, example.com]
+ - [X-Request, fourth-verify]
+
+ # Server should NOT receive this request (should be cache hit)
+ server-response:
+ status: 400
+ reason: OK
+ headers:
+ fields:
+ - [Content-Length, 200]
+ - [X-Response, should-not-reach-origin]
+
+ # Proxy should respond from cache with the first-origin response
+ proxy-response:
+ status: 200
+ headers:
+ fields:
+ - [X-Response, {value: 'first-origin', as: equal}]