(trafficserver) branch master updated: Add fixes around fail_action 5 (#12845)

eze Mon, 02 Feb 2026 10:13:04 -0800

This is an automated email from the ASF dual-hosted git repository.

eze pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git



The following commit(s) were added to refs/heads/master by this push:
     new 4a7c5f77e9 Add fixes around fail_action 5 (#12845)
4a7c5f77e9 is described below

commit 4a7c5f77e975b3f10c610d3af1340e6505a559fa
Author: Evan Zelkowitz <[email protected]>
AuthorDate: Mon Feb 2 11:12:48 2026 -0700

    Add fixes around fail_action 5 (#12845)
    
    Add autest for fail action 5 cache read retry mode
    add fixes around read_retry write fail action 5
---
 src/proxy/http/HttpTransact.cc                     |  14 ++
 .../gold_tests/cache/cache-read-retry-mode.test.py |  30 +++
 .../cache/replay/cache-read-retry.replay.yaml      | 234 +++++++++++++++++++++
 3 files changed, 278 insertions(+)

diff --git a/src/proxy/http/HttpTransact.cc b/src/proxy/http/HttpTransact.cc
index 496699d183..4c2e67cd9d 100644
--- a/src/proxy/http/HttpTransact.cc
+++ b/src/proxy/http/HttpTransact.cc
@@ -3270,6 +3270,11 @@ HttpTransact::HandleCacheOpenReadMiss(State *s)
     s->cache_info.action = CacheAction_t::NO_ACTION;
   } else if (s->api_server_response_no_store) { // plugin may have decided not to cache the response
     s->cache_info.action = CacheAction_t::NO_ACTION;
+  } else if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY) {
+    // We've looped back around due to failing to read during READ_RETRY mode.
+    // Don't attempt another cache write - just proxy to origin without caching.
+    TxnDbg(dbg_ctl_http_trans, "READ_RETRY cache read failed, bypassing cache");
+    s->cache_info.action = CacheAction_t::NO_ACTION;
   } else {
     HttpTransact::set_cache_prepare_write_action_for_new_request(s);
   }
@@ -3342,6 +3347,15 @@ HttpTransact::set_cache_prepare_write_action_for_new_request(State *s)
     // don't have a state for that.
     ink_release_assert(s->redirect_info.redirect_in_process);
     s->cache_info.action = CacheAction_t::WRITE;
+  } else if (s->cache_info.write_lock_state == CacheWriteLock_t::READ_RETRY &&
+             (!s->redirect_info.redirect_in_process || s->txn_conf->redirect_use_orig_cache_key)) {
+    // Defensive: Should not reach here if HandleCacheOpenReadMiss check is working.
+    // If we somehow get here in READ_RETRY state, bypass cache unless we're in a redirect
+    // that uses a different cache key (redirect_use_orig_cache_key == 0).
+    // When redirect_use_orig_cache_key is enabled, the redirect uses the same cache key
+    // as the original request, so we'd hit the same write lock contention.
+    Error("set_cache_prepare_write_action_for_new_request called with READ_RETRY state");
+    s->cache_info.action = CacheAction_t::NO_ACTION;
   } else {
     s->cache_info.action           = CacheAction_t::PREPARE_TO_WRITE;
     s->cache_info.write_lock_state = HttpTransact::CacheWriteLock_t::INIT;
diff --git a/tests/gold_tests/cache/cache-read-retry-mode.test.py b/tests/gold_tests/cache/cache-read-retry-mode.test.py
new file mode 100644
index 0000000000..117a7b7bae
--- /dev/null
+++ b/tests/gold_tests/cache/cache-read-retry-mode.test.py
@@ -0,0 +1,30 @@
+'''
+Test cache_open_write_fail_action = 5 (READ_RETRY mode)
+'''
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+Test.Summary = '''
+Test cache_open_write_fail_action = 5 (READ_RETRY mode) to verify:
+1. Basic read-while-writer behavior with fail_action=5
+2. READ_RETRY mode configuration is accepted and functional
+3. System does not crash under write lock contention
+4. Requests are served correctly when read retries are exhausted
+'''
+
+Test.ContinueOnFail = True
+
+Test.ATSReplayTest(replay_file="replay/cache-read-retry.replay.yaml")
diff --git a/tests/gold_tests/cache/replay/cache-read-retry.replay.yaml b/tests/gold_tests/cache/replay/cache-read-retry.replay.yaml
new file mode 100644
index 0000000000..49e46e887c
--- /dev/null
+++ b/tests/gold_tests/cache/replay/cache-read-retry.replay.yaml
@@ -0,0 +1,234 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+#
+# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY)
+# with concurrent requests and slow origin responses.
+#
+# Scenario:
+# - Multiple concurrent requests arrive for the same uncached URL
+# - Request 1 gets write lock, connects to origin (slow, 3 second response)
+# - Request 2-3 try to read, retry reads, eventually get read-while-writer access
+# - All requests should succeed with 200 responses from read-while-writer
+# - Only ONE origin connection should be made (request collapsing works)
+# - System should NOT crash (validates stability with fail_action=5 enabled)
+#
+# Expected Behavior:
+# 1. Request collapsing works correctly (all requests get first-origin response)
+# 2. Read-while-writer functions properly
+# 3. No crashes occur
+# 4. Content is cached properly
+#
+
+meta:
+  version: "1.0"
+
+autest:
+  description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability'
+  dns:
+    name: 'dns-read-retry-exhausted'
+
+  server:
+    name: 'origin-read-retry-exhausted'
+
+  client:
+    name: 'client-read-retry-exhausted'
+
+  ats:
+    name: 'ts-read-retry-exhausted'
+    process_config:
+      enable_cache: true
+
+    records_config:
+      proxy.config.diags.debug.enabled: 1
+      proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans'
+      # Enable READ_RETRY mode
+      proxy.config.http.cache.open_write_fail_action: 5
+      # Configure retry parameters to exhaust quickly
+      proxy.config.http.cache.max_open_write_retries: 1
+      proxy.config.http.cache.max_open_write_retry_timeout: 0
+      # Only 5 read retries @ 100ms = 500ms total (not enough for 3s origin)
+      proxy.config.http.cache.max_open_read_retries: 5
+      proxy.config.http.cache.open_read_retry_time: 100
+      proxy.config.cache.enable_read_while_writer: 1
+
+    remap_config:
+      - from: "http://example.com/"
+        to: "http://backend.example.com:{SERVER_HTTP_PORT}/"
+
+    log_validation:
+      traffic_out:
+        # Should NOT contain crash indicators
+        excludes:
+          - expression: "FATAL|ALERT|Emergency|ink_release_assert|ink_abort"
+            description: "Verify ATS does not crash with READ_RETRY mode enabled"
+        # Should contain cache read retries (shows the system is working)
+        contains:
+          - expression: "retrying cache open read|read while write"
+            description: "Verify cache retry mechanism is active"
+
+sessions:
+  #############################################################################
+  # First session: Request that gets the write lock (VERY slow origin)
+  #############################################################################
+  - transactions:
+      - client-request:
+          method: "GET"
+          version: "1.1"
+          url: /slowfile
+          headers:
+            fields:
+              - [uuid, first-request-slow-origin]
+              - [Host, example.com]
+              - [X-Request, first-slow]
+
+        proxy-request:
+          headers:
+            fields:
+              - [X-Request, {value: 'first-slow', as: equal}]
+
+        server-response:
+          # Slow response (3 seconds) to allow second request to retry reads
+          # Must be less than client timeout (5s) but long enough to trigger retries
+          delay: 3s
+          status: 200
+          reason: OK
+          headers:
+            fields:
+              - [Content-Length, 200]
+              - [Cache-Control, "max-age=300"]
+              - [X-Response, first-origin]
+              - [X-Origin-Request, first]
+
+        proxy-response:
+          status: 200
+          headers:
+            fields:
+              - [X-Response, {value: 'first-origin', as: equal}]
+
+  #############################################################################
+  # Second session: Concurrent request that exhausts READ_RETRY attempts
+  # This runs in parallel and triggers the bug path
+  #############################################################################
+  - transactions:
+      - client-request:
+          # Small delay to ensure first request gets write lock first
+          delay: 100ms
+          method: "GET"
+          version: "1.1"
+          url: /slowfile
+          headers:
+            fields:
+              - [uuid, second-request-exhausted]
+              - [Host, example.com]
+              - [X-Request, second-exhausted]
+
+        proxy-request:
+          headers:
+            fields:
+              - [X-Request, {value: 'second-exhausted', as: equal}]
+
+        # This request should NOT reach the server if read-while-writer works
+        # It should be served from Request 1's response
+        server-response:
+          status: 400
+          reason: OK
+          headers:
+            fields:
+              - [Content-Length, 200]
+              - [Cache-Control, "max-age=300"]
+              - [X-Response, second-origin]
+              - [X-Origin-Request, second]
+
+        # Proxy should respond with 200 from read-while-writer (first request's response)
+        proxy-response:
+          status: 200
+          headers:
+            fields:
+              # Should get first-origin via read-while-writer (request collapsing works)
+              - [X-Response, {value: 'first-origin', as: equal}]
+
+  #############################################################################
+  # Third session: Another concurrent request to stress test the system
+  #############################################################################
+  - transactions:
+      - client-request:
+          # Arrives after second request starts but before first completes
+          delay: 300ms
+          method: "GET"
+          version: "1.1"
+          url: /slowfile
+          headers:
+            fields:
+              - [uuid, third-request-concurrent]
+              - [Host, example.com]
+              - [X-Request, third-concurrent]
+
+        proxy-request:
+          headers:
+            fields:
+              - [X-Request, {value: 'third-concurrent', as: equal}]
+
+        # This request will also be served via read-while-writer
+        server-response:
+          status: 400
+          reason: OK
+          headers:
+            fields:
+              - [Content-Length, 200]
+              - [Cache-Control, "max-age=300"]
+              - [X-Response, third-origin]
+              - [X-Origin-Request, third]
+
+        # Should get a response from read-while-writer
+        proxy-response:
+          status: 200
+          headers:
+            fields:
+              # Should get first-origin via read-while-writer
+              - [X-Response, {value: 'first-origin', as: equal}]
+
+  #############################################################################
+  # Fourth session: Verify cache state after all the chaos
+  #############################################################################
+  - transactions:
+      - client-request:
+          # Wait for all previous transactions to complete (3s origin + buffer)
+          delay: 5s
+          method: "GET"
+          version: "1.1"
+          url: /slowfile
+          headers:
+            fields:
+              - [uuid, fourth-request-verify]
+              - [Host, example.com]
+              - [X-Request, fourth-verify]
+
+        # Server should NOT receive this request (should be cache hit)
+        server-response:
+          status: 400
+          reason: OK
+          headers:
+            fields:
+              - [Content-Length, 200]
+              - [X-Response, should-not-reach-origin]
+
+        # Proxy should respond from cache with the first-origin response
+        proxy-response:
+          status: 200
+          headers:
+            fields:
+              - [X-Response, {value: 'first-origin', as: equal}]

(trafficserver) branch master updated: Add fixes around fail_action 5 (#12845)

Reply via email to