bneradt commented on code in PR #12845: URL: https://github.com/apache/trafficserver/pull/12845#discussion_r2755337121
########## tests/gold_tests/cache/replay/cache-read-retry-exhausted.replay.yaml: ########## @@ -0,0 +1,235 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with concurrent requests and slow origin responses. +# +# Scenario: +# - Multiple concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2-3 try to read, retry reads, eventually get read-while-writer access +# - All requests should succeed with 200 responses from read-while-writer +# - Only ONE origin connection should be made (request collapsing works) +# - System should NOT crash (validates stability with fail_action=5 enabled) +# +# Expected Behavior: +# 1. Request collapsing works correctly (all requests get first-origin response) +# 2. Read-while-writer functions properly +# 3. No crashes occur +# 4. 
Content is cached properly +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-exhausted' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability' Review Comment: Again, let's switch the description to be at the top. That's more readable for humans. ########## tests/gold_tests/cache/replay/cache-read-retry-exhausted.replay.yaml: ########## @@ -0,0 +1,235 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with concurrent requests and slow origin responses. +# +# Scenario: +# - Multiple concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2-3 try to read, retry reads, eventually get read-while-writer access +# - All requests should succeed with 200 responses from read-while-writer +# - Only ONE origin connection should be made (request collapsing works) +# - System should NOT crash (validates stability with fail_action=5 enabled) +# +# Expected Behavior: +# 1. Request collapsing works correctly (all requests get first-origin response) +# 2. 
Read-while-writer functions properly +# 3. No crashes occur +# 4. Content is cached properly +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-exhausted' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability' + + server: + name: 'origin-read-retry-exhausted' + + client: + name: 'client-read-retry-exhausted' + + ats: + name: 'ts-read-retry-exhausted' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters to exhaust quickly + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + # Only 5 read retries @ 100ms = 500ms total (not enough for 10s origin) + proxy.config.http.cache.max_open_read_retries: 5 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer + proxy.config.cache.enable_read_while_writer: 1 + + remap_config: + - from: "http://example.com/" + to: "http://backend.example.com:{SERVER_HTTP_PORT}/" + + log_validation: + traffic_out: + # Should NOT contain crash indicators + excludes: + - expression: "FATAL|ALERT|Emergency|ink_release_assert|ink_abort" + description: "Verify ATS does not crash with READ_RETRY mode enabled" + # Should contain cache read retries (shows the system is working) + contains: + - expression: "retrying cache open read|read while write" + description: "Verify cache retry mechanism is active" + +sessions: + ############################################################################# + # First session: Request that gets the write lock (VERY slow origin) + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /slowfile + 
headers: + fields: + - [uuid, first-request-slow-origin] + - [Host, example.com] + - [X-Request, first-slow] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'first-slow', as: equal}] + + server-response: + # Slow response (3 seconds) to allow second request to retry reads + # Must be less than client timeout (5s) but long enough to trigger retries + delay: 3s + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, first-origin] + - [X-Origin-Request, first] + + proxy-response: + status: 200 + headers: + fields: + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Second session: Concurrent request that exhausts READ_RETRY attempts + # This runs in parallel and triggers the bug path + ############################################################################# + - transactions: + - client-request: + # Small delay to ensure first request gets write lock first + delay: 50ms + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, second-request-exhausted] + - [Host, example.com] + - [X-Request, second-exhausted] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'second-exhausted', as: equal}] + + # This request should NOT reach the server if read-while-writer works + # It should be served from Request 1's response + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, second-origin] + - [X-Origin-Request, second] + + # Proxy should respond with 200 from read-while-writer (first request's response) + proxy-response: + status: 200 + headers: + fields: + # Should get first-origin via read-while-writer (request collapsing works) + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Third 
session: Another concurrent request to stress test the system + ############################################################################# + - transactions: + - client-request: + # Arrives after second request starts but before first completes + delay: 100ms Review Comment: Use 200ms here, per my suggestion on the second session's delay (100ms for that one, 200ms for this one). ########## tests/gold_tests/cache/replay/cache-read-retry-basic.replay.yaml: ########## @@ -0,0 +1,180 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with a basic scenario where read-while-writer eventually succeeds. 
+# +# Scenario: +# - Two concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2 fails write lock, enters READ_RETRY mode +# - Request 2 retries reads, eventually gets read-while-writer access +# - Both requests should succeed with 200 responses +# - Only ONE origin connection should be made (request collapsing) +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-basic' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with successful read-while-writer' + + server: + name: 'origin-read-retry-basic' + + client: + name: 'client-read-retry-basic' + + ats: + name: 'ts-read-retry-basic' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + proxy.config.http.cache.max_open_read_retries: 20 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer + proxy.config.cache.enable_read_while_writer: 1 + + remap_config: + - from: "http://example.com/" + to: "http://backend.example.com:{SERVER_HTTP_PORT}/" + + log_validation: + traffic_out: + # Should NOT contain crash indicators + excludes: + - expression: "FATAL|ALERT|Emergency|ink_release_assert" + description: "Verify ATS does not crash with READ_RETRY mode" + # Should contain cache operations (basic validation) + contains: + - expression: "state_cache_open_write|CACHE_EVENT_OPEN" + description: "Verify cache operations occur" + +sessions: + ############################################################################# + # First session: Request that gets the write lock (slow origin response) + 
############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /bigfile + headers: + fields: + - [uuid, first-request-write-lock] + - [Host, example.com] + - [X-Request, first] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'first', as: equal}] + + server-response: + # Slow response (3 seconds) to allow second request to retry reads + delay: 3s + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 100] + - [Cache-Control, "max-age=300"] + - [X-Response, from-origin] + + proxy-response: + status: 200 + headers: + fields: + - [X-Response, {value: 'from-origin', as: equal}] + + ############################################################################# + # Second session: Concurrent request that should use READ_RETRY + # This runs in parallel with the first session + ############################################################################# + - transactions: + - client-request: + # Small delay to ensure first request gets write lock first + delay: 100ms + method: "GET" + version: "1.1" + url: /bigfile + headers: + fields: + - [uuid, second-request-read-retry] + - [Host, example.com] + - [X-Request, second] + + # Server should NOT receive this request (read-while-writer should work) + # If server receives it, request collapsing failed + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 100] + - [Cache-Control, "max-age=300"] + - [X-Response, duplicate-origin-connection] + + # Proxy should respond with 200, either from read-while-writer or cache + proxy-response: + status: 200 + headers: + fields: + # Should get the response, but preferably from read-while-writer + - [X-Response, {value: 'from-origin', as: equal}] + + ############################################################################# + # Third session: Verify content is cached after both requests complete + 
############################################################################# + - transactions: + - client-request: + # Wait for previous transactions to complete + delay: 4s + method: "GET" + version: "1.1" + url: /bigfile + headers: + fields: + - [uuid, third-request-cache-hit] + - [Host, example.com] + - [X-Request, third] + + # Server should NOT receive this request (should be cache hit) + server-response: + status: 200 Review Comment: Let's make this a 400 response so we are doubly sure we get the cached 200. That adds another check that we get the cached, not origin, response. ########## tests/gold_tests/cache/replay/cache-read-retry-exhausted.replay.yaml: ########## @@ -0,0 +1,235 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with concurrent requests and slow origin responses. 
+# +# Scenario: +# - Multiple concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2-3 try to read, retry reads, eventually get read-while-writer access +# - All requests should succeed with 200 responses from read-while-writer +# - Only ONE origin connection should be made (request collapsing works) +# - System should NOT crash (validates stability with fail_action=5 enabled) +# +# Expected Behavior: +# 1. Request collapsing works correctly (all requests get first-origin response) +# 2. Read-while-writer functions properly +# 3. No crashes occur +# 4. Content is cached properly +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-exhausted' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability' + + server: + name: 'origin-read-retry-exhausted' + + client: + name: 'client-read-retry-exhausted' + + ats: + name: 'ts-read-retry-exhausted' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters to exhaust quickly + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + # Only 5 read retries @ 100ms = 500ms total (not enough for 10s origin) + proxy.config.http.cache.max_open_read_retries: 5 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer + proxy.config.cache.enable_read_while_writer: 1 + + remap_config: + - from: "http://example.com/" + to: "http://backend.example.com:{SERVER_HTTP_PORT}/" + + log_validation: + traffic_out: + # Should NOT contain crash indicators + excludes: + - expression: "FATAL|ALERT|Emergency|ink_release_assert|ink_abort" + description: "Verify ATS 
does not crash with READ_RETRY mode enabled" + # Should contain cache read retries (shows the system is working) + contains: + - expression: "retrying cache open read|read while write" + description: "Verify cache retry mechanism is active" + +sessions: + ############################################################################# + # First session: Request that gets the write lock (VERY slow origin) + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, first-request-slow-origin] + - [Host, example.com] + - [X-Request, first-slow] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'first-slow', as: equal}] + + server-response: + # Slow response (3 seconds) to allow second request to retry reads + # Must be less than client timeout (5s) but long enough to trigger retries + delay: 3s + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, first-origin] + - [X-Origin-Request, first] + + proxy-response: + status: 200 + headers: + fields: + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Second session: Concurrent request that exhausts READ_RETRY attempts + # This runs in parallel and triggers the bug path + ############################################################################# + - transactions: + - client-request: + # Small delay to ensure first request gets write lock first + delay: 50ms Review Comment: I suggest 100ms for this delay, and 200ms for the delay in the third session. ########## tests/gold_tests/cache/replay/cache-read-retry-basic.replay.yaml: ########## @@ -0,0 +1,180 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with a basic scenario where read-while-writer eventually succeeds. +# +# Scenario: +# - Two concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2 fails write lock, enters READ_RETRY mode +# - Request 2 retries reads, eventually gets read-while-writer access +# - Both requests should succeed with 200 responses +# - Only ONE origin connection should be made (request collapsing) +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-basic' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with successful read-while-writer' + + server: + name: 'origin-read-retry-basic' + + client: + name: 'client-read-retry-basic' + + ats: + name: 'ts-read-retry-basic' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + proxy.config.http.cache.max_open_read_retries: 20 + 
proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer Review Comment: Let's remove this comment too. ########## tests/gold_tests/cache/cache-read-retry-mode.test.py: ########## @@ -0,0 +1,33 @@ +''' +Test cache_open_write_fail_action = 5 (READ_RETRY mode) +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Test.Summary = ''' +Test cache_open_write_fail_action = 5 (READ_RETRY mode) to verify: +1. Basic read-while-writer behavior with fail_action=5 +2. READ_RETRY mode configuration is accepted and functional +3. System does not crash under write lock contention +4. Requests are served correctly when read retries are exhausted +''' + +Test.ContinueOnFail = True + +# Tests using Proxy Verifier replays (deterministic, CI-friendly) +# These validate basic READ_RETRY behavior without timing-sensitive parallel requests Review Comment: Given the pretty thorough `Test.Summary` above, let's just remove these comments. ########## tests/gold_tests/cache/replay/cache-read-retry-basic.replay.yaml: ########## @@ -0,0 +1,180 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with a basic scenario where read-while-writer eventually succeeds. +# +# Scenario: +# - Two concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2 fails write lock, enters READ_RETRY mode +# - Request 2 retries reads, eventually gets read-while-writer access +# - Both requests should succeed with 200 responses +# - Only ONE origin connection should be made (request collapsing) +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-basic' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with successful read-while-writer' Review Comment: Let's switch the order of these - description should be at the top of the autest section. ########## tests/gold_tests/cache/replay/cache-read-retry-exhausted.replay.yaml: ########## @@ -0,0 +1,235 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with concurrent requests and slow origin responses. +# +# Scenario: +# - Multiple concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2-3 try to read, retry reads, eventually get read-while-writer access +# - All requests should succeed with 200 responses from read-while-writer +# - Only ONE origin connection should be made (request collapsing works) +# - System should NOT crash (validates stability with fail_action=5 enabled) +# +# Expected Behavior: +# 1. Request collapsing works correctly (all requests get first-origin response) +# 2. Read-while-writer functions properly +# 3. No crashes occur +# 4. 
Content is cached properly +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-exhausted' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability' + + server: + name: 'origin-read-retry-exhausted' + + client: + name: 'client-read-retry-exhausted' + + ats: + name: 'ts-read-retry-exhausted' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters to exhaust quickly + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + # Only 5 read retries @ 100ms = 500ms total (not enough for 10s origin) + proxy.config.http.cache.max_open_read_retries: 5 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer Review Comment: We can remove this comment. ########## tests/gold_tests/cache/replay/cache-read-retry-exhausted.replay.yaml: ########## @@ -0,0 +1,235 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with concurrent requests and slow origin responses. +# +# Scenario: +# - Multiple concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2-3 try to read, retry reads, eventually get read-while-writer access +# - All requests should succeed with 200 responses from read-while-writer +# - Only ONE origin connection should be made (request collapsing works) +# - System should NOT crash (validates stability with fail_action=5 enabled) +# +# Expected Behavior: +# 1. Request collapsing works correctly (all requests get first-origin response) +# 2. Read-while-writer functions properly +# 3. No crashes occur +# 4. Content is cached properly +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-exhausted' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability' + + server: + name: 'origin-read-retry-exhausted' + + client: + name: 'client-read-retry-exhausted' + + ats: + name: 'ts-read-retry-exhausted' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters to exhaust quickly + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + # Only 5 read retries @ 100ms = 500ms total (not enough for 10s origin) + proxy.config.http.cache.max_open_read_retries: 5 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer + proxy.config.cache.enable_read_while_writer: 1 + + remap_config: + - from: "http://example.com/" + to: "http://backend.example.com:{SERVER_HTTP_PORT}/" + + log_validation: + traffic_out: + # Should NOT 
contain crash indicators + excludes: + - expression: "FATAL|ALERT|Emergency|ink_release_assert|ink_abort" + description: "Verify ATS does not crash with READ_RETRY mode enabled" + # Should contain cache read retries (shows the system is working) + contains: + - expression: "retrying cache open read|read while write" + description: "Verify cache retry mechanism is active" + +sessions: + ############################################################################# + # First session: Request that gets the write lock (VERY slow origin) + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, first-request-slow-origin] + - [Host, example.com] + - [X-Request, first-slow] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'first-slow', as: equal}] + + server-response: + # Slow response (3 seconds) to allow second request to retry reads + # Must be less than client timeout (5s) but long enough to trigger retries + delay: 3s + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, first-origin] + - [X-Origin-Request, first] + + proxy-response: + status: 200 + headers: + fields: + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Second session: Concurrent request that exhausts READ_RETRY attempts + # This runs in parallel and triggers the bug path + ############################################################################# + - transactions: + - client-request: + # Small delay to ensure first request gets write lock first + delay: 50ms + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, second-request-exhausted] + - [Host, example.com] + - [X-Request, second-exhausted] + + proxy-request: + headers: + fields: + - [X-Request, {value: 
'second-exhausted', as: equal}] + + # This request should NOT reach the server if read-while-writer works + # It should be served from Request 1's response + server-response: + status: 200 Review Comment: Again, let's make this a 400 so we are sure we get the cached 200. ########## tests/gold_tests/cache/replay/cache-read-retry-exhausted.replay.yaml: ########## @@ -0,0 +1,235 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with concurrent requests and slow origin responses. +# +# Scenario: +# - Multiple concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2-3 try to read, retry reads, eventually get read-while-writer access +# - All requests should succeed with 200 responses from read-while-writer +# - Only ONE origin connection should be made (request collapsing works) +# - System should NOT crash (validates stability with fail_action=5 enabled) +# +# Expected Behavior: +# 1. Request collapsing works correctly (all requests get first-origin response) +# 2. Read-while-writer functions properly +# 3. No crashes occur +# 4. 
Content is cached properly +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-exhausted' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability' + + server: + name: 'origin-read-retry-exhausted' + + client: + name: 'client-read-retry-exhausted' + + ats: + name: 'ts-read-retry-exhausted' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters to exhaust quickly + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + # Only 5 read retries @ 100ms = 500ms total (not enough for 10s origin) + proxy.config.http.cache.max_open_read_retries: 5 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer + proxy.config.cache.enable_read_while_writer: 1 + + remap_config: + - from: "http://example.com/" + to: "http://backend.example.com:{SERVER_HTTP_PORT}/" + + log_validation: + traffic_out: + # Should NOT contain crash indicators + excludes: + - expression: "FATAL|ALERT|Emergency|ink_release_assert|ink_abort" + description: "Verify ATS does not crash with READ_RETRY mode enabled" + # Should contain cache read retries (shows the system is working) + contains: + - expression: "retrying cache open read|read while write" + description: "Verify cache retry mechanism is active" + +sessions: + ############################################################################# + # First session: Request that gets the write lock (VERY slow origin) + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, first-request-slow-origin] + - [Host, 
example.com] + - [X-Request, first-slow] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'first-slow', as: equal}] + + server-response: + # Slow response (3 seconds) to allow second request to retry reads + # Must be less than client timeout (5s) but long enough to trigger retries + delay: 3s + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, first-origin] + - [X-Origin-Request, first] + + proxy-response: + status: 200 + headers: + fields: + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Second session: Concurrent request that exhausts READ_RETRY attempts + # This runs in parallel and triggers the bug path + ############################################################################# + - transactions: + - client-request: + # Small delay to ensure first request gets write lock first + delay: 50ms + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, second-request-exhausted] + - [Host, example.com] + - [X-Request, second-exhausted] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'second-exhausted', as: equal}] + + # This request should NOT reach the server if read-while-writer works + # It should be served from Request 1's response + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, second-origin] + - [X-Origin-Request, second] + + # Proxy should respond with 200 from read-while-writer (first request's response) + proxy-response: + status: 200 + headers: + fields: + # Should get first-origin via read-while-writer (request collapsing works) + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Third session: Another concurrent request to stress test the system + 
############################################################################# + - transactions: + - client-request: + # Arrives after second request starts but before first completes + delay: 100ms + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, third-request-concurrent] + - [Host, example.com] + - [X-Request, third-concurrent] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'third-concurrent', as: equal}] + + # This request will also be served via read-while-writer + server-response: + status: 200 Review Comment: 400 instead of 200, please. Again, so we ensure we get the cached, not origin, response. ########## tests/gold_tests/cache/replay/cache-read-retry-basic.replay.yaml: ########## @@ -0,0 +1,180 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with a basic scenario where read-while-writer eventually succeeds. 
+# +# Scenario: +# - Two concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2 fails write lock, enters READ_RETRY mode +# - Request 2 retries reads, eventually gets read-while-writer access +# - Both requests should succeed with 200 responses +# - Only ONE origin connection should be made (request collapsing) +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-basic' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with successful read-while-writer' + + server: + name: 'origin-read-retry-basic' + + client: + name: 'client-read-retry-basic' + + ats: + name: 'ts-read-retry-basic' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + proxy.config.http.cache.max_open_read_retries: 20 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer + proxy.config.cache.enable_read_while_writer: 1 + + remap_config: + - from: "http://example.com/" + to: "http://backend.example.com:{SERVER_HTTP_PORT}/" + + log_validation: + traffic_out: + # Should NOT contain crash indicators + excludes: + - expression: "FATAL|ALERT|Emergency|ink_release_assert" + description: "Verify ATS does not crash with READ_RETRY mode" + # Should contain cache operations (basic validation) + contains: + - expression: "state_cache_open_write|CACHE_EVENT_OPEN" + description: "Verify cache operations occur" + +sessions: + ############################################################################# + # First session: Request that gets the write lock (slow origin response) + 
############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /bigfile + headers: + fields: + - [uuid, first-request-write-lock] + - [Host, example.com] + - [X-Request, first] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'first', as: equal}] + + server-response: + # Slow response (3 seconds) to allow second request to retry reads + delay: 3s Review Comment: 3 seconds is more than we need. I know it only saves a second, but let's do 2s here and 3 for the later third-request-cache-hit transaction. Remember to update the "Scenario" comments at the top of this file with the updated timing. ########## tests/gold_tests/cache/replay/cache-read-retry-exhausted.replay.yaml: ########## @@ -0,0 +1,235 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This replay file tests cache_open_write_fail_action = 5 (READ_RETRY) +# with concurrent requests and slow origin responses. 
+# +# Scenario: +# - Multiple concurrent requests arrive for the same uncached URL +# - Request 1 gets write lock, connects to origin (slow, 3 second response) +# - Request 2-3 try to read, retry reads, eventually get read-while-writer access +# - All requests should succeed with 200 responses from read-while-writer +# - Only ONE origin connection should be made (request collapsing works) +# - System should NOT crash (validates stability with fail_action=5 enabled) +# +# Expected Behavior: +# 1. Request collapsing works correctly (all requests get first-origin response) +# 2. Read-while-writer functions properly +# 3. No crashes occur +# 4. Content is cached properly +# + +meta: + version: "1.0" + +autest: + dns: + name: 'dns-read-retry-exhausted' + description: 'Test cache_open_write_fail_action = 5 (READ_RETRY) with slow origin and concurrent requests to verify request collapsing and stability' + + server: + name: 'origin-read-retry-exhausted' + + client: + name: 'client-read-retry-exhausted' + + ats: + name: 'ts-read-retry-exhausted' + process_config: + enable_cache: true + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'http|cache|http_cache|http_trans' + # Enable READ_RETRY mode + proxy.config.http.cache.open_write_fail_action: 5 + # Configure retry parameters to exhaust quickly + proxy.config.http.cache.max_open_write_retries: 1 + proxy.config.http.cache.max_open_write_retry_timeout: 0 + # Only 5 read retries @ 100ms = 500ms total (not enough for 10s origin) + proxy.config.http.cache.max_open_read_retries: 5 + proxy.config.http.cache.open_read_retry_time: 100 + # Enable read-while-writer + proxy.config.cache.enable_read_while_writer: 1 + + remap_config: + - from: "http://example.com/" + to: "http://backend.example.com:{SERVER_HTTP_PORT}/" + + log_validation: + traffic_out: + # Should NOT contain crash indicators + excludes: + - expression: "FATAL|ALERT|Emergency|ink_release_assert|ink_abort" + description: "Verify ATS 
does not crash with READ_RETRY mode enabled" + # Should contain cache read retries (shows the system is working) + contains: + - expression: "retrying cache open read|read while write" + description: "Verify cache retry mechanism is active" + +sessions: + ############################################################################# + # First session: Request that gets the write lock (VERY slow origin) + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, first-request-slow-origin] + - [Host, example.com] + - [X-Request, first-slow] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'first-slow', as: equal}] + + server-response: + # Slow response (3 seconds) to allow second request to retry reads + # Must be less than client timeout (5s) but long enough to trigger retries + delay: 3s + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, first-origin] + - [X-Origin-Request, first] + + proxy-response: + status: 200 + headers: + fields: + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Second session: Concurrent request that exhausts READ_RETRY attempts + # This runs in parallel and triggers the bug path + ############################################################################# + - transactions: + - client-request: + # Small delay to ensure first request gets write lock first + delay: 50ms + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, second-request-exhausted] + - [Host, example.com] + - [X-Request, second-exhausted] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'second-exhausted', as: equal}] + + # This request should NOT reach the server if read-while-writer works + # It should be served from 
Request 1's response + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, second-origin] + - [X-Origin-Request, second] + + # Proxy should respond with 200 from read-while-writer (first request's response) + proxy-response: + status: 200 + headers: + fields: + # Should get first-origin via read-while-writer (request collapsing works) + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Third session: Another concurrent request to stress test the system + ############################################################################# + - transactions: + - client-request: + # Arrives after second request starts but before first completes + delay: 100ms + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, third-request-concurrent] + - [Host, example.com] + - [X-Request, third-concurrent] + + proxy-request: + headers: + fields: + - [X-Request, {value: 'third-concurrent', as: equal}] + + # This request will also be served via read-while-writer + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 200] + - [Cache-Control, "max-age=300"] + - [X-Response, third-origin] + - [X-Origin-Request, third] + + # Should get a response from read-while-writer + proxy-response: + status: 200 + headers: + fields: + # Should get first-origin via read-while-writer + - [X-Response, {value: 'first-origin', as: equal}] + + ############################################################################# + # Fourth session: Verify cache state after all the chaos + ############################################################################# + - transactions: + - client-request: + # Wait for all previous transactions to complete (3s origin + buffer) + delay: 5s + method: "GET" + version: "1.1" + url: /slowfile + headers: + fields: + - [uuid, 
fourth-request-verify] + - [Host, example.com] + - [X-Request, fourth-verify] + + # Server should NOT receive this request (should be cache hit) + server-response: + status: 200 Review Comment: 400 instead of 200 to ensure we get the cached response, not the origin one. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@trafficserver.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org
