This is an automated email from the ASF dual-hosted git repository.

cmcfarlen pushed a commit to branch 10.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/10.1.x by this push:
     new f534fafed3 Cleanup: Track HttpCacheSM read retry event (#12193) (#12272)

f534fafed3 is described below

commit f534fafed31a2451057819b5b47f1b4bbff0bd18
Author: Masaori Koshiba <masa...@apache.org>
AuthorDate: Wed Jun 4 22:41:05 2025 +0900

    Cleanup: Track HttpCacheSM read retry event (#12193) (#12272)

    * Cleanup: Track HttpCacheSM read retry event

    * Fix handling ACTION_RESULT_DONE from CacheProcessor

    * Check mutex lock before canceling events

    * Fix regression tests

    (cherry picked from commit 87bd19916ea4738ad951b009e13c51cab9647d31)

    Conflicts:
        src/proxy/http/HttpCacheSM.cc
---
 include/proxy/http/HttpCacheSM.h |  18 +++++--
 src/api/InkAPITest.cc            |  13 +++++
 src/proxy/http/HttpCacheSM.cc    | 107 +++++++++++++++++++++++----------------
 src/proxy/http/HttpSM.cc         |   2 +
 4 files changed, 92 insertions(+), 48 deletions(-)

diff --git a/include/proxy/http/HttpCacheSM.h b/include/proxy/http/HttpCacheSM.h
index 381977a987..34a8ccb430 100644
--- a/include/proxy/http/HttpCacheSM.h
+++ b/include/proxy/http/HttpCacheSM.h
@@ -62,6 +62,17 @@ private:
   HttpCacheSM *_cache_sm = nullptr;
 };
 
+/**
+  @class HttpCacheSM
+  @brief A state machine to handle cache from http
+
+  @startuml
+  hide empty description
+  [*] --> state_cache_open_read : open_read()
+  [*] --> state_cache_open_write : open_write()
+  state_cache_open_read --> state_cache_open_write : open_write()
+  @enduml
+ */
 class HttpCacheSM : public Continuation
 {
 public:
@@ -75,6 +86,7 @@ public:
     captive_action.init(this);
   }
   void reset();
+  void cleanup();
 
   Action *open_read(const HttpCacheKey *key, URL *url, HTTPHdr *hdr, const OverridableHttpConfigParams *params,
                     time_t pin_in_cache);
@@ -260,7 +272,9 @@ private:
     const OverridableHttpConfigParams *_params = nullptr;
   };
 
-  void do_schedule_in();
+  void   _schedule_read_retry();
+  Event *_read_retry_event = nullptr;
+
   Action *do_cache_open_read(const HttpCacheKey &);
 
   bool write_retry_done() const;
@@ -269,8 +283,6 @@ private:
   int state_cache_open_write(int event, void *data);
 
   HttpCacheAction captive_action;
-  bool open_read_cb  = false;
-  bool open_write_cb = false;
 
   // Open read parameters
   int open_read_tries = 0;
diff --git a/src/api/InkAPITest.cc b/src/api/InkAPITest.cc
index a2e0a4f7ee..d174fc6a13 100644
--- a/src/api/InkAPITest.cc
+++ b/src/api/InkAPITest.cc
@@ -22,6 +22,7 @@
  */
 
 // Turn off -Wdeprecated so that we can still test our own deprecated APIs.
+
 #if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3)
 #pragma GCC diagnostic ignored "-Wdeprecated"
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -46,6 +47,10 @@
 #include "records/RecCore.h"
 
 #include "../iocore/net/P_UnixNetVConnection.h"
+
+#include "iocore/eventsystem/Continuation.h"
+#include "iocore/eventsystem/Lock.h"
+
 #include "records/RecHttp.h"
 #include "proxy/http/HttpSM.h"
@@ -8736,6 +8741,11 @@ REGRESSION_TEST(SDK_API_OVERRIDABLE_CONFIGS)(RegressionTest *test, int /* atype
   int len;
 
   s->init();
+  s->mutex = new_ProxyMutex();
+  SCOPED_MUTEX_LOCK(lock, s->mutex, this_ethread());
+
+  HttpCacheSM *c_sm = &(s->get_cache_sm());
+  c_sm->init(s, s->mutex);
 
   *pstatus = REGRESSION_TEST_INPROGRESS;
   for (int i = 0; i < static_cast<int>(SDK_Overridable_Configs.size()); ++i) {
@@ -8835,9 +8845,12 @@ REGRESSION_TEST(SDK_API_TXN_HTTP_INFO_GET)(RegressionTest *test, int /* atype AT
   TSMgmtInt ival_read;
 
   s->init();
+  s->mutex = new_ProxyMutex();
+  SCOPED_MUTEX_LOCK(lock, s->mutex, this_ethread());
 
   *pstatus = REGRESSION_TEST_INPROGRESS;
 
   HttpCacheSM *c_sm = &(s->get_cache_sm());
+  c_sm->init(s, s->mutex);
   c_sm->set_readwhilewrite_inprogress(true);
   c_sm->set_open_read_tries(5);
   c_sm->set_open_write_tries(8);
diff --git a/src/proxy/http/HttpCacheSM.cc b/src/proxy/http/HttpCacheSM.cc
index 3a53bde032..9c5d7348c3 100644
--- a/src/proxy/http/HttpCacheSM.cc
+++ b/src/proxy/http/HttpCacheSM.cc
@@ -22,10 +22,12 @@
  */
 
 #include "proxy/http/HttpCacheSM.h"
+#include "iocore/eventsystem/Thread.h"
 #include "proxy/http/HttpSM.h"
 #include "proxy/http/HttpDebugNames.h"
 #include "iocore/cache/Cache.h"
+#include "tscore/ink_assert.h"
 
 #define SM_REMEMBER(sm, e, r) \
   {                           \
@@ -71,6 +73,16 @@ HttpCacheSM::reset()
   captive_action.reset();
 }
 
+void
+HttpCacheSM::cleanup()
+{
+  ink_release_assert(this->mutex && this->mutex->thread_holding == this_ethread());
+
+  if (_read_retry_event != nullptr) {
+    _read_retry_event->cancel();
+  }
+}
+
 //////////////////////////////////////////////////////////////////////////
 //
 //  HttpCacheSM::state_cache_open_read()
@@ -117,7 +129,6 @@ HttpCacheSM::state_cache_open_read(int event, void *data)
       // redirect follow in progress, close the previous cache_read_vc
       close_read();
     }
-    open_read_cb  = true;
     cache_read_vc = static_cast<CacheVConnection *>(data);
     master_sm->handleEvent(event, &captive_action);
     break;
@@ -132,22 +143,23 @@ HttpCacheSM::state_cache_open_read(int event, void *data)
       // Somebody else is writing the object
       if (open_read_tries <= master_sm->t_state.txn_conf->max_cache_open_read_retries) {
         // Retry to read; maybe the update finishes in time
-        open_read_cb = false;
-        do_schedule_in();
+        _schedule_read_retry();
       } else {
         // Give up; the update didn't finish in time
        // HttpSM will inform HttpTransact to 'proxy-only'
-        open_read_cb = true;
         master_sm->handleEvent(event, &captive_action);
       }
     } else {
       // Simple miss in the cache.
-      open_read_cb = true;
       master_sm->handleEvent(event, &captive_action);
     }
     break;
 
   case EVENT_INTERVAL:
+    if (_read_retry_event == static_cast<Event *>(data)) {
+      _read_retry_event = nullptr;
+    }
+
     // Retry the cache open read if the number retries is less
     // than or equal to the max number of open read retries,
     // else treat as a cache miss.
@@ -197,7 +209,6 @@ HttpCacheSM::state_cache_open_write(int event, void *data)
     Metrics::Gauge::increment(http_rsb.current_cache_connections);
     ink_assert(cache_write_vc == nullptr);
     cache_write_vc = static_cast<CacheVConnection *>(data);
-    open_write_cb  = true;
     master_sm->handleEvent(event, &captive_action);
     break;
 
@@ -231,9 +242,8 @@ HttpCacheSM::state_cache_open_write(int event, void *data)
     }
 
     if (read_retry_on_write_fail || !write_retry_done()) {
-      // Retry open write;
-      open_write_cb = false;
-      do_schedule_in();
+      // Retry open read;
+      _schedule_read_retry();
     } else {
       // The cache is hosed or full or something.
       // Forward the failure to the main sm
           "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
           "done retrying...",
           master_sm->sm_id, open_write_tries);
-      open_write_cb = true;
-      err_code      = reinterpret_cast<intptr_t>(data);
+      err_code = reinterpret_cast<intptr_t>(data);
       master_sm->handleEvent(event, &captive_action);
     }
   } break;
 
   case EVENT_INTERVAL:
+    if (_read_retry_event == static_cast<Event *>(data)) {
+      _read_retry_event = nullptr;
+    }
+
     if (master_sm->t_state.txn_conf->cache_open_write_fail_action == CACHE_WL_FAIL_ACTION_READ_RETRY) {
       Dbg(dbg_ctl_http_cache,
           "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
           "falling back to read retry...",
           master_sm->sm_id, open_write_tries);
-      open_read_cb = false;
       master_sm->handleEvent(CACHE_EVENT_OPEN_READ, &captive_action);
     } else {
       Dbg(dbg_ctl_http_cache,
@@ -279,17 +291,22 @@ HttpCacheSM::state_cache_open_write(int event, void *data)
   return VC_EVENT_CONT;
 }
 
+/**
+  Schedule a read retry event to this HttpCacheSM continuation with cache_open_read_retry_time delay.
+  The scheduled event is tracked by `_read_retry_event`.
+ */
 void
-HttpCacheSM::do_schedule_in()
+HttpCacheSM::_schedule_read_retry()
 {
-  ink_assert(pending_action == nullptr);
-  Action *action_handle =
-    mutex->thread_holding->schedule_in(this, HRTIME_MSECONDS(master_sm->t_state.txn_conf->cache_open_read_retry_time));
+  ink_release_assert(this->mutex->thread_holding == this_ethread());
 
-  if (action_handle != ACTION_RESULT_DONE) {
-    pending_action = action_handle;
+  if (_read_retry_event != nullptr && _read_retry_event->cancelled == false) {
+    _read_retry_event->cancel();
   }
 
+  _read_retry_event =
+    mutex->thread_holding->schedule_in(this, HRTIME_MSECONDS(master_sm->t_state.txn_conf->cache_open_read_retry_time));
+
   return;
 }
 
@@ -298,24 +315,25 @@ HttpCacheSM::do_cache_open_read(const HttpCacheKey &key)
 {
   open_read_tries++;
   ink_assert(pending_action == nullptr);
-  ink_assert(open_read_cb == false);
+
   // Initialising read-while-write-inprogress flag
   this->readwhilewrite_inprogress = false;
   Action *action_handle = cacheProcessor.open_read(this, &key, this->read_request_hdr, &http_params);
 
   if (action_handle != ACTION_RESULT_DONE) {
-    pending_action = action_handle;
-  }
-  // Check to see if we've already called the user back
-  // If we have then it's ACTION_RESULT_DONE, other wise
-  // return our captive action and ensure that we are actually
-  // doing something useful
-  if (open_read_cb == true) {
-    return ACTION_RESULT_DONE;
-  } else {
-    ink_assert(pending_action != nullptr);
-    captive_action.cancelled = 0; // Make sure not cancelled before we hand it out
+    pending_action = action_handle;
+    captive_action.cancelled = false; // Make sure not cancelled before we hand it out
     return &captive_action;
+  } else {
+    // In some cases, CacheProcessor::open_read calls back to `state_cache_open_read` with a cache event. If the event is
+    // CACHE_EVENT_OPEN_READ_FAILED, a read retry event might be scheduled. In this case, CacheProcessor::open_read returns
+    // ACTION_RESULT_DONE, even though the read retry event is still pending. To indicate this situation to HttpSM, the scheduled
+    // read retry event is returned. HttpSM can cancel the event if necessary.
+    if (_read_retry_event && _read_retry_event->cancelled != true) {
+      return _read_retry_event;
+    } else {
+      return ACTION_RESULT_DONE;
+    }
   }
 }
 
@@ -335,8 +353,7 @@ HttpCacheSM::open_read(const HttpCacheKey *key, URL *url, HTTPHdr *hdr, const Ov
   lookup_max_recursive++;
   current_lookup_level++;
 
-  open_read_cb = false;
-  act_return   = do_cache_open_read(cache_key);
+  act_return = do_cache_open_read(cache_key);
   // the following logic is based on the assumption that the second
   // lookup won't happen if the HttpSM hasn't been called back for the
   // first lookup
@@ -366,8 +383,7 @@ HttpCacheSM::open_write(const HttpCacheKey *key, URL *url, HTTPHdr *request, Cac
   SET_HANDLER(&HttpCacheSM::state_cache_open_write);
   ink_assert(pending_action == nullptr);
   ink_assert((cache_write_vc == nullptr) || master_sm->t_state.redirect_info.redirect_in_process);
-  // INKqa12119
-  open_write_cb = false;
+
   open_write_tries++;
   if (0 == open_write_start) {
     open_write_start = ink_get_hrtime();
@@ -400,17 +416,18 @@ HttpCacheSM::open_write(const HttpCacheKey *key, URL *url, HTTPHdr *request, Cac
   Action *action_handle = cacheProcessor.open_write(this, key, info, pin_in_cache);
 
   if (action_handle != ACTION_RESULT_DONE) {
-    pending_action = action_handle;
-  }
-  // Check to see if we've already called the user back
-  // If we have then it's ACTION_RESULT_DONE, other wise
-  // return our captive action and ensure that we are actually
-  // doing something useful
-  if (open_write_cb == true) {
-    return ACTION_RESULT_DONE;
-  } else {
-    ink_assert(pending_action != nullptr);
-    captive_action.cancelled = 0; // Make sure not cancelled before we hand it out
+    pending_action = action_handle;
+    captive_action.cancelled = false; // Make sure not cancelled before we hand it out
     return &captive_action;
+  } else {
+    // In some cases, CacheProcessor::open_write calls back to `state_cache_open_write` with a cache event. If the event is
+    // CACHE_EVENT_OPEN_WRITE_FAILED, a read retry event might be scheduled. In this case, CacheProcessor::open_read returns
+    // ACTION_RESULT_DONE, even though the read retry event is still pending. To indicate this situation to HttpSM, the scheduled
+    // read retry event is returned. HttpSM can cancel the event if necessary.
+    if (_read_retry_event && _read_retry_event->cancelled != true) {
+      return _read_retry_event;
+    } else {
+      return ACTION_RESULT_DONE;
+    }
   }
 }
diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc
index 4324fc334f..e24fbadaf5 100644
--- a/src/proxy/http/HttpSM.cc
+++ b/src/proxy/http/HttpSM.cc
@@ -258,6 +258,8 @@ HttpSM::cleanup()
   HttpConfig::release(t_state.http_config_param);
   m_remap->release();
 
+  cache_sm.cleanup();
+
   mutex.clear();
   tunnel.mutex.clear();
   cache_sm.mutex.clear();
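
For reference, the retry-handling pattern this change applies is: remember the scheduled retry event in a member (_read_retry_event), cancel any previously scheduled event before rescheduling, clear the member when the event fires (EVENT_INTERVAL), and cancel whatever is still pending at cleanup while the continuation's mutex is held. Below is a minimal, self-contained C++ sketch of that pattern; FakeEvent, RetryOwner, schedule_retry, on_interval, and the unique_ptr ownership are illustrative stand-ins only, not Traffic Server APIs.

  // retry_event_tracking_sketch.cc - illustration only, not ATS code.
  #include <cstdio>
  #include <memory>

  // Stand-in for an event handle returned by a scheduler; real ATS events carry more state.
  struct FakeEvent {
    bool cancelled = false;
    void cancel() { cancelled = true; }
  };

  class RetryOwner
  {
  public:
    // Schedule a new retry, cancelling any previous one first, so at most one
    // retry event is outstanding at a time.
    FakeEvent *
    schedule_retry()
    {
      if (_read_retry_event != nullptr && _read_retry_event->cancelled == false) {
        _read_retry_event->cancel();
      }
      _owned            = std::make_unique<FakeEvent>(); // stands in for thread->schedule_in(...)
      _read_retry_event = _owned.get();
      return _read_retry_event;
    }

    // Called when the scheduled event fires: stop tracking it.
    void
    on_interval(FakeEvent *e)
    {
      if (_read_retry_event == e) {
        _read_retry_event = nullptr;
      }
    }

    // Called on teardown: cancel anything still pending. The real code also
    // release-asserts that the continuation's mutex is held by this thread.
    void
    cleanup()
    {
      if (_read_retry_event != nullptr) {
        _read_retry_event->cancel();
      }
    }

  private:
    FakeEvent                 *_read_retry_event = nullptr;
    std::unique_ptr<FakeEvent> _owned; // keeps the sketch self-contained
  };

  int
  main()
  {
    RetryOwner owner;
    FakeEvent *e = owner.schedule_retry();
    owner.cleanup();
    std::printf("cancelled: %s\n", e->cancelled ? "true" : "false"); // prints "cancelled: true"
    return 0;
  }

In the state machine above, schedule_retry corresponds to _schedule_read_retry(), on_interval to the EVENT_INTERVAL handling that clears _read_retry_event, and cleanup to HttpCacheSM::cleanup() invoked from HttpSM::cleanup().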