This is an automated email from the ASF dual-hosted git repository.

cmcfarlen pushed a commit to branch 10.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/10.1.x by this push:
     new f534fafed3 Cleanup: Track HttpCacheSM read retry event (#12193) (#12272)
f534fafed3 is described below

commit f534fafed31a2451057819b5b47f1b4bbff0bd18
Author: Masaori Koshiba <masa...@apache.org>
AuthorDate: Wed Jun 4 22:41:05 2025 +0900

    Cleanup: Track HttpCacheSM read retry event (#12193) (#12272)
    
    * Cleanup: Track HttpCacheSM read retry event
    
    * Fix handling ACTION_RESULT_DONE from CacheProcessor
    
    * Check mutex lock before canceling events
    
    * Fix regression tests
    
    (cherry picked from commit 87bd19916ea4738ad951b009e13c51cab9647d31)
    
    Conflicts:
            src/proxy/http/HttpCacheSM.cc
---
 include/proxy/http/HttpCacheSM.h |  18 +++++--
 src/api/InkAPITest.cc            |  13 +++++
 src/proxy/http/HttpCacheSM.cc    | 107 +++++++++++++++++++++++----------------
 src/proxy/http/HttpSM.cc         |   2 +
 4 files changed, 92 insertions(+), 48 deletions(-)

diff --git a/include/proxy/http/HttpCacheSM.h b/include/proxy/http/HttpCacheSM.h
index 381977a987..34a8ccb430 100644
--- a/include/proxy/http/HttpCacheSM.h
+++ b/include/proxy/http/HttpCacheSM.h
@@ -62,6 +62,17 @@ private:
   HttpCacheSM *_cache_sm = nullptr;
 };
 
+/**
+  @class HttpCacheSM
+  @brief A state machine to handle cache from http
+
+  @startuml
+  hide empty description
+  [*]                   --> state_cache_open_read  : open_read()
+  [*]                   --> state_cache_open_write : open_write()
+  state_cache_open_read --> state_cache_open_write : open_write()
+  @enduml
+ */
 class HttpCacheSM : public Continuation
 {
 public:
@@ -75,6 +86,7 @@ public:
     captive_action.init(this);
   }
   void reset();
+  void cleanup();
 
   Action *open_read(const HttpCacheKey *key, URL *url, HTTPHdr *hdr, const OverridableHttpConfigParams *params,
                     time_t pin_in_cache);
@@ -260,7 +272,9 @@ private:
     const OverridableHttpConfigParams *_params = nullptr;
   };
 
-  void    do_schedule_in();
+  void   _schedule_read_retry();
+  Event *_read_retry_event = nullptr;
+
   Action *do_cache_open_read(const HttpCacheKey &);
 
   bool write_retry_done() const;
@@ -269,8 +283,6 @@ private:
   int state_cache_open_write(int event, void *data);
 
   HttpCacheAction captive_action;
-  bool            open_read_cb  = false;
-  bool            open_write_cb = false;
 
   // Open read parameters
   int                    open_read_tries  = 0;
diff --git a/src/api/InkAPITest.cc b/src/api/InkAPITest.cc
index a2e0a4f7ee..d174fc6a13 100644
--- a/src/api/InkAPITest.cc
+++ b/src/api/InkAPITest.cc
@@ -22,6 +22,7 @@
  */
 
 // Turn off -Wdeprecated so that we can still test our own deprecated APIs.
+
 #if defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3)
 #pragma GCC diagnostic ignored "-Wdeprecated"
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -46,6 +47,10 @@
 #include "records/RecCore.h"
 
 #include "../iocore/net/P_UnixNetVConnection.h"
+
+#include "iocore/eventsystem/Continuation.h"
+#include "iocore/eventsystem/Lock.h"
+
 #include "records/RecHttp.h"
 
 #include "proxy/http/HttpSM.h"
@@ -8736,6 +8741,11 @@ REGRESSION_TEST(SDK_API_OVERRIDABLE_CONFIGS)(RegressionTest *test, int /* atype
   int                    len;
 
   s->init();
+  s->mutex = new_ProxyMutex();
+  SCOPED_MUTEX_LOCK(lock, s->mutex, this_ethread());
+
+  HttpCacheSM *c_sm = &(s->get_cache_sm());
+  c_sm->init(s, s->mutex);
 
   *pstatus = REGRESSION_TEST_INPROGRESS;
   for (int i = 0; i < static_cast<int>(SDK_Overridable_Configs.size()); ++i) {
@@ -8835,9 +8845,12 @@ REGRESSION_TEST(SDK_API_TXN_HTTP_INFO_GET)(RegressionTest *test, int /* atype AT
   TSMgmtInt ival_read;
 
   s->init();
+  s->mutex = new_ProxyMutex();
+  SCOPED_MUTEX_LOCK(lock, s->mutex, this_ethread());
 
   *pstatus          = REGRESSION_TEST_INPROGRESS;
   HttpCacheSM *c_sm = &(s->get_cache_sm());
+  c_sm->init(s, s->mutex);
   c_sm->set_readwhilewrite_inprogress(true);
   c_sm->set_open_read_tries(5);
   c_sm->set_open_write_tries(8);
diff --git a/src/proxy/http/HttpCacheSM.cc b/src/proxy/http/HttpCacheSM.cc
index 3a53bde032..9c5d7348c3 100644
--- a/src/proxy/http/HttpCacheSM.cc
+++ b/src/proxy/http/HttpCacheSM.cc
@@ -22,10 +22,12 @@
  */
 
 #include "proxy/http/HttpCacheSM.h"
+#include "iocore/eventsystem/Thread.h"
 #include "proxy/http/HttpSM.h"
 #include "proxy/http/HttpDebugNames.h"
 
 #include "iocore/cache/Cache.h"
+#include "tscore/ink_assert.h"
 
 #define SM_REMEMBER(sm, e, r)                          \
   {                                                    \
@@ -71,6 +73,16 @@ HttpCacheSM::reset()
   captive_action.reset();
 }
 
+void
+HttpCacheSM::cleanup()
+{
+  ink_release_assert(this->mutex && this->mutex->thread_holding == this_ethread());
+
+  if (_read_retry_event != nullptr) {
+    _read_retry_event->cancel();
+  }
+}
+
 //////////////////////////////////////////////////////////////////////////
 //
 //  HttpCacheSM::state_cache_open_read()
@@ -117,7 +129,6 @@ HttpCacheSM::state_cache_open_read(int event, void *data)
       // redirect follow in progress, close the previous cache_read_vc
       close_read();
     }
-    open_read_cb  = true;
     cache_read_vc = static_cast<CacheVConnection *>(data);
     master_sm->handleEvent(event, &captive_action);
     break;
@@ -132,22 +143,23 @@ HttpCacheSM::state_cache_open_read(int event, void *data)
       // Somebody else is writing the object
       if (open_read_tries <= master_sm->t_state.txn_conf->max_cache_open_read_retries) {
         // Retry to read; maybe the update finishes in time
-        open_read_cb = false;
-        do_schedule_in();
+        _schedule_read_retry();
       } else {
         // Give up; the update didn't finish in time
         // HttpSM will inform HttpTransact to 'proxy-only'
-        open_read_cb = true;
         master_sm->handleEvent(event, &captive_action);
       }
     } else {
       // Simple miss in the cache.
-      open_read_cb = true;
       master_sm->handleEvent(event, &captive_action);
     }
     break;
 
   case EVENT_INTERVAL:
+    if (_read_retry_event == static_cast<Event *>(data)) {
+      _read_retry_event = nullptr;
+    }
+
     // Retry the cache open read if the number retries is less
     // than or equal to the max number of open read retries,
     // else treat as a cache miss.
@@ -197,7 +209,6 @@ HttpCacheSM::state_cache_open_write(int event, void *data)
     Metrics::Gauge::increment(http_rsb.current_cache_connections);
     ink_assert(cache_write_vc == nullptr);
     cache_write_vc = static_cast<CacheVConnection *>(data);
-    open_write_cb  = true;
     master_sm->handleEvent(event, &captive_action);
     break;
 
@@ -231,9 +242,8 @@ HttpCacheSM::state_cache_open_write(int event, void *data)
     }
 
     if (read_retry_on_write_fail || !write_retry_done()) {
-      // Retry open write;
-      open_write_cb = false;
-      do_schedule_in();
+      // Retry open read;
+      _schedule_read_retry();
     } else {
       // The cache is hosed or full or something.
       // Forward the failure to the main sm
@@ -241,19 +251,21 @@ HttpCacheSM::state_cache_open_write(int event, void *data)
           "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
           "done retrying...",
           master_sm->sm_id, open_write_tries);
-      open_write_cb = true;
-      err_code      = reinterpret_cast<intptr_t>(data);
+      err_code = reinterpret_cast<intptr_t>(data);
       master_sm->handleEvent(event, &captive_action);
     }
   } break;
 
   case EVENT_INTERVAL:
+    if (_read_retry_event == static_cast<Event *>(data)) {
+      _read_retry_event = nullptr;
+    }
+
     if (master_sm->t_state.txn_conf->cache_open_write_fail_action == CACHE_WL_FAIL_ACTION_READ_RETRY) {
       Dbg(dbg_ctl_http_cache,
           "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
           "falling back to read retry...",
           master_sm->sm_id, open_write_tries);
-      open_read_cb = false;
       master_sm->handleEvent(CACHE_EVENT_OPEN_READ, &captive_action);
     } else {
       Dbg(dbg_ctl_http_cache,
@@ -279,17 +291,22 @@ HttpCacheSM::state_cache_open_write(int event, void *data)
   return VC_EVENT_CONT;
 }
 
+/**
+  Schedule a read retry event to this HttpCacheSM continuation with cache_open_read_retry_time delay.
+  The scheduled event is tracked by `_read_retry_event`.
+ */
 void
-HttpCacheSM::do_schedule_in()
+HttpCacheSM::_schedule_read_retry()
 {
-  ink_assert(pending_action == nullptr);
-  Action *action_handle =
-    mutex->thread_holding->schedule_in(this, HRTIME_MSECONDS(master_sm->t_state.txn_conf->cache_open_read_retry_time));
+  ink_release_assert(this->mutex->thread_holding == this_ethread());
 
-  if (action_handle != ACTION_RESULT_DONE) {
-    pending_action = action_handle;
+  if (_read_retry_event != nullptr && _read_retry_event->cancelled == false) {
+    _read_retry_event->cancel();
   }
 
+  _read_retry_event =
+    mutex->thread_holding->schedule_in(this, HRTIME_MSECONDS(master_sm->t_state.txn_conf->cache_open_read_retry_time));
+
   return;
 }
 
@@ -298,24 +315,25 @@ HttpCacheSM::do_cache_open_read(const HttpCacheKey &key)
 {
   open_read_tries++;
   ink_assert(pending_action == nullptr);
-  ink_assert(open_read_cb == false);
+
   // Initialising read-while-write-inprogress flag
   this->readwhilewrite_inprogress = false;
   Action *action_handle           = cacheProcessor.open_read(this, &key, this->read_request_hdr, &http_params);
 
   if (action_handle != ACTION_RESULT_DONE) {
-    pending_action = action_handle;
-  }
-  // Check to see if we've already called the user back
-  //  If we have then it's ACTION_RESULT_DONE, other wise
-  //  return our captive action and ensure that we are actually
-  //  doing something useful
-  if (open_read_cb == true) {
-    return ACTION_RESULT_DONE;
-  } else {
-    ink_assert(pending_action != nullptr);
-    captive_action.cancelled = 0; // Make sure not cancelled before we hand it out
+    pending_action           = action_handle;
+    captive_action.cancelled = false; // Make sure not cancelled before we hand it out
     return &captive_action;
+  } else {
+    // In some cases, CacheProcessor::open_read calls back to `state_cache_open_read` with a cache event. If the event is
+    // CACHE_EVENT_OPEN_READ_FAILED, a read retry event might be scheduled. In this case, CacheProcessor::open_read returns
+    // ACTION_RESULT_DONE, even though the read retry event is still pending. To indicate this situation to HttpSM, the scheduled
+    // read retry event is returned. HttpSM can cancel the event if necessary.
+    if (_read_retry_event && _read_retry_event->cancelled != true) {
+      return _read_retry_event;
+    } else {
+      return ACTION_RESULT_DONE;
+    }
   }
 }
 
@@ -335,8 +353,7 @@ HttpCacheSM::open_read(const HttpCacheKey *key, URL *url, HTTPHdr *hdr, const Ov
 
   lookup_max_recursive++;
   current_lookup_level++;
-  open_read_cb = false;
-  act_return   = do_cache_open_read(cache_key);
+  act_return = do_cache_open_read(cache_key);
   // the following logic is based on the assumption that the second
   // lookup won't happen if the HttpSM hasn't been called back for the
   // first lookup
@@ -366,8 +383,7 @@ HttpCacheSM::open_write(const HttpCacheKey *key, URL *url, HTTPHdr *request, Cac
   SET_HANDLER(&HttpCacheSM::state_cache_open_write);
   ink_assert(pending_action == nullptr);
   ink_assert((cache_write_vc == nullptr) || master_sm->t_state.redirect_info.redirect_in_process);
-  // INKqa12119
-  open_write_cb = false;
+
   open_write_tries++;
   if (0 == open_write_start) {
     open_write_start = ink_get_hrtime();
@@ -400,17 +416,18 @@ HttpCacheSM::open_write(const HttpCacheKey *key, URL *url, HTTPHdr *request, Cac
   Action        *action_handle = cacheProcessor.open_write(this, key, info, pin_in_cache);
 
   if (action_handle != ACTION_RESULT_DONE) {
-    pending_action = action_handle;
-  }
-  // Check to see if we've already called the user back
-  //  If we have then it's ACTION_RESULT_DONE, other wise
-  //  return our captive action and ensure that we are actually
-  //  doing something useful
-  if (open_write_cb == true) {
-    return ACTION_RESULT_DONE;
-  } else {
-    ink_assert(pending_action != nullptr);
-    captive_action.cancelled = 0; // Make sure not cancelled before we hand it out
+    pending_action           = action_handle;
+    captive_action.cancelled = false; // Make sure not cancelled before we hand it out
     return &captive_action;
+  } else {
+    // In some cases, CacheProcessor::open_write calls back to `state_cache_open_write` with a cache event. If the event is
+    // CACHE_EVENT_OPEN_WRITE_FAILED, a read retry event might be scheduled. In this case, CacheProcessor::open_read returns
+    // ACTION_RESULT_DONE, even though the read retry event is still pending. To indicate this situation to HttpSM, the scheduled
+    // read retry event is returned. HttpSM can cancel the event if necessary.
+    if (_read_retry_event && _read_retry_event->cancelled != true) {
+      return _read_retry_event;
+    } else {
+      return ACTION_RESULT_DONE;
+    }
   }
 }
diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc
index 4324fc334f..e24fbadaf5 100644
--- a/src/proxy/http/HttpSM.cc
+++ b/src/proxy/http/HttpSM.cc
@@ -258,6 +258,8 @@ HttpSM::cleanup()
   HttpConfig::release(t_state.http_config_param);
   m_remap->release();
 
+  cache_sm.cleanup();
+
   mutex.clear();
   tunnel.mutex.clear();
   cache_sm.mutex.clear();
