This is an automated email from the ASF dual-hosted git repository.
bnolsen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 94f366de20 fix race condition with session thread migration (#10897)
94f366de20 is described below
commit 94f366de200d9dab31e3755a188a5e6290b1ade2
Author: Brian Olsen <[email protected]>
AuthorDate: Thu Feb 1 09:03:40 2024 -0700
fix race condition with session thread migration (#10897)
* fix race condition with session thread migration
* document why event_loop could be null
* do less work if per thread session pool
* remove extra unnecessary do_io_write call in critical section
---
src/iocore/net/EventIO.cc | 12 ++-
src/iocore/net/UnixNetVConnection.cc | 6 --
src/proxy/http/HttpSessionManager.cc | 154 ++++++++++++++++++-----------------
3 files changed, 90 insertions(+), 82 deletions(-)
diff --git a/src/iocore/net/EventIO.cc b/src/iocore/net/EventIO.cc
index 0ec7ff2078..29c9aefcd0 100644
--- a/src/iocore/net/EventIO.cc
+++ b/src/iocore/net/EventIO.cc
@@ -64,7 +64,11 @@ EventIO::modify(int e)
return 0;
}
- ink_assert(event_loop);
+ // Session migration may result in this condition.
+ if (nullptr == event_loop) {
+ return 1;
+ }
+
#if TS_USE_EPOLL && !defined(USE_EDGE_TRIGGER)
struct epoll_event ev;
memset(&ev, 0, sizeof(ev));
@@ -117,7 +121,11 @@ EventIO::refresh(int e)
return 0;
}
- ink_assert(event_loop);
+ // Session migration may result in this condition.
+ if (nullptr == event_loop) {
+ return 1;
+ }
+
#if TS_USE_KQUEUE && defined(USE_EDGE_TRIGGER)
e = e & events;
struct kevent ev[2];
diff --git a/src/iocore/net/UnixNetVConnection.cc
b/src/iocore/net/UnixNetVConnection.cc
index cc5bc933ee..8cec52fd4a 100644
--- a/src/iocore/net/UnixNetVConnection.cc
+++ b/src/iocore/net/UnixNetVConnection.cc
@@ -1373,12 +1373,6 @@ UnixNetVConnection::migrateToCurrentThread(Continuation
*cont, EThread *t)
void *arg = this->_prepareForMigration();
- // Do_io_close will signal the VC to be freed on the original thread
- // Since we moved the con context, the fd will not be closed
- // Go ahead and remove the fd from the original thread's epoll structure, so
it is not
- // processed on two threads simultaneously
- this->ep.stop();
-
// Create new VC:
UnixNetVConnection *newvc = static_cast<UnixNetVConnection
*>(this->_getNetProcessor()->allocate_vc(t));
ink_assert(newvc != nullptr);
diff --git a/src/proxy/http/HttpSessionManager.cc
b/src/proxy/http/HttpSessionManager.cc
index 9bb015053e..8c821775b5 100644
--- a/src/proxy/http/HttpSessionManager.cc
+++ b/src/proxy/http/HttpSessionManager.cc
@@ -146,57 +146,58 @@ ServerSessionPool::acquireSession(sockaddr const *addr,
CryptoHash const &hostna
HSMresult_t zret = HSM_NOT_FOUND;
to_return = nullptr;
+ // first section, match against fqdn/port
if ((TS_SERVER_SESSION_SHARING_MATCH_MASK_HOSTONLY & match_style) &&
!(TS_SERVER_SESSION_SHARING_MATCH_MASK_IP & match_style)) {
Debug("http_ss", "Search for host name only not IP. Pool size %zu",
m_fqdn_pool.count());
// This is broken out because only in this case do we check the host hash
first. The range must be checked
// to verify an upstream that matches port and SNI name is selected. Walk
backwards to select oldest.
- in_port_t port = ats_ip_port_cast(addr);
- auto first = m_fqdn_pool.find(hostname_hash);
- while (first != m_fqdn_pool.end() && first->hostname_hash ==
hostname_hash) {
- Debug("http_ss", "Compare port 0x%x against 0x%x", port,
ats_ip_port_cast(first->get_remote_addr()));
- if (port == ats_ip_port_cast(first->get_remote_addr()) &&
- (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_SNI) ||
validate_sni(sm, first->get_netvc())) &&
- (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_HOSTSNISYNC)
|| validate_host_sni(sm, first->get_netvc())) &&
- (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_CERT) ||
validate_cert(sm, first->get_netvc()))) {
- zret = HSM_DONE;
+ in_port_t const port = ats_ip_port_cast(addr);
+ auto iter = m_fqdn_pool.find(hostname_hash);
+ while (iter != m_fqdn_pool.end() && iter->hostname_hash == hostname_hash) {
+ Debug("http_ss", "Compare port 0x%x against 0x%x", port,
ats_ip_port_cast(iter->get_remote_addr()));
+ if (port == ats_ip_port_cast(iter->get_remote_addr()) &&
+ (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_SNI) ||
validate_sni(sm, iter->get_netvc())) &&
+ (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_HOSTSNISYNC)
|| validate_host_sni(sm, iter->get_netvc())) &&
+ (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_CERT) ||
validate_cert(sm, iter->get_netvc()))) {
+ to_return = iter;
break;
}
- ++first;
+ ++iter;
}
- if (zret == HSM_DONE) {
- to_return = first;
- if (!to_return->is_multiplexing()) {
- this->removeSession(to_return);
- }
- } else if (first != m_fqdn_pool.end()) {
+
+ if (iter != m_fqdn_pool.end()) {
Debug("http_ss", "Failed find entry due to name mismatch %s",
sm->t_state.current.server->name);
}
+
+ // second section, match against ip addr (includes port)
} else if (TS_SERVER_SESSION_SHARING_MATCH_MASK_IP & match_style) { //
matching is not disabled.
- auto first = m_ip_pool.find(addr);
+ auto iter = m_ip_pool.find(addr);
// The range is all that is needed in the match IP case, otherwise need to
scan for matching fqdn
// And matches the other constraints as well
// Note the port is matched as part of the address key so it doesn't need
to be checked again.
if (match_style & (~TS_SERVER_SESSION_SHARING_MATCH_MASK_IP)) {
- while (first != m_ip_pool.end() &&
ats_ip_addr_port_eq(first->get_remote_addr(), addr)) {
- if ((!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_HOSTONLY) ||
first->hostname_hash == hostname_hash) &&
- (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_SNI) ||
validate_sni(sm, first->get_netvc())) &&
- (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_HOSTSNISYNC)
|| validate_host_sni(sm, first->get_netvc())) &&
- (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_CERT) ||
validate_cert(sm, first->get_netvc()))) {
- zret = HSM_DONE;
+ while (iter != m_ip_pool.end() &&
ats_ip_addr_port_eq(iter->get_remote_addr(), addr)) {
+ if ((!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_HOSTONLY) ||
iter->hostname_hash == hostname_hash) &&
+ (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_SNI) ||
validate_sni(sm, iter->get_netvc())) &&
+ (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_HOSTSNISYNC)
|| validate_host_sni(sm, iter->get_netvc())) &&
+ (!(match_style & TS_SERVER_SESSION_SHARING_MATCH_MASK_CERT) ||
validate_cert(sm, iter->get_netvc()))) {
+ to_return = iter;
break;
}
- ++first;
+ ++iter;
}
- } else if (first != m_ip_pool.end()) {
- zret = HSM_DONE;
+ } else if (iter != m_ip_pool.end()) {
+ to_return = iter;
}
- if (zret == HSM_DONE) {
- to_return = first;
- if (!to_return->is_multiplexing()) {
- this->removeSession(to_return);
- }
+ }
+
+ if (nullptr != to_return) {
+ zret = HSM_DONE;
+ if (!to_return->is_multiplexing()) {
+ this->removeSession(to_return);
}
}
+
return zret;
}
@@ -417,16 +418,17 @@ HSMresult_t
HttpSessionManager::_acquire_session(sockaddr const *ip, CryptoHash const
&hostname_hash, HttpSM *sm,
TSServerSessionSharingMatchMask
match_style, TSServerSessionSharingPoolType pool_type)
{
- PoolableSession *to_return = nullptr;
- HSMresult_t retval = HSM_NOT_FOUND;
- bool acquired = false;
+ PoolableSession *to_return = nullptr;
+ HSMresult_t retval = HSM_NOT_FOUND;
+ UnixNetVConnection *server_vc = nullptr;
+ EThread *const ethread = this_ethread();
+ bool acquired = false;
// Extend the mutex window until the acquired Server session is attached
// to the SM. Releasing the mutex before that results in race conditions
// due to a potential parallel network read on the VC with no mutex guarding
{
// Now check to see if we have a connection in our shared connection pool
- EThread *ethread = this_ethread();
Ptr<ProxyMutex> pool_mutex =
(TS_SERVER_SESSION_SHARING_POOL_THREAD == pool_type) ?
ethread->server_session_pool->mutex : m_g_pool->mutex;
@@ -443,54 +445,42 @@ HttpSessionManager::_acquire_session(sockaddr const *ip,
CryptoHash const &hostn
retval = m_g_pool->acquireSession(ip, hostname_hash, match_style,
sm, to_return);
acquired = (HSM_DONE == retval);
Debug("http_ss", "[acquire session] global pool search %s", to_return
? "successful" : "failed");
- // At this point to_return has been removed from the pool. Do we need
to move it
- // to the same thread?
- if (to_return) {
- UnixNetVConnection *server_vc = dynamic_cast<UnixNetVConnection
*>(to_return->get_netvc());
- if (server_vc) {
- // Disable i/o on this vc now, but, hold onto the g_pool cont
- // and the mutex to stop any stray events from getting in
- server_vc->do_io_read(m_g_pool, 0, nullptr);
- server_vc->do_io_write(m_g_pool, 0, nullptr);
- UnixNetVConnection *new_vc = server_vc->migrateToCurrentThread(sm,
ethread);
- // The VC moved, free up the original one
- if (new_vc != server_vc) {
- ink_assert(new_vc == nullptr || new_vc->nh != nullptr);
- if (!new_vc) {
- // Close out to_return, we were't able to get a connection
-
Metrics::Counter::increment(http_rsb.origin_shutdown_migration_failure);
- to_return->do_io_close();
- to_return = nullptr;
- retval = HSM_NOT_FOUND;
- } else {
- // Keep things from timing out on us
-
new_vc->set_inactivity_timeout(new_vc->get_inactivity_timeout());
- to_return->set_netvc(new_vc);
- }
- } else {
- // Keep things from timing out on us
+
+ // If thread must be migrated, clear out the VC's
+ // data and event handling on the original thread.
+ if (nullptr != to_return) {
+ server_vc = dynamic_cast<UnixNetVConnection
*>(to_return->get_netvc());
+ if (nullptr != server_vc) {
+ if (ethread != server_vc->get_thread()) {
+ SCOPED_MUTEX_LOCK(vclock, server_vc->mutex, ethread);
+ server_vc->ep.stop();
+ server_vc->do_io_read(m_g_pool, 0, nullptr);
server_vc->set_inactivity_timeout(server_vc->get_inactivity_timeout());
}
}
}
}
- } else { // Didn't get the lock. to_return is still NULL
+ } else { // Didn't get the lock. to_return is still nullptr
retval = HSM_RETRY;
}
+ }
- if (to_return) {
- if (sm->create_server_txn(to_return)) {
- Debug("http_ss", "[%" PRId64 "] [acquire session] return session from
shared pool", to_return->connection_id());
- to_return->state = PoolableSession::SSN_IN_USE;
- retval = HSM_DONE;
+ // now the vc is out of the pool with chance of thread migration
+ if (TS_SERVER_SESSION_SHARING_POOL_THREAD != pool_type && nullptr !=
to_return && nullptr != server_vc) {
+ UnixNetVConnection *const new_vc = server_vc->migrateToCurrentThread(sm,
ethread);
+ // The VC moved, free up the original one
+ if (new_vc != server_vc) {
+ ink_assert(nullptr == new_vc || nullptr != new_vc->nh);
+ if (nullptr == new_vc) {
+ // Close out to_return, we were't able to get a connection
+
Metrics::Counter::increment(http_rsb.origin_shutdown_migration_failure);
+ to_return->do_io_close(); // already done ??
+ to_return = nullptr;
+ retval = HSM_NOT_FOUND;
} else {
- Debug("http_ss", "[%" PRId64 "] [acquire session] failed to get
transaction on session from shared pool",
- to_return->connection_id());
- // Don't close the H2 origin. Otherwise you get use-after free with
the activity timeout cop
- if (!to_return->is_multiplexing()) {
- to_return->do_io_close();
- }
- retval = HSM_RETRY;
+ // Keep the new session from timing out on us
+ new_vc->set_inactivity_timeout(new_vc->get_inactivity_timeout());
+ to_return->set_netvc(new_vc);
}
}
}
@@ -499,6 +489,22 @@ HttpSessionManager::_acquire_session(sockaddr const *ip,
CryptoHash const &hostn
Metrics::Gauge::decrement(http_rsb.pooled_server_connections);
}
+ if (nullptr != to_return) {
+ if (sm->create_server_txn(to_return)) {
+ Debug("http_ss", "[%" PRId64 "] [acquire session] return session from
shared pool", to_return->connection_id());
+ to_return->state = PoolableSession::SSN_IN_USE;
+ retval = HSM_DONE;
+ } else {
+ Debug("http_ss", "[%" PRId64 "] [acquire session] failed to get
transaction on session from shared pool",
+ to_return->connection_id());
+ // Don't close the H2 origin. Otherwise you get use-after free with the
activity timeout cop
+ if (!to_return->is_multiplexing()) {
+ to_return->do_io_close();
+ }
+ retval = HSM_RETRY;
+ }
+ }
+
return retval;
}