This is an automated email from the ASF dual-hosted git repository.
bneradt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 89a5deffa0 Fix NetAcceptAction::cancel() use-after-free race condition
(#12803)
89a5deffa0 is described below
commit 89a5deffa0736302040754a4e2e5cfb73c6bd304
Author: Brian Neradt <[email protected]>
AuthorDate: Thu Jan 15 14:23:51 2026 -0600
Fix NetAcceptAction::cancel() use-after-free race condition (#12803)
Fix a race condition between NetAcceptAction::cancel() and
NetAccept::acceptEvent() where the server pointer could be dereferenced
after the NetAccept object was deleted.
The race occurred as follows:
1. Thread T4 calls NetAcceptAction::cancel(), sets cancelled=true
2. Thread T3 running acceptEvent() sees cancelled==true
3. Thread T3 deletes the NetAccept (including embedded Server)
4. Thread T4 tries to call server->close() on freed memory
The fix uses std::atomic<Server*> with atomic exchange to ensure only
one thread can successfully obtain and use the server pointer. Both
cancel() and the cleanup paths before delete this atomically exchange
the pointer with nullptr - whichever succeeds first closes the server,
the other becomes a no-op.
This addresses the TODO comment that was in the code:
"// TODO fix race between cancel accept and call back"
ASAN report this fixes (seen intermittently on rocky CI builds):
==8850==ERROR: AddressSanitizer: heap-use-after-free on address
0x616000028cb4 at pc 0x000001346739 bp 0x7fa40fd2f580 sp 0x7fa40fd2f570
WRITE of size 4 at 0x616000028cb4 thread T4 ([ET_NET 2])
#0 0x1346738 in UnixSocket::close()
../src/iocore/eventsystem/UnixSocket.cc:138
#1 0x12b44ed in Server::close() ../src/iocore/net/Server.cc:88
#2 0x121fb95 in NetAcceptAction::cancel(Continuation*)
../src/iocore/net/P_NetAccept.h:71
#3 0x7fa41686d082 in TSActionCancel(tsapi_action*)
../src/api/InkAPI.cc:5828
...
0x616000028cb4 is located 308 bytes inside of 576-byte region
[0x616000028b80,0x616000028dc0)
freed by thread T3 ([ET_NET 1]) here:
#0 0x7fa416d2036f in operator delete(void*, unsigned long)
#1 0x12593c4 in NetAccept::~NetAccept()
../src/iocore/net/P_NetAccept.h:128
#2 0x12bebf0 in NetAccept::acceptEvent(int, void*)
../src/iocore/net/UnixNetAccept.cc:484
...
---
src/iocore/net/P_NetAccept.h | 27 ++++++++++++++++++---------
src/iocore/net/QUICNetProcessor.cc | 4 +---
src/iocore/net/UnixNetAccept.cc | 5 ++++-
src/iocore/net/UnixNetProcessor.cc | 4 +---
4 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/src/iocore/net/P_NetAccept.h b/src/iocore/net/P_NetAccept.h
index 163d5578d5..22a2dd5bca 100644
--- a/src/iocore/net/P_NetAccept.h
+++ b/src/iocore/net/P_NetAccept.h
@@ -43,6 +43,7 @@
#include "iocore/net/NetAcceptEventIO.h"
#include "Server.h"
+#include <atomic>
#include <vector>
struct NetAccept;
@@ -60,21 +61,29 @@ AcceptFunction net_accept;
class UnixNetVConnection;
-// TODO fix race between cancel accept and call back
struct NetAcceptAction : public Action, public RefCountObjInHeap {
- Server *server;
+ std::atomic<Server *> server{nullptr};
- void
- cancel(Continuation *cont = nullptr) override
+ NetAcceptAction(Continuation *cont, Server *s)
{
- Action::cancel(cont);
- server->close();
+ continuation = cont;
+ if (cont != nullptr) {
+ mutex = cont->mutex;
+ }
+ server.store(s, std::memory_order_release);
}
- Continuation *
- operator=(Continuation *acont)
+ void
+ cancel(Continuation *cont = nullptr) override
{
- return Action::operator=(acont);
+ // Close the server before setting the cancelled flag. This ensures that
+ // when acceptEvent() sees cancelled == true, the server close is already
+ // complete, preventing use-after-free races.
+ Server *s = server.exchange(nullptr, std::memory_order_acq_rel);
+ if (s != nullptr) {
+ s->close();
+ }
+ Action::cancel(cont);
}
~NetAcceptAction() override
diff --git a/src/iocore/net/QUICNetProcessor.cc
b/src/iocore/net/QUICNetProcessor.cc
index 55592ef6f3..d7f7012606 100644
--- a/src/iocore/net/QUICNetProcessor.cc
+++ b/src/iocore/net/QUICNetProcessor.cc
@@ -251,9 +251,7 @@ QUICNetProcessor::main_accept(Continuation *cont, SOCKET
fd, AcceptOptions const
na->server.sock = UnixSocket{fd};
ats_ip_copy(&na->server.accept_addr, &accept_ip);
- na->action_ = new NetAcceptAction();
- *na->action_ = cont;
- na->action_->server = &na->server;
+ na->action_ = new NetAcceptAction(cont, &na->server);
na->init_accept();
return na->action_.get();
diff --git a/src/iocore/net/UnixNetAccept.cc b/src/iocore/net/UnixNetAccept.cc
index cea9df7879..4b69b8641f 100644
--- a/src/iocore/net/UnixNetAccept.cc
+++ b/src/iocore/net/UnixNetAccept.cc
@@ -479,6 +479,7 @@ NetAccept::acceptEvent(int event, void *ep)
MUTEX_TRY_LOCK(lock, m, e->ethread);
if (lock.is_locked()) {
if (action_->cancelled) {
+ // Server was already closed by whoever called cancel().
e->cancel();
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
delete this;
@@ -487,6 +488,7 @@ NetAccept::acceptEvent(int event, void *ep)
int res;
if ((res = net_accept(this, e, false)) < 0) {
+ action_->cancel();
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
/* INKqa11179 */
Warning("Accept on port %d failed with error no %d",
ats_ip_port_host_order(&server.addr), res);
@@ -637,7 +639,7 @@ Ldone:
return EVENT_CONT;
Lerror:
- server.close();
+ action_->cancel();
e->cancel();
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
delete this;
@@ -656,6 +658,7 @@ NetAccept::acceptLoopEvent(int event, Event *e)
}
// Don't think this ever happens ...
+ action_->cancel();
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
delete this;
return EVENT_DONE;
diff --git a/src/iocore/net/UnixNetProcessor.cc
b/src/iocore/net/UnixNetProcessor.cc
index 15630281c5..5c8d5111e1 100644
--- a/src/iocore/net/UnixNetProcessor.cc
+++ b/src/iocore/net/UnixNetProcessor.cc
@@ -133,9 +133,7 @@ UnixNetProcessor::accept_internal(Continuation *cont, int
fd, AcceptOptions cons
na->proxyPort = sa ? sa->proxyPort : nullptr;
na->snpa = dynamic_cast<SSLNextProtocolAccept *>(cont);
- na->action_ = new NetAcceptAction();
- *na->action_ = cont;
- na->action_->server = &na->server;
+ na->action_ = new NetAcceptAction(cont, &na->server);
if (opt.frequent_accept) { // true
if (accept_threads > 0 && listen_per_thread == 0) {