Copilot commented on code in PR #13086:
URL: https://github.com/apache/trafficserver/pull/13086#discussion_r3082591134
##########
src/iocore/net/UnixNet.cc:
##########
@@ -200,3 +205,54 @@ initialize_thread_for_net(EThread *thread)
#endif
thread->ep = ep;
}
+
+// Late-initialization continuation for ET_NET threads. Runs on each
+// thread after start_HttpProxyServer() has created all listen sockets
+// and all initialization FDs (eventfds, cache disks, DNS sockets, log
+// files, plugin FDs) are in place.
+//
+// On Linux, when accept is performed directly on ET_NET threads
+// (accept_threads == 0), this calls unshare(CLONE_FILES) to give each
+// thread its own private kernel FD table, eliminating spinlock
+// contention on accept4/close. The underlying kernel objects
+// (struct file) are shared, so cross-thread eventfd signalling and
+// shared cache-disk FDs keep working.
+//
+// This is NOT safe with dedicated accept threads (accept_threads > 0)
+// because accepted sockets are created on the accept thread and handed
+// off to ET_NET by FD number — after unshare those FDs would not exist
+// in the ET_NET thread's private table.
+
+namespace
+{
+struct ExecThrLateCont : public Continuation {
+ int
+ mainEvent(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
+ {
+#if defined(__linux__)
+ int accept_threads =
RecGetRecordInt("proxy.config.accept_threads").value_or(0);
+ if (accept_threads > 0) {
+ Dbg(dbg_ctl_iocore_net, "ET_NET thread %d: skipping unshare
(accept_threads=%d)", this_ethread()->id, accept_threads);
+ } else if (unshare(CLONE_FILES) < 0) {
+ Warning("ET_NET thread %d: unshare(CLONE_FILES) failed: %s",
this_ethread()->id, strerror(errno));
+ } else {
+ Dbg(dbg_ctl_iocore_net, "ET_NET thread %d: FD table unshared",
this_ethread()->id);
+ }
+#endif
+ delete this;
+ return EVENT_DONE;
+ }
+
+ ExecThrLateCont() : Continuation(nullptr) {
SET_HANDLER(&ExecThrLateCont::mainEvent); }
+};
+} // end anonymous namespace
+
+void
+exec_thr_late_init()
+{
+ int n = eventProcessor.thread_group[ET_NET]._count;
+ for (int i = 0; i < n; i++) {
+ eventProcessor.thread_group[ET_NET]._thread[i]->schedule_imm(new
ExecThrLateCont());
+ }
+ Note("Scheduled exec_thr_late_init() on %d ET_NET threads", n);
Review Comment:
`exec_thr_late_init()` is scheduled after `start_HttpProxyServer()`, which
has already scheduled `NetAccept` onto ET_NET threads. In
`proxy.config.exec_thread.listen=1` mode, `NetAccept::accept_per_thread()`
calls `do_listen()` (opens per-thread listen sockets) before your
`unshare(CLONE_FILES)` runs, so those newly-opened listen FDs can be created
while threads still share the FD table and then get copied into other threads
when they unshare later. This can unintentionally share per-thread listen
sockets and/or leak extra references. To avoid this, ensure the per-thread
`unshare(CLONE_FILES)` runs before any per-thread `do_listen()`/accept setup
executes (e.g., schedule the unshare continuation ahead of the `NetAccept`
continuations inside `NetAccept::init_accept_per_thread()` /
`accept_internal()` when `accept_threads==0`, or add a barrier so
accepts/listens don’t start until all ET_NET threads have unshared).
##########
src/iocore/net/UnixNet.cc:
##########
@@ -200,3 +205,54 @@ initialize_thread_for_net(EThread *thread)
#endif
thread->ep = ep;
}
+
+// Late-initialization continuation for ET_NET threads. Runs on each
+// thread after start_HttpProxyServer() has created all listen sockets
+// and all initialization FDs (eventfds, cache disks, DNS sockets, log
+// files, plugin FDs) are in place.
+//
+// On Linux, when accept is performed directly on ET_NET threads
+// (accept_threads == 0), this calls unshare(CLONE_FILES) to give each
+// thread its own private kernel FD table, eliminating spinlock
+// contention on accept4/close. The underlying kernel objects
+// (struct file) are shared, so cross-thread eventfd signalling and
+// shared cache-disk FDs keep working.
+//
+// This is NOT safe with dedicated accept threads (accept_threads > 0)
+// because accepted sockets are created on the accept thread and handed
+// off to ET_NET by FD number — after unshare those FDs would not exist
+// in the ET_NET thread's private table.
+
+namespace
+{
+struct ExecThrLateCont : public Continuation {
+ int
+ mainEvent(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
+ {
+#if defined(__linux__)
+ int accept_threads =
RecGetRecordInt("proxy.config.accept_threads").value_or(0);
+ if (accept_threads > 0) {
+ Dbg(dbg_ctl_iocore_net, "ET_NET thread %d: skipping unshare
(accept_threads=%d)", this_ethread()->id, accept_threads);
+ } else if (unshare(CLONE_FILES) < 0) {
+ Warning("ET_NET thread %d: unshare(CLONE_FILES) failed: %s",
this_ethread()->id, strerror(errno));
+ } else {
+ Dbg(dbg_ctl_iocore_net, "ET_NET thread %d: FD table unshared",
this_ethread()->id);
+ }
Review Comment:
`unshare(CLONE_FILES)` failure logs a `Warning` from every ET_NET thread. On
common systems where `unshare()` is blocked (e.g., EPERM due to userns
restrictions), this can spam logs at startup. Consider logging only once (e.g.,
only from thread 0 / first failure) or rate-limiting, and use `Dbg` for
per-thread details.
##########
src/traffic_server/traffic_server.cc:
##########
@@ -2456,6 +2457,7 @@ main(int /* argc ATS_UNUSED */, const char **argv)
// In either case we should not delay to accept the ports.
Dbg(dbg_ctl_http_listen, "Not delaying listen");
start_HttpProxyServer(); // PORTS_READY_HOOK called from in here
+ exec_thr_late_init(); // Must be last call before
emit_fully_initialized_message()
Review Comment:
Same concern as the delayed-listen path: scheduling `exec_thr_late_init()`
only after `start_HttpProxyServer()` can allow per-thread accept/listen setup
(and `proxy.config.exec_thread.listen=1` per-thread `do_listen()`) to run
before FD-table unshare. This can cause unintended FD-table copying/sharing and
leaks. Consider ensuring unshare runs before any acceptor setup starts on
ET_NET threads, or gate accept startup on completion of the unshare barrier.
```suggestion
exec_thr_late_init();
start_HttpProxyServer(); // PORTS_READY_HOOK called from in here
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]