sudheerv opened a new issue #7284:
URL: https://github.com/apache/trafficserver/issues/7284
This is a follow up issue to #7096
PR #7278 fixed the crash caused by a race condition during server session
acquisition, but that fix did not solve the same race during server session
closing.
It turns out the root cause is that the read buffer attached to the Server
NetVC is actually "owned" by the Http1ServerSession object, which destroys
itself, along with its associated mutex, when do_io_close() is called. The
actual NetVC may be doing a read in a different thread at the same time and is
then using a read buffer that has already been freed by Http1ServerSession.
This seems to happen more often with Transform plugins at play. I tried to
switch the ownership of the read buffer to the Server NetVC, but could not get
it working correctly, as it seems to expose even more race conditions and
asserts all over the place.
Instead, the solution I implemented is to not destroy the Server Session
during do_io_close(), but to add it to a separate unshared/gc session pool,
which will eventually clean up these sessions on a subsequent event from either
the inactivity cop or a stray read that was racing with the close. Moving them
to a separate gc session pool also allows the event handling to be guarded
against the read with a dedicated mutex. This mutex guard cannot easily be
added within an SM callback, since that would mean handling a lock failure,
etc., which introduces complexity into the state machine.
Here's the stack trace and event history showing the problem.
{code}
(gdb) p vc->netvc_context
$3 = NET_VCONNECTION_OUT
(gdb) f vc->read
Value can't be converted to integer.
(gdb) p vc->read
$4 = {
enabled = 0,
vio = {
cont = 0x0,
nbytes = 0,
ndone = 0,
op = 0,
buffer = {
mbuf = 0x2aac44662270,
entry = 0x0
},
vc_server = 0x2aabc545f6e0,
mutex = {
m_ptr = 0x2aab2bd1fe40
},
_disabled = false
},
ready_link = {
<SLink<NetEvent>> = {
next = 0x0
},
members of Link<NetEvent>:
prev = 0x0
},
enable_link = {
next = 0x0
},
in_enabled_list = 0,
triggered = 0
}
(gdb) p vc->read.vio.buffer.mbuf
$5 = (MIOBuffer *) 0x2aac44662270
(gdb) p *vc->read.vio.buffer.mbuf
$6 = {
size_index = 46918671922096,
water_mark = 0,
_writer = {
m_ptr = 0x0
},
readers = {{
accessor = 0x0,
mbuf = 0x0,
block = {
m_ptr = 0x0
},
start_offset = 0,
size_limit = 9223372036854775807
}, {
accessor = 0x0,
mbuf = 0x0,
block = {
m_ptr = 0x0
},
start_offset = 0,
size_limit = 9223372036854775807
}, {
accessor = 0x0,
mbuf = 0x0,
block = {
m_ptr = 0x0
},
start_offset = 0,
size_limit = 9223372036854775807
}, {
accessor = 0x0,
mbuf = 0x0,
block = {
m_ptr = 0x0
},
start_offset = 0,
size_limit = 9223372036854775807
}, {
accessor = 0x0,
mbuf = 0x0,
block = {
m_ptr = 0x0
---Type <return> to continue, or q <return> to quit---
},
start_offset = 0,
size_limit = 9223372036854775807
}},
_location = 0x7e8250 "memory/IOBuffer/Http1ServerSession.cc:78"
}
(gdb) ptype vc->read.vio.buffer
(gdb) p vc->history
$1 = {
history = {{
location = {
file = 0x7e8420 "HttpSessionManager.cc",
func = 0x7e8810 <HttpSessionManager::acquire_session(Continuation*,
sockaddr const*, char const*, ProxyTransaction*, HttpSM*)::__FUNCTION__>
"acquire_session",
line = 420
},
event = 0,
reentrancy = 0
}, {
location = {
file = 0x8286af "write_signal_and_update",
func = 0x7e5f4b "&HttpSM::main_handler",
line = -1058816000
},
event = 103,
reentrancy = 1
}, {
location = {
file = 0x8286fb "read_signal_and_update",
func = 0x7e5f4b "&HttpSM::main_handler",
line = -1058816000
},
event = 100,
reentrancy = 1
}, {
location = {
file = 0x8286fb "read_signal_and_update",
func = 0x7f0aac "&HttpTunnel::main_handler",
line = -1058816000
},
event = 102,
reentrancy = 1
}, {
location = {
file = 0x7e5f1e "HttpSM.cc",
func = 0x7e7640 <HttpSM::tunnel_handler_server(int,
HttpTunnelProducer*)::__FUNCTION__> "tunnel_handler_server",
line = 3245
},
event = 102,
reentrancy = 0
}, {
location = {
---Type <return> to continue, or q <return> to quit---
file = 0x7e8420 "HttpSessionManager.cc",
func = 0x7e8800
<HttpSessionManager::release_session(Http1ServerSession*)::__FUNCTION__>
"release_session",
line = 460
},
event = 0,
reentrancy = 0
}, {
location = {
file = 0x8286fb "read_signal_and_update",
func = 0x7e8750 "&ServerSessionPool::eventHandler",
line = -1058816000
},
event = 104,
reentrancy = 1
}, {
location = {
file = 0x7e8420 "HttpSessionManager.cc",
func = 0x7e8820 <ServerSessionPool::eventHandler(int,
void*)::__FUNCTION__> "eventHandler",
line = 285
},
event = 0,
reentrancy = 0
}, {
location = {
file = 0x7e8158 "Http1ServerSession.cc",
func = 0x7e8300 <Http1ServerSession::do_io_close(int)::__FUNCTION__>
"do_io_close",
line = 120
},
event = 0,
reentrancy = 0
}, {
location = {
file = 0x81c03b "do_io_close",
func = 0x7e8750 "&ServerSessionPool::eventHandler",
line = 8292176
},
event = 0,
reentrancy = 1
}, {
location = {
file = 0x8286fb "read_signal_and_update",
func = 0x8286ed "free_netevent",
line = -1058816000
---Type <return> to continue, or q <return> to quit---
},
event = 104,
reentrancy = 0
}, {
location = {
file = 0x820e97 "SSLNetVConnection::free",
func = 0x7cc7fb "null",
line = -1058816000
},
event = 0,
reentrancy = 0
}, {
location = {
file = 0x821237 "SSLNetVConnection::clear",
func = 0x7cc7fb "null",
line = -1058816000
},
event = 0,
reentrancy = 0
}, {
location = {
file = 0x0,
func = 0x0,
line = 0
},
event = 0,
reentrancy = 0
} <repeats 52 times>},
history_pos = 13
{code}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]