bdgranger commented on issue #7581:
URL: https://github.com/apache/trafficserver/issues/7581#issuecomment-814210091


   @shinrich thank you for the various pointers. We have pulled/ported #4379, 
#5120, and #5089, along with the parts of #5198 and #5201 having to do with 
thread_affinity into our 8.1.x fork. With this build deployed in the production 
environment, we have not seen any of the above stack traces in nearly a week at 
production traffic levels.
   
   However, on a couple of our busiest servers, we are now seeing the following 
two stack traces once every 2 or 3 days; both look like they may be related 
to #5952 and #6686. Would you agree?
   ```c++
   (gdb) bt full
   #0  Mutex_trylock (t=0x2b09f810d010, m=<optimized out>) at 
/usr/src/debug/trafficserver-8.1.2/iocore/eventsystem/I_Lock.h:287
   No locals.
   #1  MutexTryLock (t=0x2b09f810d010, am=..., this=0x2b09fa602d80) at 
/usr/src/debug/trafficserver-8.1.2/iocore/eventsystem/I_Lock.h:555
   No locals.
   #2  HostDBContinuation::probeEvent (this=this@entry=0x2b152e16dee0, 
e=e@entry=0x0) at HostDB.cc:1579
           t = 0x2b09f810d010
           lock = {m = {m_ptr = 0x0}, lock_acquired = true}
   #3  0x0000000000620096 in HostDBContinuation::dnsPendingEvent 
(this=0x2b152e16dee0, event=1, e=0x2b10a394f840) at HostDB.cc:1221
   No locals.
   #4  0x0000000000765082 in handleEvent (data=0x2b10a394f840, event=1, 
this=<optimized out>) at I_Continuation.h:187
   No locals.
   #5  EThread::process_event (this=this@entry=0x2b09f810d010, 
e=e@entry=0x2b10a394f840, calling_code=1) at UnixEThread.cc:131
           c_temp = <optimized out>
           lock = {m = {m_ptr = 0x21f1b20}, lock_acquired = true}
   #6  0x00000000007662a3 in EThread::execute_regular (this=0x2b09f810d010) at 
UnixEThread.cc:244
           done_one = true
           sleep_time = <optimized out>
           e = 0x2b10a394f840
           NegativeQueue = {<DLL<Event, Event::Link_link>> = {head = 0x0}, tail 
= 0x0}
           next_time = <optimized out>
           delta = <optimized out>
           prev_metric = 0x2b09f82178c0
           nq_count = 927
           ev_count = 927
           METRIC_INIT = {_loop_time = {_start = 0, _min = 9223372036854775807, 
_max = 0}, _events = {_min = 2147483647, _max = 0, _total = 0},
             _count = 0, _wait = 0}
   #7  0x0000000000764a7a in spawn_thread_internal (a=0x1de1670) at Thread.cc:85
           p = 0x1de1670
   #8  0x00002b09f087be65 in start_thread () from /lib64/libpthread.so.0
   No symbol table info available.
   #9  0x00002b09f15b188d in clone () from /lib64/libc.so.6
   No symbol table info available.
   ```
   ```c++
   (gdb) bt full
   #0  ink_atomiclist_push (l=l@entry=0x100b10, item=item@entry=0x2aec0b3f7da0) 
at ink_queue.cc:533
           adr_of_next = 0x0
           h = <optimized out>
   #1  0x0000000000764181 in ProtectedQueue::enqueue (this=this@entry=0x100b10, 
e=e@entry=0x2aec0b3f7da0, fast_signal=fast_signal@entry=false)
       at ProtectedQueue.cc:52
           e_ethread = 0x0
           was_empty = <optimized out>
   #2  0x000000000062bf89 in schedule (fast_signal=false, e=<optimized out>, 
this=0x0)
       at 
/usr/src/debug/trafficserver-8.1.2/iocore/eventsystem/P_UnixEThread.h:98
   No locals.
   #3  EThread::schedule_in (this=0x0, cont=0x2aec34bb8b60, t=<optimized out>, 
callback_event=2, cookie=0x0)
       at 
/usr/src/debug/trafficserver-8.1.2/iocore/eventsystem/P_UnixEThread.h:71
   No locals.
   #4  0x000000000061d20f in HostDBContinuation::do_dns 
(this=this@entry=0x2aec34bb8b60) at HostDB.cc:1704
           __FUNCTION__ = "do_dns"
   #5  0x000000000061ffda in HostDBContinuation::probeEvent 
(this=this@entry=0x2aec34bb8b60, e=e@entry=0x0) at HostDB.cc:1620
           t = <optimized out>
           lock = {m = {m_ptr = 0x2aec2381aa80}, lock_acquired = true}
   #6  0x0000000000620096 in HostDBContinuation::dnsPendingEvent 
(this=0x2aec34bb8b60, event=1, e=0x2aec753d6b80) at HostDB.cc:1221
   No locals.
   #7  0x0000000000765082 in handleEvent (data=0x2aec753d6b80, event=1, 
this=<optimized out>) at I_Continuation.h:187
   No locals.
   #8  EThread::process_event (this=this@entry=0x2ae11421a010, 
e=e@entry=0x2aec753d6b80, calling_code=1) at UnixEThread.cc:131
           c_temp = <optimized out>
           lock = {m = {m_ptr = 0x1cf88f0}, lock_acquired = true}
   #9  0x000000000076594e in EThread::process_queue 
(this=this@entry=0x2ae11421a010, 
NegativeQueue=NegativeQueue@entry=0x2ae116967e70,
       ev_count=ev_count@entry=0x2ae116967e6c, 
nq_count=nq_count@entry=0x2ae116967e68) at UnixEThread.cc:170
           e = 0x2aec753d6b80
   #10 0x0000000000765f38 in EThread::execute_regular (this=0x2ae11421a010) at 
UnixEThread.cc:230
           done_one = <optimized out>
           sleep_time = <optimized out>
           e = <optimized out>
           NegativeQueue = {<DLL<Event, Event::Link_link>> = {head = 0x0}, tail 
= 0x0}
           next_time = <optimized out>
           delta = <optimized out>
           prev_metric = 0x2ae11431c550
           nq_count = 340
           ev_count = 341
           METRIC_INIT = {_loop_time = {_start = 0, _min = 9223372036854775807, 
_max = 0}, _events = {_min = 2147483647, _max = 0, _total = 0},
             _count = 0, _wait = 0}
   #11 0x0000000000764a7a in spawn_thread_internal (a=0x18f2f60) at Thread.cc:85
           p = 0x18f2f60
   #12 0x00002ae10c589e65 in start_thread () from /lib64/libpthread.so.0
   No symbol table info available.
   #13 0x00002ae10d2bf88d in clone () from /lib64/libc.so.6
   No symbol table info available.
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to