cmcfarlen opened a new pull request #8725:
URL: https://github.com/apache/trafficserver/pull/8725


   
   This PR updates the THREAD_FREE macro so that its last parameter is
   evaluated only once. In several places in the code, the final parameter
   is given as `this_thread()`, which was causing excessive calls to
   `pthread_getspecific`. This update provides a modest performance
   increase, as shown by the simple benchmark below.
   
   Benchmark results:
   
   ```
   
   benchmark name                       samples       iterations    estimated
                                        mean          low mean      high mean
                                        std dev       low std dev   high std dev
   
   -------------------------------------------------------------------------------
   thread_free old                                100             1    39.9577 ms
                                           399.983 us    399.675 us    400.389 us
                                            1.7927 us    1.46193 us    2.62895 us
   
   thread_free new                                100             1    33.6464 ms
                                           334.064 us    333.356 us    336.995 us
                                           6.40941 us    1.17813 us    15.0751 us
   
   
   
   ===============================================================================
   
   ```
   
   Snippet of the assembly generated for THREAD_FREE before the change. You
   can see the multiple calls to `pthread_getspecific`:
   
   ```
   cmpl   $0x0,0x28fdb9(%rip)        # 0x7a1ef0 <cmd_disable_pfreelist>
   je     0x51214d <free_CacheVC(CacheVC*)+1053>
   mov    0x24c150(%rip),%rdi        # 0x75e290 <cacheVConnectionAllocator>
   mov    %rbx,%rsi
   callq  0x5e0120 <ink_freelist_free@plt>
   jmpq   0x512233 <free_CacheVC(CacheVC*)+1283>
   mov    0x24f82d(%rip),%edi        # 0x761980 <_ZN6Thread15thread_data_keyE>
   callq  0x5e0140 <pthread_getspecific@plt>
   mov    0x100(%rax),%rax
   mov    %rax,(%rbx)
   mov    0x24f818(%rip),%edi        # 0x761980 <_ZN6Thread15thread_data_keyE>
   callq  0x5e0140 <pthread_getspecific@plt>
   mov    %rbx,0x100(%rax)
   mov    0x24f806(%rip),%edi        # 0x761980 <_ZN6Thread15thread_data_keyE>
   callq  0x5e0140 <pthread_getspecific@plt>
   addl   $0x1,0xf8(%rax)
   mov    0x24f7f4(%rip),%edi        # 0x761980 <_ZN6Thread15thread_data_keyE>
   callq  0x5e0140 <pthread_getspecific@plt>
   mov    0xf8(%rax),%eax
   cmp    0xdfc6b(%rip),%eax        # 0x5f1e08 <thread_freelist_high_watermark>
   jle    0x512233 <free_CacheVC(CacheVC*)+1283>
   mov    0x24f7d7(%rip),%edi        # 0x761980 <_ZN6Thread15thread_data_keyE>
   callq  0x5e0140 <pthread_getspecific@plt>
   mov    0x100(%rax),%rsi
   test   %rsi,%rsi
   je     0x512233 <free_CacheVC(CacheVC*)+1283>
   mov    0xf8(%rax),%edi
   mov    0xdfc45(%rip),%r9d        # 0x5f1e0c <thread_freelist_low_watermark>
   cmp    %r9d,%edi
   jle    0x512233 <free_CacheVC(CacheVC*)+1283>
   mov    %r9d,%r8d
   not    %r8d
   add    %edi,%r8d
   add    $0x1,%r8
   sub    %edi,%r9d
   add    $0xffffffff,%edi
   mov    $0x1,%ecx
   mov    %rsi,%rbx
   nopw   0x0(%rax,%rax,1)
   mov    %rbx,%rdx
   mov    (%rbx),%rbx
   mov    %rbx,0x100(%rax)
   mov    %edi,0xf8(%rax)
   test   %rbx,%rbx
   je     0x51221c <free_CacheVC(CacheVC*)+1260>
   add    $0x1,%rcx
   lea    (%r9,%rcx,1),%r10d
   add    $0xffffffff,%edi
   cmp    $0x1,%r10d
   jne    0x5121f0 <free_CacheVC(CacheVC*)+1216>
   mov    %r8,%rcx
   test   %rcx,%rcx
   je     0x512233 <free_CacheVC(CacheVC*)+1283>
   cmp    $0x1,%rcx
   je     0x512297 <free_CacheVC(CacheVC*)+1383>
   mov    0x24c062(%rip),%rdi        # 0x75e290 <cacheVConnectionAllocator>
   callq  0x5e0220 <ink_freelist_free_bulk@plt>
   xor    %eax,%eax
   add    $0x20,%rsp
   pop    %rbx
   pop    %r12
   pop    %r13
   pop    %r14
   pop    %r15
   retq
   
   ```
   
   And the same code after the update, with just one call to `pthread_getspecific`:
   
   ```
   cmpl   $0x0,0x28fc65(%rip)        # 0x7a34d0 <cmd_disable_pfreelist>
   je     0x513881 <free_CacheVC(CacheVC*)+1089>
   mov    0x24bffc(%rip),%rdi        # 0x75f870 <cacheVConnectionAllocator>
   mov    %r12,%rsi
   callq  0x5e1710 <ink_freelist_free@plt>
   jmpq   0x513925 <free_CacheVC(CacheVC*)+1253>
   mov    0x24f6d9(%rip),%edi        # 0x762f60 <_ZN6Thread15thread_data_keyE>
   callq  0x5e1730 <pthread_getspecific@plt>
   mov    0x100(%rax),%rcx
   mov    %rcx,(%r12)
   mov    %r12,0x100(%rax)
   mov    0xf8(%rax),%esi
   lea    0x1(%rsi),%ecx
   mov    %ecx,0xf8(%rax)
   cmp    0xdfb45(%rip),%esi        # 0x5f33f8 <thread_freelist_high_watermark>
   jl     0x513925 <free_CacheVC(CacheVC*)+1253>
   mov    0xdfb41(%rip),%ecx        # 0x5f33fc <thread_freelist_low_watermark>
   mov    %esi,%r8d
   sub    %ecx,%r8d
   jl     0x513925 <free_CacheVC(CacheVC*)+1253>
   add    $0x1,%r8
   mov    %esi,%r9d
   not    %r9d
   add    %ecx,%r9d
   mov    $0x1,%ecx
   mov    %r12,%rbx
   nopl   0x0(%rax,%rax,1)
   mov    %rbx,%rdx
   mov    (%rbx),%rbx
   mov    %rbx,0x100(%rax)
   mov    %esi,0xf8(%rax)
   test   %rbx,%rbx
   je     0x51390b <free_CacheVC(CacheVC*)+1227>
   add    $0x1,%rcx
   lea    (%r9,%rcx,1),%edi
   add    $0xffffffff,%esi
   cmp    $0x1,%edi
   jne    0x5138e0 <free_CacheVC(CacheVC*)+1184>
   mov    %r8,%rcx
   test   %rcx,%rcx
   je     0x513925 <free_CacheVC(CacheVC*)+1253>
   cmp    $0x1,%rcx
   je     0x51398b <free_CacheVC(CacheVC*)+1355>
   mov    0x24bf53(%rip),%rdi        # 0x75f870 <cacheVConnectionAllocator>
   mov    %r12,%rsi
   callq  0x5e1810 <ink_freelist_free_bulk@plt>
   xor    %eax,%eax
   add    $0x20,%rsp
   pop    %rbx
   pop    %r12
   pop    %r13
   pop    %r14
   pop    %r15
   retq
   
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to