cmcfarlen opened a new pull request #8725:
URL: https://github.com/apache/trafficserver/pull/8725
This PR updates the THREAD_FREE macro so that its last parameter is
evaluated only once. In several places in the code, the final parameter is
given as `this_thread()`, so every use of that parameter inside the macro
expanded to a separate call, resulting in excessive calls to
`pthread_getspecific`. This update provides a modest performance increase,
as shown by the simple benchmark below.
Benchmark results:
```
benchmark name        samples       iterations    estimated
                      mean          low mean      high mean
                      std dev       low std dev   high std dev
-------------------------------------------------------------------------------
thread_free old       100           1             39.9577 ms
                      399.983 us    399.675 us    400.389 us
                      1.7927 us     1.46193 us    2.62895 us
thread_free new       100           1             33.6464 ms
                      334.064 us    333.356 us    336.995 us
                      6.40941 us    1.17813 us    15.0751 us
===============================================================================
```
Snippet of the assembly generated for THREAD_FREE before the change. You can
see the multiple calls to `pthread_getspecific`:
```
cmpl $0x0,0x28fdb9(%rip) # 0x7a1ef0 <cmd_disable_pfreelist>
je 0x51214d <free_CacheVC(CacheVC*)+1053>
mov 0x24c150(%rip),%rdi # 0x75e290 <cacheVConnectionAllocator>
mov %rbx,%rsi
callq 0x5e0120 <ink_freelist_free@plt>
jmpq 0x512233 <free_CacheVC(CacheVC*)+1283>
mov 0x24f82d(%rip),%edi # 0x761980 <_ZN6Thread15thread_data_keyE>
callq 0x5e0140 <pthread_getspecific@plt>
mov 0x100(%rax),%rax
mov %rax,(%rbx)
mov 0x24f818(%rip),%edi # 0x761980 <_ZN6Thread15thread_data_keyE>
callq 0x5e0140 <pthread_getspecific@plt>
mov %rbx,0x100(%rax)
mov 0x24f806(%rip),%edi # 0x761980 <_ZN6Thread15thread_data_keyE>
callq 0x5e0140 <pthread_getspecific@plt>
addl $0x1,0xf8(%rax)
mov 0x24f7f4(%rip),%edi # 0x761980 <_ZN6Thread15thread_data_keyE>
callq 0x5e0140 <pthread_getspecific@plt>
mov 0xf8(%rax),%eax
cmp 0xdfc6b(%rip),%eax # 0x5f1e08 <thread_freelist_high_watermark>
jle 0x512233 <free_CacheVC(CacheVC*)+1283>
mov 0x24f7d7(%rip),%edi # 0x761980 <_ZN6Thread15thread_data_keyE>
callq 0x5e0140 <pthread_getspecific@plt>
mov 0x100(%rax),%rsi
test %rsi,%rsi
je 0x512233 <free_CacheVC(CacheVC*)+1283>
mov 0xf8(%rax),%edi
mov 0xdfc45(%rip),%r9d # 0x5f1e0c <thread_freelist_low_watermark>
cmp %r9d,%edi
jle 0x512233 <free_CacheVC(CacheVC*)+1283>
mov %r9d,%r8d
not %r8d
add %edi,%r8d
add $0x1,%r8
sub %edi,%r9d
add $0xffffffff,%edi
mov $0x1,%ecx
mov %rsi,%rbx
nopw 0x0(%rax,%rax,1)
mov %rbx,%rdx
mov (%rbx),%rbx
mov %rbx,0x100(%rax)
mov %edi,0xf8(%rax)
test %rbx,%rbx
je 0x51221c <free_CacheVC(CacheVC*)+1260>
add $0x1,%rcx
lea (%r9,%rcx,1),%r10d
add $0xffffffff,%edi
cmp $0x1,%r10d
jne 0x5121f0 <free_CacheVC(CacheVC*)+1216>
mov %r8,%rcx
test %rcx,%rcx
je 0x512233 <free_CacheVC(CacheVC*)+1283>
cmp $0x1,%rcx
je 0x512297 <free_CacheVC(CacheVC*)+1383>
mov 0x24c062(%rip),%rdi # 0x75e290 <cacheVConnectionAllocator>
callq 0x5e0220 <ink_freelist_free_bulk@plt>
xor %eax,%eax
add $0x20,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
retq
```
And here is the same code after the update, with just one call to `pthread_getspecific`:
```
cmpl $0x0,0x28fc65(%rip) # 0x7a34d0 <cmd_disable_pfreelist>
je 0x513881 <free_CacheVC(CacheVC*)+1089>
mov 0x24bffc(%rip),%rdi # 0x75f870 <cacheVConnectionAllocator>
mov %r12,%rsi
callq 0x5e1710 <ink_freelist_free@plt>
jmpq 0x513925 <free_CacheVC(CacheVC*)+1253>
mov 0x24f6d9(%rip),%edi # 0x762f60 <_ZN6Thread15thread_data_keyE>
callq 0x5e1730 <pthread_getspecific@plt>
mov 0x100(%rax),%rcx
mov %rcx,(%r12)
mov %r12,0x100(%rax)
mov 0xf8(%rax),%esi
lea 0x1(%rsi),%ecx
mov %ecx,0xf8(%rax)
cmp 0xdfb45(%rip),%esi # 0x5f33f8 <thread_freelist_high_watermark>
jl 0x513925 <free_CacheVC(CacheVC*)+1253>
mov 0xdfb41(%rip),%ecx # 0x5f33fc <thread_freelist_low_watermark>
mov %esi,%r8d
sub %ecx,%r8d
jl 0x513925 <free_CacheVC(CacheVC*)+1253>
add $0x1,%r8
mov %esi,%r9d
not %r9d
add %ecx,%r9d
mov $0x1,%ecx
mov %r12,%rbx
nopl 0x0(%rax,%rax,1)
mov %rbx,%rdx
mov (%rbx),%rbx
mov %rbx,0x100(%rax)
mov %esi,0xf8(%rax)
test %rbx,%rbx
je 0x51390b <free_CacheVC(CacheVC*)+1227>
add $0x1,%rcx
lea (%r9,%rcx,1),%edi
add $0xffffffff,%esi
cmp $0x1,%edi
jne 0x5138e0 <free_CacheVC(CacheVC*)+1184>
mov %r8,%rcx
test %rcx,%rcx
je 0x513925 <free_CacheVC(CacheVC*)+1253>
cmp $0x1,%rcx
je 0x51398b <free_CacheVC(CacheVC*)+1355>
mov 0x24bf53(%rip),%rdi # 0x75f870 <cacheVConnectionAllocator>
mov %r12,%rsi
callq 0x5e1810 <ink_freelist_free_bulk@plt>
xor %eax,%eax
add $0x20,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
retq
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]