Re-attaching updated implementation just in case.

On Mon, Apr 21, 2014 at 10:33 AM, Yuri Gribov <[email protected]> wrote:
>>> So even though my implementation is slightly faster we're still
>>> getting a 70% perf hit.
>> interesting.
>>
>> can you show the assembly (objdump -d) for __asan_load8 in both variants?
>
> My disas:
>
> 00000000004cf6a0 <__asan_load8>:
>   4cf6a0:       48 89 f8                mov    %rdi,%rax
>   4cf6a3:       48 c1 e8 03             shr    $0x3,%rax
>   4cf6a7:       80 b8 00 80 ff 7f 00    cmpb   $0x0,0x7fff8000(%rax)
>   4cf6ae:       75 08                   jne    4cf6b8 <__asan_load8+0x18>
>   4cf6b0:       f3 c3                   repz retq
>   4cf6b2:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
>   4cf6b8:       e9 f3 55 fc ff          jmpq   494cb0 <__asan_report_load8>
>   4cf6bd:       0f 1f 00                nopl   (%rax)
>
> And here's the trunk version:
>
> 0000000000493b00 <__asan_load8>:
>   493b00:       48 89 f8                mov    %rdi,%rax
>   493b03:       48 c1 e8 03             shr    $0x3,%rax
>   493b07:       80 b8 00 80 ff 7f 00    cmpb   $0x0,0x7fff8000(%rax)
>   493b0e:       74 40                   je     493b50 <__asan_load8+0x50>
>   493b10:       48 8b 05 51 83 26 00    mov    0x268351(%rip),%rax        # 6fbe68 <_DYNAMIC+0x13a0>
>   493b17:       48 8b 00                mov    (%rax),%rax
>   493b1a:       48 85 c0                test   %rax,%rax
>   493b1d:       74 09                   je     493b28 <__asan_load8+0x28>
>   493b1f:       48 89 38                mov    %rdi,(%rax)
>   493b22:       c3                      retq
>   493b23:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
>   493b28:       55                      push   %rbp
>   493b29:       48 89 f9                mov    %rdi,%rcx
>   493b2c:       41 b9 08 00 00 00       mov    $0x8,%r9d
>   493b32:       45 31 c0                xor    %r8d,%r8d
>   493b35:       48 89 e5                mov    %rsp,%rbp
>   493b38:       48 83 ec 10             sub    $0x10,%rsp
>   493b3c:       48 8b 7d 08             mov    0x8(%rbp),%rdi
>   493b40:       48 8d 55 f8             lea    -0x8(%rbp),%rdx
>   493b44:       48 89 ee                mov    %rbp,%rsi
>   493b47:       e8 64 e3 ff ff          callq  491eb0 <__asan_report_error>
>   493b4c:       c9                      leaveq
>   493b4d:       0f 1f 00                nopl   (%rax)
>   493b50:       f3 c3                   repz retq
>   493b52:       66 66 66 66 66 2e 0f    data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)
>   493b59:       1f 84 00 00 00 00 00
>
>> If you want to rely on a custom ABI, you should implement it on both
>> callee and caller sides.
>
> Sure.
>
>> That might indeed improve the speed, but imho is not worth it here.
>
> I still think that most of the overhead comes from ABI overheads (IMHO
> x86/amd64 are particularly bad at this). E.g. removing _all_ code from
> callbacks results in 16 sec runtime (so callback code overhead is only
> (17.3 - 16)/(17.3 - 11) =  20%) so improving it further is practically
> worthless.
>
> -Y

-- 
You received this message because you are subscribed to the Google Groups 
"address-sanitizer" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.
#include "asan_internal.h"
#include "asan_mapping.h"

// FIXME: add optimized implementation for ARM.

// Generates the out-of-line check __asan_<type><size>(addr) for accesses
// narrower than one 8-byte shadow granule (instantiated below for 1, 2 and
// 4 bytes).
//
// The generated function reads the signed shadow byte for addr and computes
// the offset, within the granule, of the last byte touched by the access.
// It calls __asan_report_<type><size>(addr) when the shadow byte is non-zero
// and does not exceed that offset; a negative shadow byte therefore always
// reports.  Otherwise it returns without doing anything else.
//
// NOTE(review): only the shadow byte of the first granule is read, so when
// that byte is 0 an access straddling a granule boundary is not checked
// against the following granule -- presumably callers pass naturally aligned
// addresses; confirm.
#define DEFINE_ASAN_CHECK(type, size)                        \
  extern "C"                                                 \
  void __asan_report_ ## type ## size(uptr addr);            \
  extern "C" INTERFACE_ATTRIBUTE                             \
  void __asan_ ## type ## size(uptr addr);                   \
  void __asan_ ## type ## size(uptr addr) {                  \
    int shadow = *(s8 *)MEM_TO_SHADOW(addr);                 \
    int last_offset = (addr & 7) + size - 1;                 \
    if (UNLIKELY(shadow != 0 && shadow <= last_offset))      \
      __asan_report_ ## type ## size(addr);                  \
  }

// Sub-granule loads.
DEFINE_ASAN_CHECK(load, 1)
DEFINE_ASAN_CHECK(load, 2)
DEFINE_ASAN_CHECK(load, 4)
// Sub-granule stores.
DEFINE_ASAN_CHECK(store, 1)
DEFINE_ASAN_CHECK(store, 2)
DEFINE_ASAN_CHECK(store, 4)

// Generates the out-of-line check __asan_<type>8(addr) for 8-byte accesses.
//
// The generated function reads the single shadow byte for addr and calls
// __asan_report_<type>8(addr) whenever that byte is non-zero; no offset
// comparison is made.
//
// NOTE(review): exactly one shadow byte is consulted, so an 8-byte access
// that is not 8-byte aligned is not checked against the following granule --
// presumably callers pass aligned addresses; confirm.
#define DEFINE_ASAN_CHECK8(type)                        \
  extern "C"                                            \
  void __asan_report_ ## type ## 8(uptr addr);          \
  extern "C" INTERFACE_ATTRIBUTE                        \
  void __asan_ ## type ## 8(uptr addr);                 \
  void __asan_ ## type ## 8(uptr addr) {                \
    if (UNLIKELY(*(s8 *)MEM_TO_SHADOW(addr) != 0))      \
      __asan_report_ ## type ## 8(addr);                \
  }

DEFINE_ASAN_CHECK8(load)
DEFINE_ASAN_CHECK8(store)

// Generates the out-of-line check __asan_<type>16(addr) for 16-byte accesses.
//
// The generated function reads two shadow bytes, the one for addr and the one
// for addr + 8, and calls __asan_report_<type>16(addr) if either is non-zero.
//
// FIXME: this relies on 3-bit shadow scaling (the second shadow byte is
// located by stepping the address forward by exactly 8).
//
// NOTE(review): only these two shadow bytes are read, so a 16-byte access at
// an address that is not 8-byte aligned is not checked against a third
// granule -- presumably callers pass aligned addresses; confirm.
#define DEFINE_ASAN_CHECK16(type)                         \
  extern "C"                                              \
  void __asan_report_ ## type ## 16(uptr addr);           \
  extern "C" INTERFACE_ATTRIBUTE                          \
  void __asan_ ## type ## 16(uptr addr);                  \
  void __asan_ ## type ## 16(uptr addr) {                 \
    int lo_shadow = *(s8 *)MEM_TO_SHADOW(addr);           \
    int hi_shadow = *(s8 *)MEM_TO_SHADOW(addr + 8);       \
    if (UNLIKELY((lo_shadow | hi_shadow) != 0))           \
      __asan_report_ ## type ## 16(addr);                 \
  }

DEFINE_ASAN_CHECK16(load)
DEFINE_ASAN_CHECK16(store)

// Generates the out-of-line check __asan_<type>_n(addr, size) for accesses of
// arbitrary size.
//
// The generated function always returns addr unchanged.  A size of zero
// returns immediately without reading any shadow memory.  Otherwise it reads
// the signed shadow bytes for the first byte (addr) and the last byte
// (addr + size - 1) of the range and, for each, compares the byte's offset
// within its 8-byte granule against the shadow value.  It calls
// __asan_report_<type>_n(addr, size) if either endpoint has a non-zero shadow
// value that does not exceed its offset, and still returns addr if the report
// function returns.
//
// Only the two endpoints are examined; shadow bytes for the interior of the
// range are never read.
#define DEFINE_ASAN_CHECK_N(type)                                        \
  extern "C"                                                             \
  void __asan_report_ ## type ## _n(uptr addr, uptr size);               \
  extern "C" INTERFACE_ATTRIBUTE                                         \
  uptr __asan_ ## type ## _n(uptr addr, uptr size);                      \
  uptr __asan_ ## type ## _n(uptr addr, uptr size) {                     \
    if (UNLIKELY(size == 0))                                             \
      return addr;                                                       \
    uptr last_addr = addr + size - 1;                                    \
    int first_shadow = *(s8 *)MEM_TO_SHADOW(addr);                       \
    int last_shadow = *(s8 *)MEM_TO_SHADOW(last_addr);                   \
    int first_off = addr & 7;                                            \
    int last_off = last_addr & 7;                                        \
    int first_bad = first_shadow != 0 && first_shadow <= first_off;      \
    int last_bad = last_shadow != 0 && last_shadow <= last_off;          \
    if (UNLIKELY(first_bad || last_bad))                                 \
      __asan_report_ ## type ## _n(addr, size);                          \
    return addr;                                                         \
  }

DEFINE_ASAN_CHECK_N(load)
DEFINE_ASAN_CHECK_N(store)

Reply via email to