Re-attaching updated implementation just in case.
On Mon, Apr 21, 2014 at 10:33 AM, Yuri Gribov <[email protected]> wrote:
>>> So even though my implementation is slightly faster we're still
>>> getting a 70% perf hit.
>> interesting.
>>
>> can you show the assembly (objdump -d) for __asan_load8 in both variants?
>
> My disas:
>
> 00000000004cf6a0 <__asan_load8>:
> 4cf6a0: 48 89 f8 mov %rdi,%rax
> 4cf6a3: 48 c1 e8 03 shr $0x3,%rax
> 4cf6a7: 80 b8 00 80 ff 7f 00 cmpb $0x0,0x7fff8000(%rax)
> 4cf6ae: 75 08 jne 4cf6b8 <__asan_load8+0x18>
> 4cf6b0: f3 c3 repz retq
> 4cf6b2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
> 4cf6b8: e9 f3 55 fc ff jmpq 494cb0 <__asan_report_load8>
> 4cf6bd: 0f 1f 00 nopl (%rax)
>
> And here's the trunk version:
>
> 0000000000493b00 <__asan_load8>:
> 493b00: 48 89 f8 mov %rdi,%rax
> 493b03: 48 c1 e8 03 shr $0x3,%rax
> 493b07: 80 b8 00 80 ff 7f 00 cmpb $0x0,0x7fff8000(%rax)
> 493b0e: 74 40 je 493b50 <__asan_load8+0x50>
> 493b10: 48 8b 05 51 83 26 00 mov 0x268351(%rip),%rax
> # 6fbe68 <_DYNAMIC+0x13a0>
> 493b17: 48 8b 00 mov (%rax),%rax
> 493b1a: 48 85 c0 test %rax,%rax
> 493b1d: 74 09 je 493b28 <__asan_load8+0x28>
> 493b1f: 48 89 38 mov %rdi,(%rax)
> 493b22: c3 retq
> 493b23: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
> 493b28: 55 push %rbp
> 493b29: 48 89 f9 mov %rdi,%rcx
> 493b2c: 41 b9 08 00 00 00 mov $0x8,%r9d
> 493b32: 45 31 c0 xor %r8d,%r8d
> 493b35: 48 89 e5 mov %rsp,%rbp
> 493b38: 48 83 ec 10 sub $0x10,%rsp
> 493b3c: 48 8b 7d 08 mov 0x8(%rbp),%rdi
> 493b40: 48 8d 55 f8 lea -0x8(%rbp),%rdx
> 493b44: 48 89 ee mov %rbp,%rsi
> 493b47: e8 64 e3 ff ff callq 491eb0 <__asan_report_error>
> 493b4c: c9 leaveq
> 493b4d: 0f 1f 00 nopl (%rax)
> 493b50: f3 c3 repz retq
> 493b52: 66 66 66 66 66 2e 0f data32 data32 data32 data32
> nopw %cs:0x0(%rax,%rax,1)
> 493b59: 1f 84 00 00 00 00 00
>
>> If you want to rely on a custom ABI, you should implement it on both
>> callee and caller sides.
>
> Sure.
>
>> That might indeed improve the speed, but imho is not worth it here.
>
> I still think that most of the overhead comes from ABI overheads (IMHO
> x86/amd64 are particularly bad at this). E.g. removing _all_ code from
> callbacks results in 16 sec runtime (so callback code overhead is only
> (17.3 - 16)/(17.3 - 11) = 20%) so improving it further is practically
> worthless.
>
> -Y
--
You received this message because you are subscribed to the Google Groups
"address-sanitizer" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
#include "asan_internal.h"
#include "asan_mapping.h"
// FIXME: add optimized implementation for ARM.
// Declares the reporter __asan_report_<type><size>(addr) and defines the
// callback __asan_<type><size>(addr), the latter declared with
// INTERFACE_ATTRIBUTE. Instantiated below for 1-, 2- and 4-byte loads and
// stores.
//
// The callback reads the single shadow byte that MEM_TO_SHADOW yields for
// addr. It calls the reporter when that byte is non-zero and the offset of the
// access's last byte inside its 8-byte group, (addr & 7) + size - 1, is not
// below the shadow value. No other shadow byte is consulted.
// NOTE(review): presumably s8 is a signed 8-bit type, in which case negative
// shadow values always lead to a report -- confirm against the typedefs.
#define DEFINE_ASAN_CHECK(type, size) \
  extern "C" void __asan_report_##type##size(uptr addr); \
  extern "C" INTERFACE_ATTRIBUTE void __asan_##type##size(uptr addr); \
  void __asan_##type##size(uptr addr) { \
    int shadow = *(s8 *)MEM_TO_SHADOW(addr); \
    if (UNLIKELY(shadow != 0)) { \
      /* Offset of the last accessed byte, counted from the group start. */ \
      int last_off = (addr & 7) + size - 1; \
      if (UNLIKELY(last_off >= shadow)) \
        __asan_report_##type##size(addr); \
    } \
  }

DEFINE_ASAN_CHECK(load, 1)
DEFINE_ASAN_CHECK(load, 2)
DEFINE_ASAN_CHECK(load, 4)
DEFINE_ASAN_CHECK(store, 1)
DEFINE_ASAN_CHECK(store, 2)
DEFINE_ASAN_CHECK(store, 4)
// Declares the reporter __asan_report_<type>8(addr) and defines the callback
// __asan_<type>8(addr), the latter declared with INTERFACE_ATTRIBUTE.
//
// The callback reads the shadow byte that MEM_TO_SHADOW yields for addr and
// calls the reporter whenever that byte is non-zero; the low bits of addr are
// not examined.
// NOTE(review): this looks like it expects addr to be 8-byte aligned --
// confirm with the instrumentation that emits calls to these callbacks.
#define DEFINE_ASAN_CHECK8(type) \
  extern "C" void __asan_report_##type##8(uptr addr); \
  extern "C" INTERFACE_ATTRIBUTE void __asan_##type##8(uptr addr); \
  void __asan_##type##8(uptr addr) { \
    if (UNLIKELY(*(s8 *)MEM_TO_SHADOW(addr) != 0)) \
      __asan_report_##type##8(addr); \
  }

DEFINE_ASAN_CHECK8(load)
DEFINE_ASAN_CHECK8(store)
// FIXME: this relies on 3-bit shadow scaling
// Declares the reporter __asan_report_<type>16(addr) and defines the callback
// __asan_<type>16(addr), the latter declared with INTERFACE_ATTRIBUTE.
//
// The callback reads two shadow bytes, the one for addr and the one for
// addr + 8, and calls the reporter if either of them is non-zero. Both bytes
// are always read before the test.
// NOTE(review): as with the 8-byte variant, the low bits of addr are ignored,
// so this presumably expects an aligned access -- confirm with the caller.
#define DEFINE_ASAN_CHECK16(type) \
  extern "C" void __asan_report_##type##16(uptr addr); \
  extern "C" INTERFACE_ATTRIBUTE void __asan_##type##16(uptr addr); \
  void __asan_##type##16(uptr addr) { \
    int lo_shadow = *(s8 *)MEM_TO_SHADOW(addr); \
    int hi_shadow = *(s8 *)MEM_TO_SHADOW(addr + 8); \
    /* Non-zero in either half means the access must be reported. */ \
    if (UNLIKELY((lo_shadow | hi_shadow) != 0)) \
      __asan_report_##type##16(addr); \
  }

DEFINE_ASAN_CHECK16(load)
DEFINE_ASAN_CHECK16(store)
// Declares the reporter __asan_report_<type>_n(addr, size) and defines the
// callback __asan_<type>_n(addr, size), the latter declared with
// INTERFACE_ATTRIBUTE. The callback always returns addr unchanged.
//
// A zero size returns immediately without touching shadow memory. Otherwise
// only the first byte (addr) and the last byte (addr + size - 1) of the range
// are examined: for each, the shadow byte from MEM_TO_SHADOW is read, and the
// byte counts as bad when its shadow value is non-zero and the byte's offset
// within its 8-byte group (address & 7) is not below that value. The reporter
// is called if either end is bad; bytes in between are not inspected.
#define DEFINE_ASAN_CHECK_N(type) \
  extern "C" void __asan_report_##type##_n(uptr addr, uptr size); \
  extern "C" INTERFACE_ATTRIBUTE uptr __asan_##type##_n(uptr addr, uptr size); \
  uptr __asan_##type##_n(uptr addr, uptr size) { \
    if (UNLIKELY(size == 0)) \
      return addr; \
    uptr last_addr = addr + size - 1; \
    int first_shadow = *(s8 *)MEM_TO_SHADOW(addr); \
    int last_shadow = *(s8 *)MEM_TO_SHADOW(last_addr); \
    int first_off = addr & 7; \
    int last_off = last_addr & 7; \
    int first_bad = first_shadow != 0 && first_off >= first_shadow; \
    int last_bad = last_shadow != 0 && last_off >= last_shadow; \
    if (UNLIKELY(first_bad || last_bad)) \
      __asan_report_##type##_n(addr, size); \
    return addr; \
  }

DEFINE_ASAN_CHECK_N(load)
DEFINE_ASAN_CHECK_N(store)