https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65709

Paolo Bonzini <bonzini at gnu dot org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |bonzini at gnu dot org

--- Comment #20 from Paolo Bonzini <bonzini at gnu dot org> ---
> how to efficiently access unaligned memory ?

Use memcpy between unsigned char pointers, with a constant size.  The
compiler knows how to translate it into an unaligned memory access, or even
a combination of unaligned and aligned memory accesses:

$ cat f.c
/*
 * Copy 512 bytes from b to a, one byte per loop iteration.
 *
 * Both pointers are restrict-qualified, i.e. the source and destination
 * buffers are declared not to overlap.
 *
 * NOTE(review): the function is declared to return int but contains no
 * return statement, so callers must not use its result.
 */
int f(char *restrict a, const char *restrict b)
{
        int i;
        /* Fixed trip count: bytes 0..511 are copied in ascending order. */
        for (i = 0; i < 512; i++)
                a[i] = b[i];
}

$ gcc f.c -O3 -S -o f.s -fdump-tree-optimized
$ cat f.c.191t.optimized

;; Function f (f, funcdef_no=0, decl_uid=1832, cgraph_uid=0, symbol_order=0)

f (char * restrict a, const char * restrict b)
{
  <bb 2>:
  __builtin_memcpy (a_5(D), b_7(D), 512); [tail call]
  return;

}

$ cat f.s
...
f:
        .cfi_startproc
        movq    (%rsi), %rdx
        movq    %rdi, %rax
        leaq    8(%rdi), %rdi
        movq    %rdx, -8(%rdi)
        movq    504(%rsi), %rdx
        movq    %rdx, 496(%rdi)
        andq    $-8, %rdi
        subq    %rdi, %rax
        subq    %rax, %rsi
        addl    $512, %eax
        shrl    $3, %eax
        movl    %eax, %ecx
        rep movsq
        ret
...

It's doing unaligned accesses for the first and last 8 bytes, and 63
8-byte moves (rep movsq) in the middle, aligned on the destination.

Reply via email to