https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448

--- Comment #23 from preshing <filter-gcc at preshing dot com> ---
Hi,

I went ahead and verified this bug using a cross-compiler built from GCC 4.9.2
sources. The bug indeed exists and happens when compiling for AArch64, but not
PowerPC. Andrew's patch fixes it (changing the first ldr instruction to an ldar
in this case). Full AArch64 assembly listings below.

I've also written a blog post on this subject in the hope of clarifying the
issue for anyone determined enough to make sense of it:
http://preshing.com/20141124/fixing-gccs-implementation-of-memory_order_consume

Andrew's patch, if it works the way I understand it, seems like the correct
thing for GCC to do until somebody figures out how to safely implement the
"efficient" compiler strategy for consume semantics.

I guess the next step is to run the test suite on a few platforms to make sure
there are no regressions, then submit?

Cheers,
Jeff

------------------ AArch64 listing of threadB() without Andrew's patch:

// threadB() (mangled name _Z7threadBv), AArch64 GNU assembler output.
// Register roles in this listing:
//   x1    = address of .LANCHOR0, live across the whole .L10 loop
//   x0/w0 = scratch: address of .LANCHOR0+8, then the loaded values
// The listing contains no ret: control either branches back to .L10 or ends
// in the call to __assert_fail at .L15.
// NOTE(review): the words at .LANCHOR0+8 and .LANCHOR0+0 are presumably the
// flag and the data it guards in the C++ test program; that source is not
// shown here - confirm against the bug report.
_Z7threadBv:
.LFB2304:
        .cfi_startproc
        adrp    x1, .LANCHOR0                   // page address of .LANCHOR0 (low 12 bits added below)
        stp     x29, x30, [sp, -16]!            // save x29 (FP) and x30 (LR); sp pre-decremented by 16
        .cfi_def_cfa_offset 16
        .cfi_offset 29, -16
        .cfi_offset 30, -8
        add     x1, x1, :lo12:.LANCHOR0         // x1 = full address of .LANCHOR0
        add     x29, sp, 0                      // x29 = sp (frame pointer setup)
        .cfi_def_cfa_register 29
.L10:
        add     x0, x1, 8                       // x0 = .LANCHOR0 + 8
        // Plain ldr: this load carries no acquire semantics. Per the report
        // text, this is the "first ldr" that the patch turns into an ldar.
        ldr     w0, [x0]
        cbz     w0, .L10                        // spin while the word at .LANCHOR0+8 is zero
        ldr     w0, [x1]                        // load the word at .LANCHOR0+0
        cmp     w0, 1
        bne     .L15                            // value != 1 -> assertion-failure path
        str     wzr, [x1]                       // store 0 to .LANCHOR0+0
        add     x0, x1, 8                       // recompute .LANCHOR0 + 8 (x0 was overwritten by the loads)
        stlr    wzr, [x0]                       // store-release 0 to .LANCHOR0+8
        b       .L10                            // loop forever
.L15:
        // Argument setup for __assert_fail in x0..x3. Note that x1 is
        // overwritten here, so .LANCHOR0 is no longer held in a register.
        // NOTE(review): presumably x0 = assertion text, x1 = file name,
        // w2 = source line, x3 = function name - confirm against the libc
        // prototype; the contents of .LC1/.LC2/.LANCHOR1 are not shown.
        adrp    x3, .LANCHOR1
        adrp    x0, .LC2
        adrp    x1, .LC1
        add     x3, x3, :lo12:.LANCHOR1
        add     x0, x0, :lo12:.LC2              // x0 = address of .LC2
        add     x1, x1, :lo12:.LC1              // x1 = address of .LC1
        mov     w2, 47                          // w2 = 47
        add     x3, x3, 16                      // x3 = .LANCHOR1 + 16
        bl      __assert_fail                   // nothing follows the call; presumably it does not return
        .cfi_endproc

------------------ AArch64 listing of threadB() with Andrew's patch:

// threadB() (mangled name _Z7threadBv), AArch64 GNU assembler output.
// Register roles in this listing:
//   x1    = address of .LANCHOR0, live across the whole .L10 loop
//   x0/w0 = scratch: address of .LANCHOR0+8, then the loaded values
// The listing contains no ret: control either branches back to .L10 or ends
// in the call to __assert_fail at .L15.
// NOTE(review): the words at .LANCHOR0+8 and .LANCHOR0+0 are presumably the
// flag and the data it guards in the C++ test program; that source is not
// shown here - confirm against the bug report.
_Z7threadBv:
.LFB2304:
        .cfi_startproc
        adrp    x1, .LANCHOR0                   // page address of .LANCHOR0 (low 12 bits added below)
        stp     x29, x30, [sp, -16]!            // save x29 (FP) and x30 (LR); sp pre-decremented by 16
        .cfi_def_cfa_offset 16
        .cfi_offset 29, -16
        .cfi_offset 30, -8
        add     x1, x1, :lo12:.LANCHOR0         // x1 = full address of .LANCHOR0
        add     x29, sp, 0                      // x29 = sp (frame pointer setup)
        .cfi_def_cfa_register 29
.L10:
        add     x0, x1, 8                       // x0 = .LANCHOR0 + 8
        // ldar (load-acquire): memory accesses that follow it in program
        // order, including the ldr from [x1] below, are ordered after it.
        ldar    w0, [x0]
        cbz     w0, .L10                        // spin while the word at .LANCHOR0+8 is zero
        ldr     w0, [x1]                        // load the word at .LANCHOR0+0
        cmp     w0, 1
        bne     .L15                            // value != 1 -> assertion-failure path
        str     wzr, [x1]                       // store 0 to .LANCHOR0+0
        add     x0, x1, 8                       // recompute .LANCHOR0 + 8 (x0 was overwritten by the loads)
        stlr    wzr, [x0]                       // store-release 0 to .LANCHOR0+8
        b       .L10                            // loop forever
.L15:
        // Argument setup for __assert_fail in x0..x3. Note that x1 is
        // overwritten here, so .LANCHOR0 is no longer held in a register.
        // NOTE(review): presumably x0 = assertion text, x1 = file name,
        // w2 = source line, x3 = function name - confirm against the libc
        // prototype; the contents of .LC1/.LC2/.LANCHOR1 are not shown.
        adrp    x3, .LANCHOR1
        adrp    x0, .LC2
        adrp    x1, .LC1
        add     x3, x3, :lo12:.LANCHOR1
        add     x0, x0, :lo12:.LC2              // x0 = address of .LC2
        add     x1, x1, :lo12:.LC1              // x1 = address of .LC1
        mov     w2, 47                          // w2 = 47
        add     x3, x3, 16                      // x3 = .LANCHOR1 + 16
        bl      __assert_fail                   // nothing follows the call; presumably it does not return
        .cfi_endproc

Reply via email to