Hello,
Gleb Natapov wrote:
> If it does this after opal_atomic_lock() (which is explicit memory
> barrier) then it is broken.
Then gcc 4.1.1 on the amd64 architecture is broken.
The test cases below (each source file is followed by the assembly generated for it) were compiled in the test/asm directory with -O3:
Bert
#define OMPI_BUILDING 0
#include "ompi_config.h"
#include "opal/sys/atomic.h"
/* Lock shared by the test; starts out unlocked. */
static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };
/*
 * Double-checked test case, non-volatile variant.
 *
 * `test` is examined once before taking the lock and once again while
 * holding it.  The empty __asm__ statements only drop marker comments
 * ("# first if", "# lock", ...) into the compiler's assembly output so the
 * individual steps can be located in the generated listing.
 *
 * Returns 1 when argc == 1, 2 when the second check sees test == 1, and 0
 * otherwise.
 *
 * NOTE(review): `test` is a plain automatic variable whose address is never
 * taken, so nothing outside main() can modify it between the two checks.
 * The compiler is presumably entitled to reuse the outcome of the first
 * comparison regardless of any memory barrier inside opal_atomic_lock();
 * a shared (e.g. file-scope) variable would be needed to exercise the
 * barrier -- confirm against the opal_atomic_lock() implementation.
 */
int
main(int argc, char *argv[])
{
/* 1 when the program is started without extra arguments, else 0. */
int test = (argc == 1);
__asm__ ("# first if\n");
/* First (unlocked) check. */
if (1 == test) {
return 1;
}
__asm__ ("# lock\n");
opal_atomic_lock(&lock);
__asm__ ("# second if\n");
/*
 * Second check, under the lock.  In the -O3 listing that follows, no
 * instructions appear between the "# second if" and "# unlock" markers:
 * this branch and its "# if unlock" marker are absent.
 */
if (1 == test) {
__asm__ ("# if unlock\n");
opal_atomic_unlock(&lock);
return 2;
}
/* Never read again before main() returns. */
test = 1;
__asm__ ("# unlock\n");
opal_atomic_unlock(&lock);
return 0;
}
# Assembly generated for double_check.c (non-volatile `test`).
# The "# first if" / "# lock" / "# second if" / "# unlock" lines between
# #APP and #NO_APP are the marker comments emitted by the __asm__ statements
# in the C source.
.file "double_check.c"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB30:
#APP
# first if
#NO_APP
# %edi carries argc (first integer argument in the System V AMD64 calling
# convention).  decl sets ZF when argc - 1 == 0, i.e. when test would be 1.
decl %edi
# Preload the return value 1; mov leaves the flags from decl untouched.
movl $1, %eax
# argc == 1: return 1 without touching the lock.
je .L4
#APP
# lock
.p2align 4,,7
#NO_APP
# Lock acquisition: try to change `lock` from 0 to 1 atomically.
.L5:
xorl %edx, %edx
movl $1, %ecx
# %eax = 0 is the value cmpxchg expects to find in `lock`.
movl %edx, %eax
#APP
lock; cmpxchgl %ecx,lock(%rip)
sete %dl
#NO_APP
# %dl != 0 means the exchange succeeded and the lock is now held.
testb %dl, %dl
jne .L13
.p2align 4,,7
# Exchange failed: spin on plain loads while `lock` reads 1, then retry.
.L9:
movl lock(%rip), %eax
decl %eax
je .L9
jmp .L5
.L13:
#APP
# second if
# unlock
#NO_APP
# No instructions were emitted between the "second if" and "unlock" markers:
# the second comparison of `test`, the "# if unlock" marker with its
# `return 2` path, and the store `test = 1` do not appear in this listing.
# Release the lock by storing 0.
movl $0, lock(%rip)
# %eax is still 0 here (a successful cmpxchg leaves it unchanged), so the
# fall-through path returns 0; the early-exit path arrives with %eax = 1.
.L4:
rep ; ret
.LFE30:
.size main, .-main
# `lock` is a file-local, 4-byte, 4-byte-aligned common symbol.
.local lock
.comm lock,4,4
# Call-frame information emitted by the compiler; the FDE below covers the
# range .LFB30 to .LFE30 (main).
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string "zR"
.uleb128 0x1
.sleb128 -8
.byte 0x10
.uleb128 0x1
.byte 0x3
.byte 0xc
.uleb128 0x7
.uleb128 0x8
.byte 0x90
.uleb128 0x1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB30
.long .LFE30-.LFB30
.uleb128 0x0
.align 8
.LEFDE1:
.ident "GCC: (GNU) 4.1.1"
.section .note.GNU-stack,"",@progbits
#define OMPI_BUILDING 0
#include "ompi_config.h"
#include "opal/sys/atomic.h"
/* Lock shared by the test; starts out unlocked. */
static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };
/*
 * Double-checked test case, volatile variant.
 *
 * Same statements as the non-volatile test case, except that `test` is
 * declared volatile.  The empty __asm__ statements only drop marker comments
 * into the compiler's assembly output so the individual steps can be located
 * in the generated listing.
 *
 * Returns 1 when argc == 1, 2 when the second check reads test == 1, and 0
 * otherwise.
 */
int
main(int argc, char *argv[])
{
/*
 * volatile: every read and write of `test` in the source must be performed
 * as an actual memory access; the listing that follows keeps it in a stack
 * slot and reloads it for each comparison.
 */
volatile int test = (argc == 1);
__asm__ ("# first if\n");
/* First (unlocked) check. */
if (1 == test) {
return 1;
}
__asm__ ("# lock\n");
opal_atomic_lock(&lock);
__asm__ ("# second if\n");
/*
 * Second check, under the lock.  Unlike in the non-volatile variant, this
 * comparison and the "# if unlock" branch are present in the listing.
 */
if (1 == test) {
__asm__ ("# if unlock\n");
opal_atomic_unlock(&lock);
return 2;
}
/* Volatile store, so it is kept even though `test` is not read afterwards. */
test = 1;
__asm__ ("# unlock\n");
opal_atomic_unlock(&lock);
return 0;
}
# Assembly generated for double_check_volatile.c (volatile `test`).
# The "# first if" / "# lock" / "# second if" / "# if unlock" / "# unlock"
# lines between #APP and #NO_APP are the marker comments emitted by the
# __asm__ statements in the C source.
.file "double_check_volatile.c"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB30:
# test = (argc == 1); %edi carries argc (first integer argument in the
# System V AMD64 calling convention).  The result is stored in the stack
# slot at -4(%rsp), which holds `test` for the rest of the function.
xorl %eax, %eax
cmpl $1, %edi
sete %al
movl %eax, -4(%rsp)
#APP
# first if
#NO_APP
# First check: reload `test` from memory and compare it with 1.
movl -4(%rsp), %eax
# %edx holds the pending return value (1 for the early exit).
movl $1, %edx
decl %eax
je .L4
#APP
# lock
.p2align 4,,7
#NO_APP
# Lock acquisition: try to change `lock` from 0 to 1 atomically.
.L5:
xorl %edx, %edx
movl $1, %ecx
# %eax = 0 is the value cmpxchg expects to find in `lock`.
movl %edx, %eax
#APP
lock; cmpxchgl %ecx,lock(%rip)
sete %dl
#NO_APP
# %dl != 0 means the exchange succeeded and the lock is now held.
testb %dl, %dl
jne .L15
.p2align 4,,7
# Exchange failed: spin on plain loads while `lock` reads 1, then retry.
.L11:
movl lock(%rip), %eax
decl %eax
je .L11
jmp .L5
.L15:
#APP
# second if
#NO_APP
# Second check: `test` is loaded from its stack slot again and compared
# with 1; test != 1 continues at .L8.
movl -4(%rsp), %eax
decl %eax
jne .L8
#APP
# if unlock
#NO_APP
# test == 1 under the lock: release the lock and return 2.
movl $0, lock(%rip)
movl $2, %edx
# Common exit: move the pending return value from %edx into %eax.
.L4:
movl %edx, %eax
ret
.L8:
# test = 1 (the volatile store is kept).
movl $1, -4(%rsp)
#APP
# unlock
#NO_APP
# Return value 0, release the lock, leave through the common exit.
xorl %edx, %edx
movl $0, lock(%rip)
jmp .L4
.LFE30:
.size main, .-main
# `lock` is a file-local, 4-byte, 4-byte-aligned common symbol.
.local lock
.comm lock,4,4
# Call-frame information emitted by the compiler; the FDE below covers the
# range .LFB30 to .LFE30 (main).
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string "zR"
.uleb128 0x1
.sleb128 -8
.byte 0x10
.uleb128 0x1
.byte 0x3
.byte 0xc
.uleb128 0x7
.uleb128 0x8
.byte 0x90
.uleb128 0x1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB30
.long .LFE30-.LFB30
.uleb128 0x0
.align 8
.LEFDE1:
.ident "GCC: (GNU) 4.1.1"
.section .note.GNU-stack,"",@progbits