Hello,

Gleb Natapov wrote:
> If it does this after opal_atomic_lock() (which is explicit memory
> barrier) then it is broken.
Then gcc 4.1.1 on the amd64 architecture is broken:

The test cases were compiled in the test/asm directory with -O3.

Bert


#define OMPI_BUILDING 0
#include "ompi_config.h"

#include "opal/sys/atomic.h"

/* File-local lock that main() acquires and releases; it starts out as
 * OPAL_ATOMIC_UNLOCKED. */
static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };

/*
 * Test case: check a plain (non-volatile) local both before and after
 * acquiring an OPAL atomic lock.
 *
 * test is initialised to (argc == 1).  Returns 1 if test is 1 at the first
 * check.  Otherwise the lock is taken and test is checked again: if it is 1
 * the lock is released and 2 is returned; if not, test is set to 1, the
 * lock is released and 0 is returned.  argv is not used.
 *
 * The __asm__ statements contain only "#" comment text; they serve as
 * markers for locating each step in the generated assembly.
 *
 * NOTE(review): nothing in this function writes test between the two
 * checks and its address is never taken, so once the first check has
 * failed the second one cannot succeed.  In the -O3 listing that
 * accompanies this test case no code appears between the "# second if"
 * and "# unlock" markers, and the "# if unlock" marker is absent.
 * Presumably the compiler folded the second check for that reason --
 * confirm before treating this as a compiler bug.
 */
int
main(int argc, char *argv[])
{
    /* 1 when argc is exactly 1, 0 otherwise. */
    int test = (argc == 1);

    __asm__ ("# first if\n");
    if (1 == test) {
        /* Return before the lock is ever touched. */
        return 1;
    }
    __asm__ ("# lock\n");
    opal_atomic_lock(&lock);

    /* Same condition as the first check, now evaluated with the lock held. */
    __asm__ ("# second if\n");
    if (1 == test) {
        __asm__ ("# if unlock\n");
        opal_atomic_unlock(&lock);
        return 2;
    }

    /* Update the flag while the lock is still held, then release it. */
    test = 1;
    __asm__ ("# unlock\n");
    opal_atomic_unlock(&lock);

    return 0;
}

	.file	"double_check.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
# Compiled main() for double_check.c.  argc is read from %edi and the
# return value is left in %eax.
main:
.LFB30:
#APP
	# first if

#NO_APP
	# decl sets ZF when argc - 1 == 0 (i.e. test == 1); movl leaves the
	# flags alone, so je returns 1 through .L4 in that case.
	decl	%edi
	movl	$1, %eax
	je	.L4
#APP
	# lock

	.p2align 4,,7
#NO_APP
.L5:
	# Try to change lock from 0 (%eax, expected value) to 1 (%ecx);
	# sete records in %dl whether the compare-and-exchange succeeded.
	xorl	%edx, %edx
	movl	$1, %ecx
	movl	%edx, %eax
#APP
	lock; cmpxchgl %ecx,lock(%rip)   
	sete     %dl      

#NO_APP
	# Exchange succeeded: continue at .L13.
	testb	%dl, %dl
	jne	.L13
	.p2align 4,,7
.L9:
	# Exchange failed: keep re-reading lock while it equals 1, then go
	# back to .L5 and retry the exchange.
	movl	lock(%rip), %eax
	decl	%eax
	je	.L9
	jmp	.L5
.L13:
#APP
	# second if

	# unlock

#NO_APP
	# No comparison was emitted between the two markers above and there is
	# no "# if unlock" path: the only code left is the store of 0 to lock.
	# %eax still holds 0 from the successful exchange, so main returns 0.
	movl	$0, lock(%rip)
.L4:
	rep ; ret
.LFE30:
	.size	main, .-main
	.local	lock
	.comm	lock,4,4
	.section	.eh_frame,"a",@progbits
.Lframe1:
	.long	.LECIE1-.LSCIE1
.LSCIE1:
	.long	0x0
	.byte	0x1
	.string	"zR"
	.uleb128 0x1
	.sleb128 -8
	.byte	0x10
	.uleb128 0x1
	.byte	0x3
	.byte	0xc
	.uleb128 0x7
	.uleb128 0x8
	.byte	0x90
	.uleb128 0x1
	.align 8
.LECIE1:
.LSFDE1:
	.long	.LEFDE1-.LASFDE1
.LASFDE1:
	.long	.LASFDE1-.Lframe1
	.long	.LFB30
	.long	.LFE30-.LFB30
	.uleb128 0x0
	.align 8
.LEFDE1:
	.ident	"GCC: (GNU) 4.1.1"
	.section	.note.GNU-stack,"",@progbits
#define OMPI_BUILDING 0
#include "ompi_config.h"

#include "opal/sys/atomic.h"

/* File-local lock that main() acquires and releases; it starts out as
 * OPAL_ATOMIC_UNLOCKED. */
static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };

/*
 * Test case: check a volatile local both before and after acquiring an
 * OPAL atomic lock.
 *
 * test is initialised to (argc == 1).  Returns 1 if test is 1 at the first
 * check.  Otherwise the lock is taken and test is checked again: if it is 1
 * the lock is released and 2 is returned; if not, test is set to 1, the
 * lock is released and 0 is returned.  argv is not used.
 *
 * The __asm__ statements contain only "#" comment text; they serve as
 * markers for locating each step in the generated assembly.
 *
 * Because test is declared volatile, each read and write of it in the
 * source is an access the compiler has to perform.  The -O3 listing that
 * accompanies this test case loads it from -4(%rsp) at both checks and
 * keeps the "# if unlock" path.
 */
int
main(int argc, char *argv[])
{
    /* 1 when argc is exactly 1, 0 otherwise. */
    volatile int test = (argc == 1);

    __asm__ ("# first if\n");
    if (1 == test) {
        /* Return before the lock is ever touched. */
        return 1;
    }
    __asm__ ("# lock\n");
    opal_atomic_lock(&lock);

    /* Same condition as the first check, re-read with the lock held. */
    __asm__ ("# second if\n");
    if (1 == test) {
        __asm__ ("# if unlock\n");
        opal_atomic_unlock(&lock);
        return 2;
    }

    /* Update the flag while the lock is still held, then release it. */
    test = 1;
    __asm__ ("# unlock\n");
    opal_atomic_unlock(&lock);

    return 0;
}

	.file	"double_check_volatile.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
# Compiled main() for double_check_volatile.c.  argc is read from %edi;
# the return value is built in %edx and copied to %eax at .L4.
main:
.LFB30:
	# Evaluate (argc == 1) and store the result in the stack slot at
	# -4(%rsp), which holds test in this listing.
	xorl	%eax, %eax
	cmpl	$1, %edi
	sete	%al
	movl	%eax, -4(%rsp)
#APP
	# first if

#NO_APP
	# Load test from the stack slot; if it is 1, return 1 through .L4.
	movl	-4(%rsp), %eax
	movl	$1, %edx
	decl	%eax
	je	.L4
#APP
	# lock

	.p2align 4,,7
#NO_APP
.L5:
	# Try to change lock from 0 (%eax, expected value) to 1 (%ecx);
	# sete records in %dl whether the compare-and-exchange succeeded.
	xorl	%edx, %edx
	movl	$1, %ecx
	movl	%edx, %eax
#APP
	lock; cmpxchgl %ecx,lock(%rip)   
	sete     %dl      

#NO_APP
	# Exchange succeeded: continue at .L15.
	testb	%dl, %dl
	jne	.L15
	.p2align 4,,7
.L11:
	# Exchange failed: keep re-reading lock while it equals 1, then go
	# back to .L5 and retry the exchange.
	movl	lock(%rip), %eax
	decl	%eax
	je	.L11
	jmp	.L5
.L15:
#APP
	# second if

#NO_APP
	# test is loaded from the stack slot again here; if it is not 1,
	# branch to .L8.
	movl	-4(%rsp), %eax
	decl	%eax
	jne	.L8
#APP
	# if unlock

#NO_APP
	# test was 1: store 0 to lock and return 2.
	movl	$0, lock(%rip)
	movl	$2, %edx
.L4:
	# Common exit: move the return value from %edx into %eax.
	movl	%edx, %eax
	ret
.L8:
	# test = 1, written back to the stack slot.
	movl	$1, -4(%rsp)
#APP
	# unlock

#NO_APP
	# Store 0 to lock and return 0 through .L4.
	xorl	%edx, %edx
	movl	$0, lock(%rip)
	jmp	.L4
.LFE30:
	.size	main, .-main
	.local	lock
	.comm	lock,4,4
	.section	.eh_frame,"a",@progbits
.Lframe1:
	.long	.LECIE1-.LSCIE1
.LSCIE1:
	.long	0x0
	.byte	0x1
	.string	"zR"
	.uleb128 0x1
	.sleb128 -8
	.byte	0x10
	.uleb128 0x1
	.byte	0x3
	.byte	0xc
	.uleb128 0x7
	.uleb128 0x8
	.byte	0x90
	.uleb128 0x1
	.align 8
.LECIE1:
.LSFDE1:
	.long	.LEFDE1-.LASFDE1
.LASFDE1:
	.long	.LASFDE1-.Lframe1
	.long	.LFB30
	.long	.LFE30-.LFB30
	.uleb128 0x0
	.align 8
.LEFDE1:
	.ident	"GCC: (GNU) 4.1.1"
	.section	.note.GNU-stack,"",@progbits

Reply via email to