Right, that was a pretty intense discussion. However, I don't think we talked 
about replacing the = by a +. The difference is that = means write and + means 
read&write. Here is the assembly output from gcc -O3:

with output "=m" (*v)                      |  with output "+m" (*v)
and  input  "ir" (i), "m" (*v)             |  and  input  "r" (i)
                                           |
_opal_atomic_add_32:                       |  _opal_atomic_add_32:
LFB5:                                      |  LFB5:
        pushq   %rbp                       |    pushq   %rbp
LCFI3:                                     |  LCFI3:
        movq    %rsp, %rbp                 |    movq    %rsp, %rbp
LCFI4:                                     |  LCFI4:
        movq    %rdi, -8(%rbp)             |    movq    %rdi, -8(%rbp)
        movl    %esi, -12(%rbp)            |    movl    %esi, -12(%rbp)
        movq    -8(%rbp), %rcx             |    movq    -8(%rbp), %rcx
        movl    -12(%rbp), %edx            |    movl    -12(%rbp), %edx
        movq    -8(%rbp), %rax             |    movq    -8(%rbp), %rax
        lock;addl %edx,(%rcx)              |    lock;addl %edx,(%rcx)
        movq    -8(%rbp), %rax             |    movq    -8(%rbp), %rax
        movl    (%rax), %eax               |    movl    (%rax), %eax
        leave                              |    leave
        ret                                |    ret

It generates multiple loads as %ras is updated before the lock. Useless!

Now, if we put on the output "=m"(*v) and skip the (*)v in the input arguments 
the code looks like this:

LFB7:
        pushq   %rbp
LCFI0:
        movq    %rsp, %rbp
LCFI1:
        movl    $0, -4(%rbp)
        leaq    -4(%rbp), %rax
        movl    $1, %edx
        lock
        addl %edx,(%rax)
        movl    -4(%rbp), %eax
        leave
        ret

Which is a LOT better. Not perfect as it still generate a load after the locked 
addl, but this is because we wanted to return the (*v).

Thus the code should look at least like this

static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
{
   __asm__ __volatile__(
                        SMPLOCK "addl %1,%0"
                        :"=m" (*v)
                        :"r" (i));
   return (*v);  /* should be an atomic operation */
}

Now, if what we want back from this function is the __REAL__ result of the 
atomic addition, then the code is wrong. Well, mostly wrong under heavy usage 
(i.e. multiple threads doing atomics on the same variable).

Here is the opal_atomic_add_32 returning the correct result. This is similar to 
the atomic intrinsic called add_and_fetch.

static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i)
{
   int ret = i;
   __asm__ __volatile__(
                        SMPLOCK "xaddl %1,%0"
                        :"=m" (*v), "+r" (ret)
                        );
   return ret+i;
}

  george.

On Aug 25, 2010, at 10:58 , Rolf vandeVaart wrote:

> With respect to the warnings with atomic.h, we have been down this road 
> before.
> Here is the ticket with the background information.
> 
> https://svn.open-mpi.org/trac/ompi/ticket/1500
> 
> Eventually, we decided to just live with the warnings.  However, I will take 
> a look at George's two suggestions.
> 
> Rolf
> 
> 
> 
> On 08/24/10 21:28, George Bosilca wrote:
>> On Aug 24, 2010, at 20:40 , Paul H. Hargrove wrote:
>> 
>>   
>> 
>>> "../../../openmpi-1.5rc5/opal/include/opal/sys/ia32/atomic.h", line 170: 
>>> warning: impossible constraint for "%1" asm operand
>>>     
>>> 
>> 
>>    __asm__ __volatile__(
>>                         SMPLOCK "addl %1,%0"
>>                         :"=m" (*v)
>>                         :"ir" (i), "m" (*v));
>> 
>> The problem seems to come from the "ir". Based on a Sun blog about the gcc 
>> style asm inlining support (
>> http://blogs.sun.com/x86be/entry/gcc_style_asm_inlining_support
>> ) it appears that i (any size integer immediate constraint) and r (any 
>> registers in rax, rbx, rcx, rdx, rbp, rsi, rdi, rsp, r8 - r15). As we don't 
>> only apply our atomics on immediate I think we should drop the "i".
>> 
>>   
>> 
>>> "../../../openmpi-1.5rc5/opal/include/opal/sys/ia32/atomic.h", line 170: 
>>> warning: parameter in inline asm statement unused: %2
>>>     
>>> 
>> 
>> This one is more trickier. Because of the %2 I suspect that the second (*v) 
>> on the inputs is not matched to the first (*v) on the outputs. While this 
>> might be significantly bad under some circumstances, in this case I think it 
>> can be safely ignored.
>> 
>> However I would like to try the following asm code instead with the SUN 
>> compiler:
>> 
>>    __asm__ __volatile__(
>>                         SMPLOCK "addl %1,%0"
>>                         :"+m" (*v)
>>                         :"r" (i));
>> 
>>   Thanks,
>>     george.
>> 
>> 
>>   
>> 
>>> "../../../openmpi-1.5rc5/opal/include/opal/sys/ia32/atomic.h", line 187: 
>>> warning: impossible constraint for "%1" asm operand
>>> "../../../openmpi-1.5rc5/opal/include/opal/sys/ia32/atomic.h", line 187: 
>>> warning: parameter in inline asm statement unused: %2
>>> 
>>> ../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 145: Warning 
>>> (Anachronism): Formal argument read_conversion_fn of type extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*) in call to 
>>> MPI_Register_datarep(char*, extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*).
>>> "../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 146: Warning 
>>> (Anachronism): Formal argument write_conversion_fn of type extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*) in call to 
>>> MPI_Register_datarep(char*, extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*).
>>> "../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 147: Warning 
>>> (Anachronism): Formal argument dtype_file_extent_fn of type extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*) in call to MPI_Register_datarep(char*, 
>>> extern "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern 
>>> "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(ompi_datatype_t*,int*,void*).
>>> "../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 172: Warning 
>>> (Anachronism): Formal argument write_conversion_fn of type extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*) in call to 
>>> MPI_Register_datarep(char*, extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*).
>>> "../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 173: Warning 
>>> (Anachronism): Formal argument dtype_file_extent_fn of type extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*) in call to MPI_Register_datarep(char*, 
>>> extern "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern 
>>> "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(ompi_datatype_t*,int*,void*).
>>> "../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 197: Warning 
>>> (Anachronism): Formal argument read_conversion_fn of type extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*) in call to 
>>> MPI_Register_datarep(char*, extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(void*,ompi_datatype_t*,int,void*,long long,void*).
>>> "../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 199: Warning 
>>> (Anachronism): Formal argument dtype_file_extent_fn of type extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*) in call to MPI_Register_datarep(char*, 
>>> extern "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern 
>>> "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(ompi_datatype_t*,int*,void*).
>>> "../../../../openmpi-1.5rc5/ompi/mpi/cxx/file.cc", line 224: Warning 
>>> (Anachronism): Formal argument dtype_file_extent_fn of type extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*) in call to MPI_Register_datarep(char*, 
>>> extern "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern 
>>> "C" int(*)(void*,ompi_datatype_t*,int,void*,long long,void*), extern "C" 
>>> int(*)(ompi_datatype_t*,int*,void*), void*) is being passed 
>>> int(*)(ompi_datatype_t*,int*,void*).
>>> [Though line numbers differ very slightly]
>>> 
>>> 
>>> NEW, not seen with 1.4.3rc1, warnings:
>>> 
>>> Many instances of the following:
>>> 
>>> "../../../../openmpi-1.5rc5/orte/mca/ess/ess.h", line 61: warning: 
>>> attribute "noreturn" may not be applied to variable, ignored
>>> "../../../../openmpi-1.5rc5/orte/mca/errmgr/errmgr.h", line 138: warning: 
>>> attribute "noreturn" may not be applied to variable, ignored
>>> [Due to applying __opal_attribute_noreturn__ to a function pointer]
>>> 
>>> Single instances of the following:
>>> 
>>> "../../../../../openmpi-1.5rc5/opal/mca/crs/none/crs_none_module.c", line 
>>> 136: warning: statement not reached
>>> "../../../../openmpi-1.5rc5/orte/mca/plm/base/plm_base_rsh_support.c", line 
>>> 462: warning: implicit function declaration: rindex
>>> "../../../../openmpi-1.5rc5/orte/mca/plm/base/plm_base_rsh_support.c", line 
>>> 462: warning: improper pointer/integer combination: op "="
>>> "../../../../openmpi-1.5rc5/orte/mca/plm/base/plm_base_rsh_support.c", line 
>>> 565: warning: improper pointer/integer combination: op "="
>>> "../../../../../openmpi-1.5rc5/orte/mca/rmcast/tcp/rmcast_tcp.c", line 982: 
>>> warning: assignment type mismatch:
>>> "../../../../../openmpi-1.5rc5/orte/mca/rmcast/tcp/rmcast_tcp.c", line 
>>> 1023: warning: assignment type mismatch:
>>> "../../../../../openmpi-1.5rc5/orte/mca/rmcast/udp/rmcast_udp.c", line 877: 
>>> warning: assignment type mismatch:
>>> "../../../../../openmpi-1.5rc5/orte/mca/rmcast/udp/rmcast_udp.c", line 918: 
>>> warning: assignment type mismatch:
>>> "../../../../openmpi-1.5rc5/orte/tools/orte-ps/orte-ps.c", line 288: 
>>> warning: initializer does not fit or is out of range: 0xfffffffe
>>> "../../../../openmpi-1.5rc5/orte/tools/orte-ps/orte-ps.c", line 289: 
>>> warning: initializer does not fit or is out of range: 0xfffffffe
>>> "
>>> "../../../../../openmpi-1.5rc5/ompi/mca/common/sm/common_sm_mmap.c", line 
>>> 110: warning: improper pointer/integer combination: arg #3
>>> "../../../../../openmpi-1.5rc5/ompi/mca/common/sm/common_sm_mmap.c", line 
>>> 135: warning: improper pointer/integer combination: arg #3
>>> "../../../../../openmpi-1.5rc5/ompi/mca/common/sm/common_sm_mmap.c", line 
>>> 201: warning: assignment type mismatch:
>>>       pointer to char "=" pointer to int
>>> "../../../../../openmpi-1.5rc5/ompi/mca/common/sm/common_sm_mmap.c", line 
>>> 207: warning: assignment type mismatch:
>>>       pointer to char "=" pointer to int
>>> "../../../../../openmpi-1.5rc5/ompi/mca/common/sm/common_sm_mmap.c", line 
>>> 280: warning: argument #1 is incompatible with prototype:
>>>       prototype: pointer to char : "/usr/include/sys/mman.h", line 238
>>>       argument : pointer to struct mca_common_sm_mmap_t {struct 
>>> opal_list_item_t {..} map_item, pointer to struct 
>>> mca_common_sm_file_header_t {..} map_seg, pointer to unsigned char 
>>> map_addr, pointer to unsigned char data_addr, unsigned int map_size, 
>>> array[1025] of char map_path}
>>> 
>>> 
>>> -Paul
>>> 
>>> -- 
>>> Paul H. Hargrove                          
>>> phhargr...@lbl.gov
>>> 
>>> Future Technologies Group
>>> HPC Research Department                   Tel: +1-510-495-2352
>>> Lawrence Berkeley National Laboratory     Fax: +1-510-486-6900
>>> 
>>> _______________________________________________
>>> devel mailing list
>>> 
>>> de...@open-mpi.org
>>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>>> 
>>>     
>>> 
>> 
>> 
>> _______________________________________________
>> devel mailing list
>> 
>> de...@open-mpi.org
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>> 
>>   
>> 
> 
> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel


Reply via email to