Still no good: opal_atomic_cmpset_32 is not inlined. Try adding -O3 to your
command line; this helped with gcc.

  Thanks,
    george.

On Jun 8, 2010, at 14:14 , Jeff Squyres wrote:

> On Jun 8, 2010, at 9:53 AM, George Bosilca wrote:
> 
>> As you can see, there is no explicit call; opal_atomic_cmpset_32 really is
>> inlined. I think the problem is that you didn't specify the -O3 flag
>> on your command line.
> 
> Ah, you wanted me to compile the OMPI code itself and send you the assembly.  
> That's not what you asked for.  :-)
> 
> (I just took the code you sent in the mail, stuffed it into george.c, and 
> compiled that with -s -- outside of the context of the Open MPI code tree)
> 
> Here's the new output.  It still didn't inline, but you can see the code for 
> the _cmpset function:
> 
> -----
> [7:13] svbu-mpi:~/tmp % cat george.c                                          
>   
> #include <stdint.h>
> 
> #include "opal/sys/atomic.h"
> 
> int foo(void) {
>  int32_t oldval, delta;
>  int32_t *addr = 0;
>   do {
>      oldval = *addr;
>   } while (0 == opal_atomic_cmpset_32(addr, oldval, oldval + delta));
>   return (oldval + delta);
> }
> 
> [7:13] svbu-mpi:~/tmp % pgcc -O3 -I /home/jsquyres/svn/ompi4 
> -I/home/jsquyres/svn/ompi4/opal/include -c -s george.c
> [7:13] svbu-mpi:~/tmp % cat george.s                                          
>         .file   "george.c"
>       .version        "01.01"
> ## PGC 7.0 -opt 1
> ## PGC 06/08/2010  05:10:04
> ## pgcc george.c -c -S
> ## /opt/pgi/7.0.7/linux86-64/7.0-7/bin/pgc
> ## george.c -opt 1 -terse 1 -inform warn -x 119 0xa10000 -x 122 0x40 -x 123 
> 0x1000
> ## -x 127 4 -x 127 16 -x 19 0x400000 -x 28 0x40000 -x 70 0x8000 -x 122 1 -quad
> ## -x 59 4 -x 59 4 -tp p7-64 -astype 0 -stdinc 
> /opt/pgi/7.0.7/linux86-64/7.0-7/include:/usr/local/include:/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include:/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include:/usr/include
> ## -def unix -def __unix -def __unix__ -def linux -def __linux -def __linux__
> ## -def __NO_MATH_INLINES -def __x86_64__ -def 
> __LONG_MAX__=9223372036854775807L
> ## -def __SIZE_TYPE__=unsigned long int -def __PTRDIFF_TYPE__=long int -def 
> __THROW=
> ## -def __extension__= -def __amd64__ -def __SSE__ -def __MMX__ -def __SSE2__
> ## -def __SSE3__ -predicate #machine(x86_64) #lint(off) #system(posix) 
> #cpu(x86_64)
> ## -cmdline +pgcc george.c -c -S -x 123 4 -x 123 0x80000000 -alwaysinline 
> /opt/pgi/7.0.7/linux86-64/7.0-7/lib/libintrinsics.il 4
> ## -asm george.s
> ## lineno: 3
>       .text
>       .align  16
>       .globl  foo
> foo:
> ..Dcfb0:
>       pushq   %rbp
> ..Dcfi0:
>       movq    %rsp, %rbp
> ..Dcfi1:
>       subq    $16, %rsp
> ..EN1:
> ## lineno: 5
>       movq    $0, -8(%rbp)
>       .p2align        4,,3
> .LB157:
> ## lineno: 6
>       movq    -8(%rbp), %rdi
>       movl    (%rdi), %esi
>       movl    %esi, -12(%rbp)
>       movl    -16(%rbp), %edx
>       addl    %esi, %edx
>       xorl    %eax, %eax
>       call    opal_atomic_cmpset_32
>       testl   %eax, %eax
>       je      .LB157
>       movl    -16(%rbp), %eax
>       addl    -12(%rbp), %eax
> ## lineno: 10
>       leave
>       ret
>       .type   foo,@function
>       .size   foo,.-foo
> ..Dcfe0:
> __fooEND:
>       .section        .pgi_trace
>       .align  8
>       .quad   foo     ## address of routine
>       .quad   __fooEND - foo  ## size of routine
>       .2byte  0       ## flags for future use
>       .2byte  3       ## length of following string
> ## name:foo:
>       .byte   0x66,0x6f,0x6f,0x00
>       .data
>       .globl  opal_atomic_cmpset_32
>       .section        .debug_frame
> ..Dcieb0:
>       .4byte  ..Dciee0-..Dcieb0-4     ## CIE length
>       .4byte  0xffffffff      ## CIE ID
>       .byte   0x1     ## CIE version
>       .byte   0x0     ## no augmentation
>       .byte   0x1     ## ULEB128 1, code alignment factor
>       .byte   0x78    ## SLEB128 -8, data alignment factor
>       .byte   0x10    ## return address column
>       .byte   0xc     ## DW_CFA_def_cfa (col 7)
>       .byte   0x7     ## ULEB128 7
>       .byte   0x8     ## ULEB128 8
>       .byte   0x90    ## DW_CFA_offset (col 16)
>       .byte   0x1     ## ULEB128 1
>       .align  8
> ..Dciee0:
>       .4byte  ..Dfdee0-..Dfdeb0       ## FDE length
> ..Dfdeb0:
>       .4byte  ..Dcieb0        ## CIE pointer
>       .quad   ..Dcfb0 ## initial location
>       .quad   ..Dcfe0-..Dcfb0 ## address range
>       .byte   0x4     ## DW_CFA_advance_loc4
>       .4byte  ..Dcfi0-..Dcfb0
>       .byte   0xe     ## DW_CFA_def_cfa_offset
>       .byte   0x10    ## ULEB128 16
>       .byte   0x86    ## DW_CFA_offset (col 6)
>       .byte   0x2     ## ULEB128 2
>       .byte   0x4     ## DW_CFA_advance_loc4
>       .4byte  ..Dcfi1-..Dcfi0
>       .byte   0xd     ## DW_CFA_def_cfa_register (col 6)
>       .byte   0x6     ## ULEB128 6
>       .align  8
> ..Dfdee0:
>       .ident  "PGC 7.0-7"
> [7:13] svbu-mpi:~/tmp % 
> -----
> 
> -- 
> Jeff Squyres
> jsquy...@cisco.com
> For corporate legal information go to:
> http://www.cisco.com/web/about/doing_business/legal/cri/
> 
> 
> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel


Reply via email to