On Thu, 25 Mar 2010, stephane eranian wrote:

> vince,
> 
> Could you make your program such it has only 1 billion instructions?
> I want to see if there is some correlation with counter overflows.
> They are actually only 31 bit on Intel.

sure, I've attached the 1 billion instruction version.

I'm also working on a few memory microbenchmarks too to see if I can maybe 
find any patterns in the retired_stores cases.

Vince
# Test case with 1 billion instructions
#  by Vince Weaver, vweaver1 _at_ eecs.utk.edu
        
# To build on x86_64
#  as -o one_billion.o one_billion.s ; ld -o one_billion one_billion.o
# To build on i686
#  as --defsym i386=1 -o one_billion.o one_billion.s ; ld -o one_billion one_billion.o

# With deterministic performance counters, this should get you
#     1,000,000,000 retired instructions
#       499,998,996 retired branches
#                 0 retired stores
#                 0 retired loads
        

             #   retired_instructions is 
             #    2 + (( (inside) + 2) * 1000) + 2 + (996 * 2) + 4
             #    inside = ( 2 + (499997 * 2)) = 999996
             #    total = 2 + ((999996+2) * 1000) + 2 + 1992 + 4
             #    total = 1,000,000,000 (1 billion)
             
             # retired branches is
             #    ((499,997 + 1) * 1,000) + 996 = 499,998,996
             
        # Trip counts and exit-syscall numbers, named once.  These are
        # assemble-time constants only; the instruction sequence below is the
        # same as with the literal values written inline.
        .equ    OUTER_TRIPS,  1000      # passes through outer_pass
        .equ    INNER_TRIPS,  499997    # dec/jnz pairs per outer pass
        .equ    TAIL_TRIPS,   996       # dec/jnz pairs in the top-up loop
        .equ    NR_EXIT_I386, 1         # value placed in %eax before int $0x80
        .equ    NR_EXIT_X64,  60        # value placed in %rax before syscall

        .globl  _start
_start:

        # Setup, 2 instructions.  The xor is redundant (the mov overwrites
        # %edx); it is kept because it is one of the counted instructions.
        xor     %edx,%edx
        mov     $OUTER_TRIPS,%edx       # %edx = outer passes remaining
outer_pass:

        # One pass = 2 (setup) + 2*INNER_TRIPS + 2 (dec/jnz below)
        #          = 999,998 instructions.
        xor     %ecx,%ecx               # redundant filler, counted on purpose
        mov     $INNER_TRIPS,%ecx       # %ecx = inner iterations remaining
inner_spin:
        dec     %ecx                    # register-only countdown
        jnz     inner_spin              # loops until %ecx reaches zero

        dec     %edx
        jnz     outer_pass

        # Running total so far: 2 + 1000 * 999,998 = 999,998,002.
        # Top-up: 2 + 2*TAIL_TRIPS = 1,994 more, giving 999,999,996.

        xor     %ecx,%ecx               # redundant filler, counted on purpose
        mov     $TAIL_TRIPS,%ecx
tail_spin:
        dec     %ecx
        jnz     tail_spin



        #================================
        # Terminate with status 0
        #================================
        # Both paths below are nop + 3 instructions = 4, which brings the
        # program total to 1,000,000,000.

finish:
        nop                             # single filler instruction

.ifdef i386
        xor     %ebx,%ebx               # exit status 0
        mov     $NR_EXIT_I386,%eax
        int     $0x80
.else
        xor     %rdi,%rdi               # exit status 0
        mov     $NR_EXIT_X64,%rax
        syscall
.endif

        
------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel

Reply via email to