Forwarding to the list. Duh.

-------- Original Message --------
Subject: Re: Fwd: Re: STDCXX-1071 numpunct facet defect
Date: Sun, 30 Sep 2012 19:02:27 -0400
From: Liviu Nicoara <nikko...@hates.ms>
To: Martin Sebor <mse...@gmail.com>

On 9/30/12 6:18 PM, Martin Sebor wrote:
I see you did a 64-bit build while I did a 32-bit one. So
I tried 64 bits. The cached version (i.e., the one compiled
with -UNO_USE_NUMPUNCT_CACHE) is still about twice as fast
as the non-cached one (compiled with -DNO_USE_NUMPUNCT_CACHE).

I had made one change to the test program that I thought might
account for the difference: I removed the call to abort from
the thread function since it was causing the process to exit
prematurely in some of my tests. But since you used the
modified program for your latest measurements that couldn't
be it.

I can't explain the differences. They just don't make sense
to me. Your results should be the other way around. Can you
post the disassembly of function f() for each of the two
configurations of the test?


Here they are.

Liviu




Dump of assembler code for function f:
   0x0000000000403870 <+0>:     push   %r15
   0x0000000000403872 <+2>:     push   %r14
   0x0000000000403874 <+4>:     push   %r13
   0x0000000000403876 <+6>:     push   %r12
   0x0000000000403878 <+8>:     push   %rbp
   0x0000000000403879 <+9>:     push   %rbx
   0x000000000040387a <+10>:    mov    %rdi,%rbx
   0x000000000040387d <+13>:    sub    $0x38,%rsp
   0x0000000000403881 <+17>:    nopl   0x0(%rax)
   0x0000000000403888 <+24>:    movzbl 0x261f11(%rip),%eax        # 0x6657a0 <_ZL5pwait>
   0x000000000040388f <+31>:    test   %al,%al
   0x0000000000403891 <+33>:    jne    0x403888 <f+24>
   0x0000000000403893 <+35>:    cmpq   $0x0,0x261ef5(%rip)        # 0x665790 <_ZL6nloops>
   0x000000000040389b <+43>:    jle    0x403b12 <f+674>
   0x00000000004038a1 <+49>:    xor    %ebp,%ebp
   0x00000000004038a3 <+51>:    xor    %r12d,%r12d
   0x00000000004038a6 <+54>:    lea    0x10(%rsp),%r13
   0x00000000004038ab <+59>:    lea    0x48(%rbx),%r14
   0x00000000004038af <+63>:    lea    0x20(%rsp),%r15
   0x00000000004038b4 <+68>:    jmpq   0x4039a7 <f+311>
   0x00000000004038b9 <+73>:    nopl   0x0(%rax)
   0x00000000004038c0 <+80>:    cmp    $0x66a020,%rdx
   0x00000000004038c7 <+87>:    mov    %rdi,0x20(%rsp)
   0x00000000004038cc <+92>:    je     0x403ab8 <f+584>
   0x00000000004038d2 <+98>:    mov    %rdx,%rdi
   0x00000000004038d5 <+101>:   mov    %rdx,(%rsp)
   0x00000000004038d9 <+105>:   callq  0x403658 <pthread_mutex_lock@plt>
   0x00000000004038de <+110>:   test   %eax,%eax
   0x00000000004038e0 <+112>:   mov    (%rsp),%rdx
   0x00000000004038e4 <+116>:   je     0x4038fb <f+139>
   0x00000000004038e6 <+118>:   mov    $0x4452a4,%esi
   0x00000000004038eb <+123>:   mov    $0xa,%edi
   0x00000000004038f0 <+128>:   xor    %eax,%eax
   0x00000000004038f2 <+130>:   callq  0x404370 <_ZN4__rw10__rw_throwEiz>
   0x00000000004038f7 <+135>:   mov    (%rsp),%rdx
   0x00000000004038fb <+139>:   addl   $0x1,0x28(%rdx)
   0x00000000004038ff <+143>:   test   %rdx,%rdx
   0x0000000000403902 <+146>:   je     0x40390c <f+156>
   0x0000000000403904 <+148>:   mov    %rdx,%rdi
   0x0000000000403907 <+151>:   callq  0x4036c8 <pthread_mutex_unlock@plt>
   0x000000000040390c <+156>:   mov    0x20(%rsp),%rdx
   0x0000000000403911 <+161>:   mov    %rdx,%rdi
   0x0000000000403914 <+164>:   mov    %rdx,(%rsp)
   0x0000000000403918 <+168>:   callq  0x403258 <strlen@plt>
   0x000000000040391d <+173>:   mov    (%rsp),%rdx
   0x0000000000403921 <+177>:   add    %rax,%r12
   0x0000000000403924 <+180>:   lea    -0x40(%rdx),%rcx
   0x0000000000403928 <+184>:   cmp    $0x66a020,%rcx
   0x000000000040392f <+191>:   je     0x40398b <f+283>
   0x0000000000403931 <+193>:   mov    %rcx,%rdi
   0x0000000000403934 <+196>:   mov    %rcx,0x8(%rsp)
   0x0000000000403939 <+201>:   callq  0x403658 <pthread_mutex_lock@plt>
   0x000000000040393e <+206>:   test   %eax,%eax
   0x0000000000403940 <+208>:   mov    (%rsp),%rdx
   0x0000000000403944 <+212>:   mov    0x8(%rsp),%rcx
   0x0000000000403949 <+217>:   je     0x403965 <f+245>
   0x000000000040394b <+219>:   mov    $0x4452a4,%esi
   0x0000000000403950 <+224>:   mov    $0xa,%edi
   0x0000000000403955 <+229>:   xor    %eax,%eax
   0x0000000000403957 <+231>:   callq  0x404370 <_ZN4__rw10__rw_throwEiz>
   0x000000000040395c <+236>:   mov    0x8(%rsp),%rcx
   0x0000000000403961 <+241>:   mov    (%rsp),%rdx
   0x0000000000403965 <+245>:   mov    -0x18(%rdx),%esi
   0x0000000000403968 <+248>:   test   %rcx,%rcx
   0x000000000040396b <+251>:   lea    -0x1(%rsi),%eax
   0x000000000040396e <+254>:   mov    %eax,-0x18(%rdx)
   0x0000000000403971 <+257>:   je     0x403983 <f+275>
   0x0000000000403973 <+259>:   mov    %rcx,%rdi
   0x0000000000403976 <+262>:   mov    %esi,0x8(%rsp)
   0x000000000040397a <+266>:   callq  0x4036c8 <pthread_mutex_unlock@plt>
   0x000000000040397f <+271>:   mov    0x8(%rsp),%esi
   0x0000000000403983 <+275>:   test   %esi,%esi
   0x0000000000403985 <+277>:   jle    0x403a80 <f+528>
   0x000000000040398b <+283>:   add    $0x1,%ebp
   0x000000000040398e <+286>:   movq   $0x0,0x20(%rsp)
   0x0000000000403997 <+295>:   movslq %ebp,%rax
   0x000000000040399a <+298>:   cmp    0x261def(%rip),%rax        # 0x665790 <_ZL6nloops>
   0x00000000004039a1 <+305>:   jge    0x403b00 <f+656>
   0x00000000004039a7 <+311>:   mov    0x40(%rbx),%eax
   0x00000000004039aa <+314>:   test   $0x1,%al
   0x00000000004039ac <+316>:   jne    0x403a38 <f+456>
   0x00000000004039b2 <+322>:   or     $0x1,%eax
   0x00000000004039b5 <+325>:   mov    %rbx,%rsi
   0x00000000004039b8 <+328>:   mov    %r13,%rdi
   0x00000000004039bb <+331>:   mov    %eax,0x40(%rbx)
   0x00000000004039be <+334>:   mov    (%rbx),%rax
   0x00000000004039c1 <+337>:   callq  *(%rax)
   0x00000000004039c3 <+339>:   mov    %r13,%rsi
   0x00000000004039c6 <+342>:   mov    %r14,%rdi
   0x00000000004039c9 <+345>:   callq  0x410830 <_ZNSsaSERKSs>
   0x00000000004039ce <+350>:   mov    0x10(%rsp),%rdx
   0x00000000004039d3 <+355>:   sub    $0x40,%rdx
   0x00000000004039d7 <+359>:   cmp    $0x66a020,%rdx
   0x00000000004039de <+366>:   je     0x403a2f <f+447>
   0x00000000004039e0 <+368>:   mov    %rdx,%rdi
   0x00000000004039e3 <+371>:   mov    %rdx,(%rsp)
   0x00000000004039e7 <+375>:   callq  0x403658 <pthread_mutex_lock@plt>
   0x00000000004039ec <+380>:   test   %eax,%eax
   0x00000000004039ee <+382>:   mov    (%rsp),%rdx
   0x00000000004039f2 <+386>:   je     0x403a09 <f+409>
   0x00000000004039f4 <+388>:   mov    $0x4452a4,%esi
   0x00000000004039f9 <+393>:   mov    $0xa,%edi
   0x00000000004039fe <+398>:   xor    %eax,%eax
   0x0000000000403a00 <+400>:   callq  0x404370 <_ZN4__rw10__rw_throwEiz>
   0x0000000000403a05 <+405>:   mov    (%rsp),%rdx
   0x0000000000403a09 <+409>:   mov    0x28(%rdx),%ecx
   0x0000000000403a0c <+412>:   test   %rdx,%rdx
   0x0000000000403a0f <+415>:   lea    -0x1(%rcx),%eax
   0x0000000000403a12 <+418>:   mov    %eax,0x28(%rdx)
   0x0000000000403a15 <+421>:   je     0x403a27 <f+439>
   0x0000000000403a17 <+423>:   mov    %rdx,%rdi
   0x0000000000403a1a <+426>:   mov    %ecx,0x8(%rsp)
   0x0000000000403a1e <+430>:   callq  0x4036c8 <pthread_mutex_unlock@plt>
   0x0000000000403a23 <+435>:   mov    0x8(%rsp),%ecx
   0x0000000000403a27 <+439>:   test   %ecx,%ecx
   0x0000000000403a29 <+441>:   jle    0x403ac8 <f+600>
   0x0000000000403a2f <+447>:   movq   $0x0,0x10(%rsp)
   0x0000000000403a38 <+456>:   mov    0x48(%rbx),%rdi
   0x0000000000403a3c <+460>:   cmpl   $0xffffffffffffffff,-0x18(%rdi)
   0x0000000000403a40 <+464>:   lea    -0x40(%rdi),%rdx
   0x0000000000403a44 <+468>:   jne    0x4038c0 <f+80>
   0x0000000000403a4a <+474>:   mov    -0x8(%rdi),%rcx
   0x0000000000403a4e <+478>:   mov    %r15,%rdi
   0x0000000000403a51 <+481>:   mov    %rcx,%rdx
   0x0000000000403a54 <+484>:   mov    %rcx,%rsi
   0x0000000000403a57 <+487>:   mov    %rcx,0x8(%rsp)
   0x0000000000403a5c <+492>:   callq  0x41ae60 <_ZNSs10_C_get_repEmm>
   0x0000000000403a61 <+497>:   mov    0x8(%rsp),%rcx
   0x0000000000403a66 <+502>:   mov    0x48(%rbx),%rsi
   0x0000000000403a6a <+506>:   lea    0x40(%rax),%rdi
   0x0000000000403a6e <+510>:   mov    %rdi,0x20(%rsp)
   0x0000000000403a73 <+515>:   mov    %rcx,%rdx
   0x0000000000403a76 <+518>:   callq  0x403458 <memcpy@plt>
   0x0000000000403a7b <+523>:   jmpq   0x40390c <f+156>
   0x0000000000403a80 <+528>:   mov    0x20(%rsp),%rax
   0x0000000000403a85 <+533>:   mov    -0x10(%rax),%rsi
   0x0000000000403a89 <+537>:   lea    -0x40(%rax),%rdi
   0x0000000000403a8d <+541>:   add    $0x42,%rsi
   0x0000000000403a91 <+545>:   mov    %rsi,0x8(%rsp)
   0x0000000000403a96 <+550>:   callq  0x403698 <pthread_mutex_destroy@plt>
   0x0000000000403a9b <+555>:   mov    0x20(%rsp),%rdi
   0x0000000000403aa0 <+560>:   mov    0x8(%rsp),%rsi
   0x0000000000403aa5 <+565>:   xor    %edx,%edx
   0x0000000000403aa7 <+567>:   sub    $0x40,%rdi
   0x0000000000403aab <+571>:   callq  0x408170 <_ZN4__rw15__rw_deallocateEPvmi>
   0x0000000000403ab0 <+576>:   jmpq   0x40398b <f+283>
   0x0000000000403ab5 <+581>:   nopl   (%rax)
   0x0000000000403ab8 <+584>:   callq  0x403258 <strlen@plt>
   0x0000000000403abd <+589>:   add    %rax,%r12
   0x0000000000403ac0 <+592>:   jmpq   0x40398b <f+283>
   0x0000000000403ac5 <+597>:   nopl   (%rax)
   0x0000000000403ac8 <+600>:   mov    0x10(%rsp),%rax
   0x0000000000403acd <+605>:   mov    -0x10(%rax),%rsi
   0x0000000000403ad1 <+609>:   lea    -0x40(%rax),%rdi
   0x0000000000403ad5 <+613>:   add    $0x42,%rsi
   0x0000000000403ad9 <+617>:   mov    %rsi,0x8(%rsp)
   0x0000000000403ade <+622>:   callq  0x403698 <pthread_mutex_destroy@plt>
   0x0000000000403ae3 <+627>:   mov    0x10(%rsp),%rdi
   0x0000000000403ae8 <+632>:   mov    0x8(%rsp),%rsi
   0x0000000000403aed <+637>:   xor    %edx,%edx
   0x0000000000403aef <+639>:   sub    $0x40,%rdi
   0x0000000000403af3 <+643>:   callq  0x408170 <_ZN4__rw15__rw_deallocateEPvmi>
   0x0000000000403af8 <+648>:   jmpq   0x403a2f <f+447>
   0x0000000000403afd <+653>:   nopl   (%rax)
   0x0000000000403b00 <+656>:   mov    %r12,%rax
   0x0000000000403b03 <+659>:   add    $0x38,%rsp
   0x0000000000403b07 <+663>:   pop    %rbx
   0x0000000000403b08 <+664>:   pop    %rbp
   0x0000000000403b09 <+665>:   pop    %r12
   0x0000000000403b0b <+667>:   pop    %r13
   0x0000000000403b0d <+669>:   pop    %r14
   0x0000000000403b0f <+671>:   pop    %r15
   0x0000000000403b11 <+673>:   retq   
   0x0000000000403b12 <+674>:   xor    %eax,%eax
   0x0000000000403b14 <+676>:   jmp    0x403b03 <f+659>
   0x0000000000403b16 <+678>:   mov    %rax,%rbx
   0x0000000000403b19 <+681>:   mov    %r13,%rdi
   0x0000000000403b1c <+684>:   callq  0x40ff40 <_ZNSsD2Ev>
   0x0000000000403b21 <+689>:   mov    %rbx,%rdi
   0x0000000000403b24 <+692>:   callq  0x4036b8 <_Unwind_Resume@plt>

Dump of assembler code for function f:
   0x0000000000403870 <+0>:     push   %r15
   0x0000000000403872 <+2>:     push   %r14
   0x0000000000403874 <+4>:     push   %r13
   0x0000000000403876 <+6>:     push   %r12
   0x0000000000403878 <+8>:     mov    %rdi,%r12
   0x000000000040387b <+11>:    push   %rbp
   0x000000000040387c <+12>:    push   %rbx
   0x000000000040387d <+13>:    sub    $0x28,%rsp
   0x0000000000403881 <+17>:    nopl   0x0(%rax)
   0x0000000000403888 <+24>:    movzbl 0x2579d1(%rip),%eax        # 0x65b260 <_ZL5pwait>
   0x000000000040388f <+31>:    test   %al,%al
   0x0000000000403891 <+33>:    jne    0x403888 <f+24>
   0x0000000000403893 <+35>:    xor    %eax,%eax
   0x0000000000403895 <+37>:    cmpq   $0x0,0x2579b3(%rip)        # 0x65b250 <_ZL6nloops>
   0x000000000040389d <+45>:    jle    0x403973 <f+259>
   0x00000000004038a3 <+51>:    xor    %ebx,%ebx
   0x00000000004038a5 <+53>:    xor    %ebp,%ebp
   0x00000000004038a7 <+55>:    lea    0x10(%rsp),%r13
   0x00000000004038ac <+60>:    jmp    0x4038cc <f+92>
   0x00000000004038ae <+62>:    xchg   %ax,%ax
   0x00000000004038b0 <+64>:    add    $0x1,%ebx
   0x00000000004038b3 <+67>:    movq   $0x0,0x10(%rsp)
   0x00000000004038bc <+76>:    movslq %ebx,%rax
   0x00000000004038bf <+79>:    cmp    0x25798a(%rip),%rax        # 0x65b250 <_ZL6nloops>
   0x00000000004038c6 <+86>:    jge    0x403970 <f+256>
   0x00000000004038cc <+92>:    mov    (%r12),%rax
   0x00000000004038d0 <+96>:    mov    %r12,%rsi
   0x00000000004038d3 <+99>:    mov    %r13,%rdi
   0x00000000004038d6 <+102>:   callq  *(%rax)
   0x00000000004038d8 <+104>:   mov    0x10(%rsp),%r14
   0x00000000004038dd <+109>:   lea    -0x40(%r14),%r15
   0x00000000004038e1 <+113>:   mov    %r14,%rdi
   0x00000000004038e4 <+116>:   callq  0x403258 <strlen@plt>
   0x00000000004038e9 <+121>:   add    %rax,%rbp
   0x00000000004038ec <+124>:   cmp    $0x65fae0,%r15
   0x00000000004038f3 <+131>:   je     0x4038b0 <f+64>
   0x00000000004038f5 <+133>:   mov    %r15,%rdi
   0x00000000004038f8 <+136>:   callq  0x403658 <pthread_mutex_lock@plt>
   0x00000000004038fd <+141>:   test   %eax,%eax
   0x00000000004038ff <+143>:   je     0x403912 <f+162>
   0x0000000000403901 <+145>:   mov    $0x43fdc4,%esi
   0x0000000000403906 <+150>:   mov    $0xa,%edi
   0x000000000040390b <+155>:   xor    %eax,%eax
   0x000000000040390d <+157>:   callq  0x404090 <_ZN4__rw10__rw_throwEiz>
   0x0000000000403912 <+162>:   mov    -0x18(%r14),%edx
   0x0000000000403916 <+166>:   test   %r15,%r15
   0x0000000000403919 <+169>:   lea    -0x1(%rdx),%eax
   0x000000000040391c <+172>:   mov    %eax,-0x18(%r14)
   0x0000000000403920 <+176>:   je     0x403932 <f+194>
   0x0000000000403922 <+178>:   mov    %r15,%rdi
   0x0000000000403925 <+181>:   mov    %edx,0x8(%rsp)
   0x0000000000403929 <+185>:   callq  0x4036c8 <pthread_mutex_unlock@plt>
   0x000000000040392e <+190>:   mov    0x8(%rsp),%edx
   0x0000000000403932 <+194>:   test   %edx,%edx
   0x0000000000403934 <+196>:   jg     0x4038b0 <f+64>
   0x000000000040393a <+202>:   mov    0x10(%rsp),%rax
   0x000000000040393f <+207>:   lea    -0x40(%rax),%rdi
   0x0000000000403943 <+211>:   mov    -0x10(%rax),%r14
   0x0000000000403947 <+215>:   callq  0x403698 <pthread_mutex_destroy@plt>
   0x000000000040394c <+220>:   mov    0x10(%rsp),%rdi
   0x0000000000403951 <+225>:   add    $0x42,%r14
   0x0000000000403955 <+229>:   xor    %edx,%edx
   0x0000000000403957 <+231>:   mov    %r14,%rsi
   0x000000000040395a <+234>:   sub    $0x40,%rdi
   0x000000000040395e <+238>:   callq  0x407e90 <_ZN4__rw15__rw_deallocateEPvmi>
   0x0000000000403963 <+243>:   jmpq   0x4038b0 <f+64>
   0x0000000000403968 <+248>:   nopl   0x0(%rax,%rax,1)
   0x0000000000403970 <+256>:   mov    %rbp,%rax
   0x0000000000403973 <+259>:   add    $0x28,%rsp
   0x0000000000403977 <+263>:   pop    %rbx
   0x0000000000403978 <+264>:   pop    %rbp
   0x0000000000403979 <+265>:   pop    %r12
   0x000000000040397b <+267>:   pop    %r13
   0x000000000040397d <+269>:   pop    %r14
   0x000000000040397f <+271>:   pop    %r15
   0x0000000000403981 <+273>:   retq   
End of assembler dump.

Reply via email to