On 09/26/12 20:12, Liviu Nicoara wrote:
I have created STDCXX-1071 and linked to STDCXX-1056. [...]

I am open to all questions, the more the better. Most of my opinions have been 
expressed earlier, but please ask if you want to know more.


I am attaching here the proposed (4.3.x) patch and the timing results (after 
re-verifying the correctness of the timing program and the results). The 4.2.x 
patch, the 4.3.x patch, the test program, and the results file are also attached 
to the incident.

Thanks,
Liviu



Index: include/loc/_numpunct.h
===================================================================
--- include/loc/_numpunct.h     (revision 1388733)
+++ include/loc/_numpunct.h     (working copy)
@@ -61,7 +61,7 @@ struct numpunct: _RW::__rw_facet
     string_type;
 
     _EXPLICIT numpunct (_RWSTD_SIZE_T __ref = 0)
-        : _RW::__rw_facet (__ref), _C_flags (0) { }
+        : _RW::__rw_facet (__ref) { }
 
     virtual ~numpunct () _RWSTD_ATTRIBUTE_NOTHROW;
 
@@ -109,15 +109,6 @@ protected:
     virtual string_type do_falsename () const {
         return _RW::__rw_get_punct (this, _RW::__rw_fn, char_type ());
     }
-
-private:
-
-    int         _C_flags;           // bitmap of "cached data valid" flags
-    string      _C_grouping;        // cached results of virtual members
-    string_type _C_truename;
-    string_type _C_falsename;
-    char_type   _C_decimal_point;
-    char_type   _C_thousands_sep;
 };
 
 
@@ -139,17 +130,7 @@ template <class _CharT>
 inline _TYPENAME numpunct<_CharT>::char_type
 numpunct<_CharT>::decimal_point () const
 {
-    if (!(_C_flags & _RW::__rw_dp)) {
-
-        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
-        // [try to] get the decimal point first (may throw)
-        // then set a flag to avoid future initializations
-        __self->_C_decimal_point  = do_decimal_point ();
-        __self->_C_flags         |= _RW::__rw_dp;
-    }
-
-    return _C_decimal_point;
+    return do_decimal_point ();
 }
 
 
@@ -157,34 +138,14 @@ template <class _CharT>
 inline _TYPENAME numpunct<_CharT>::char_type
 numpunct<_CharT>::thousands_sep () const
 {
-    if (!(_C_flags & _RW::__rw_ts)) {
-
-        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
-        // [try to] get the thousands_sep first (may throw)
-        // then set a flag to avoid future initializations
-        __self->_C_thousands_sep  = do_thousands_sep ();
-        __self->_C_flags         |= _RW::__rw_ts;
-    }
-
-    return _C_thousands_sep;
+    return do_thousands_sep ();
 }
 
 
 template <class _CharT>
 inline string numpunct<_CharT>::grouping () const
 {
-    if (!(_C_flags & _RW::__rw_gr)) {
-
-        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
-        // [try to] get the grouping first (may throw)
-        // then set a flag to avoid future initializations
-        __self->_C_grouping  = do_grouping ();
-        __self->_C_flags    |= _RW::__rw_gr;
-    }
-
-    return _C_grouping;
+    return do_grouping ();
 }
 
 
@@ -192,17 +153,7 @@ template <class _CharT>
 inline _TYPENAME numpunct<_CharT>::string_type
 numpunct<_CharT>::truename () const
 {
-    if (!(_C_flags & _RW::__rw_tn)) {
-
-        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
-        // [try to] get the true name first (may throw)
-        // then set a flag to avoid future initializations
-        __self->_C_truename  = do_truename ();
-        __self->_C_flags    |= _RW::__rw_tn;
-    }
-
-    return _C_truename;
+    return do_truename ();
 }
 
 
@@ -210,17 +161,7 @@ template <class _CharT>
 inline _TYPENAME numpunct<_CharT>::string_type
 numpunct<_CharT>::falsename () const
 {
-    if (!(_C_flags & _RW::__rw_fn)) {
-
-        numpunct* const __self = _RWSTD_CONST_CAST (numpunct*, this);
-
-        // [try to] get the false name first (may throw)
-        // then set a flag to avoid future initializations
-        __self->_C_falsename  = do_falsename ();
-        __self->_C_flags     |= _RW::__rw_fn;
-    }
-
-    return _C_falsename;
+    return do_falsename ();
 }
 
 // #endif _RWSTD_NO_EXT_NUMPUNCT_PRIMARY
-*- mode: org -*-

* Machines:

** iMac, Intel, 4 cores:

$ uname -a; gcc -v
Darwin imax 11.4.0 Darwin Kernel Version 11.4.0: Mon Apr  9 19:32:15 PDT 2012; 
root:xnu-1699.26.8~1/RELEASE_X86_64 x86_64
gcc version 4.7.1 (GCC) 

** Linux Slackware, AMD, 16 cores:

$ uname -a; gcc -v
Linux behemoth 2.6.37.6 #3 SMP Sat Apr 9 22:49:32 CDT 2011 x86_64 AMD 
Opteron(tm) Processor 6134 AuthenticAMD GNU/Linux
gcc version 4.5.2 (GCC) 

* Method

** Library

Apply the patch. Build an optimized library (I used 12S in all runs). Build the 
library, rwtest, and locale database:

$ nice make -Clib
$ nice make -Cbin locales
$ nice make -Crwtest 

Export the RWSTD_LOCALE_ROOT environment variable if running against the
STDCXX locale database; otherwise, leave it unset:

$ export RWSTD_LOCALE_ROOT=/path/to/.../nls

** Test program

Place the multi-threaded program source file, t.cpp, in
<srcdir>/tests/localization and run make in the builddir: 

$ cd <builddir>/tests; nice make t

** Run the test

The simplest run of the program, with only a locale name argument, uses
16 threads and 10 million iterations. Both are adjustable with command
line arguments:

$ ./t en_US.UTF-8 4 2000

runs with 4 threads and 2000 iterations.

* Results

** iMac results

*** Current implementation, system locale database


$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000

real    0m33.104s
user    2m10.449s
sys     0m0.112s
8, 50000000

real    0m16.542s
user    1m4.338s
sys     0m0.068s
4, 50000000

real    0m8.252s
user    0m31.018s
sys     0m0.040s
2, 50000000

real    0m3.818s
user    0m7.619s
sys     0m0.005s
1, 50000000

real    0m1.057s
user    0m1.055s
sys     0m0.001s


*** Non-caching implementation, system locale database


$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000

real    0m24.898s
user    1m37.334s
sys     0m0.630s
8, 50000000

real    0m11.637s
user    0m45.625s
sys     0m0.202s
4, 50000000

real    0m5.273s
user    0m20.678s
sys     0m0.021s
2, 50000000

real    0m4.797s
user    0m9.573s
sys     0m0.002s
1, 50000000

real    0m4.772s
user    0m4.770s
sys     0m0.002s


*** Current implementation, STDCXX locale database


$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000

real    0m33.701s
user    2m12.132s
sys     0m0.132s
8, 50000000

real    0m16.854s
user    1m6.015s
sys     0m0.070s
4, 50000000

real    0m8.424s
user    0m33.142s
sys     0m0.029s
2, 50000000

real    0m4.217s
user    0m8.411s
sys     0m0.004s
1, 50000000

real    0m1.061s
user    0m1.059s
sys     0m0.001s


*** Non-caching implementation, STDCXX locale database


$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000

real    0m22.630s
user    1m28.019s
sys     0m0.674s
8, 50000000

real    0m11.032s
user    0m43.181s
sys     0m0.179s
4, 50000000

real    0m5.812s
user    0m22.502s
sys     0m0.022s
2, 50000000

real    0m4.801s
user    0m9.593s
sys     0m0.003s
1, 50000000

real    0m4.758s
user    0m4.755s
sys     0m0.002s


** Linux results

*** Current implementation, system locale database

$ for t in 16 8 4 2 1; do time ./t en_US.utf8 $t 50000000; done
16, 50000000

real    2m30.836s
user    2m52.162s
sys     34m17.302s
8, 50000000

real    1m47.536s
user    2m8.017s
sys     11m3.016s
4, 50000000

real    4m29.681s
user    6m10.412s
sys     10m0.532s
2, 50000000

real    0m17.843s
user    0m23.615s
sys     0m9.642s
1, 50000000

real    0m3.342s
user    0m3.338s
sys     0m0.003s


*** Non-caching implementation, system locale database


$ for t in 16 8 4 2 1; do time ./t en_US.utf8 $t 50000000; done
16, 50000000

real    0m13.573s
user    3m12.970s
sys     0m12.893s
8, 50000000

real    0m7.924s
user    1m2.516s
sys     0m0.047s
4, 50000000

real    0m8.051s
user    0m31.175s
sys     0m0.004s
2, 50000000

real    0m8.070s
user    0m15.789s
sys     0m0.002s
1, 50000000

real    0m7.610s
user    0m7.605s
sys     0m0.003s


*** Current implementation, STDCXX locale database

$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000

real    2m35.381s
user    2m54.088s
sys     34m25.967s
8, 50000000

real    1m51.603s
user    2m14.352s
sys     11m16.013s
4, 50000000

real    4m38.439s
user    6m26.472s
sys     10m13.408s
2, 50000000

real    0m13.011s
user    0m14.130s
sys     0m9.338s
1, 50000000

real    0m3.342s
user    0m3.338s
sys     0m0.002s


*** Non-caching implementation, STDCXX locale database

$ for t in 16 8 4 2 1; do time ./t en_US.UTF-8 $t 50000000; done
16, 50000000

real    0m13.674s
user    3m15.103s
sys     0m14.144s
8, 50000000

real    0m8.038s
user    1m2.721s
sys     0m0.005s
4, 50000000

real    0m7.961s
user    0m31.211s
sys     0m0.003s
2, 50000000

real    0m7.899s
user    0m15.627s
sys     0m0.003s
1, 50000000

real    0m7.836s
user    0m7.830s
sys     0m0.004s

Reply via email to