On Wednesday, October 24, 2012 10:58:05 AM UTC+2, Jean-Pierre Flori wrote:
>
> [jp@jp-x220]% uname -a
> Linux jp-x220 3.5-trunk-amd64 #1 SMP Debian 3.5.5-1~experimental.1 x86_64
> GNU/Linux
>
> [jp@jp-x220]% cat /proc/cpuinfo
> processor : 0
> vendor_id : GenuineIntel
> cpu family : 6
> model : 42
> model name : Intel(R) Core(TM) i7-2620M CPU @ 2.70GHz
> ...
>
> [jp@jp-x220]% gcc -v
> Using built-in specs.
> COLLECT_GCC=gcc
> COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.7/lto-wrapper
> Target: x86_64-linux-gnu
> Configured with: ../src/configure -v --with-pkgversion='Debian 4.7.2-4'
> --with-bugurl=file:///usr/share/doc/gcc-4.7/README.Bugs
> --enable-languages=c,c++,go,fortran,objc,obj-c++ --prefix=/usr
> --program-suffix=-4.7 --enable-shared --enable-linker-build-id
> --with-system-zlib --libexecdir=/usr/lib --without-included-gettext
> --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.7
> --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu
> --enable-libstdcxx-debug --enable-libstdcxx-time=yes
> --enable-gnu-unique-object --enable-plugin --enable-objc-gc
> --with-arch-32=i586 --with-tune=generic --enable-checking=release
> --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
> Thread model: posix
> gcc version 4.7.2 (Debian 4.7.2-4)
>
> [jp@jp-x220]% ./configure --prefix=$LOCAL --enable-gmpcompat --enable-cxx
>
> [jp@jp-x220]% ./config.guess
> sandybridge-unknown-linux-gnu
>
> [jp@jp-x220]% make tune
> ...
> Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h
> Using: CPU cycle counter, supplemented by microsecond getrusage()
> speed_precision 1000000, speed_unittime 1.25e-09 secs, CPU freq 800.00 MHz
> DEFAULT_MAX_SIZE 1000, fft_max_size 50000
>
> /* Generated by tuneup.c, 2012-10-24, gcc 4.7 */
>
> #define MUL_KARATSUBA_THRESHOLD 16
> #define MUL_TOOM3_THRESHOLD 105
> #define MUL_TOOM4_THRESHOLD 244
> #define MUL_TOOM8H_THRESHOLD 327
>
> #define SQR_BASECASE_THRESHOLD 0 /* always (native) */
> #define SQR_KARATSUBA_THRESHOLD 31
> #define SQR_TOOM3_THRESHOLD 101
> #define SQR_TOOM4_THRESHOLD 256
> #define SQR_TOOM8_THRESHOLD 333
>
> #define POWM_THRESHOLD 138
>
> #define HGCD_THRESHOLD 75
> #define GCD_DC_THRESHOLD 2797
> #define GCDEXT_DC_THRESHOLD 1788
> #define JACOBI_BASE_METHOD 1
>
> #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
> #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
> #define MOD_1_NORM_THRESHOLD 0 /* always */
> #define MOD_1_UNNORM_THRESHOLD 0 /* always */
> #define USE_PREINV_DIVREM_1 1 /* native */
> #define USE_PREINV_MOD_1 1
> #define DIVEXACT_1_THRESHOLD 0 /* always */
> #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
> #define MOD_1_1_THRESHOLD 7
> #define MOD_1_2_THRESHOLD 7
> #define MOD_1_3_THRESHOLD 23
> #define DIVREM_HENSEL_QR_1_THRESHOLD 29
> #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 5
> #define DIVREM_EUCLID_HENSEL_THRESHOLD 146
>
> #define ROOTREM_THRESHOLD 6
>
> #define GET_STR_DC_THRESHOLD 17
> #define GET_STR_PRECOMPUTE_THRESHOLD 23
> #define SET_STR_DC_THRESHOLD 6915
> #define SET_STR_PRECOMPUTE_THRESHOLD 7939
>
> #define MUL_FFT_FULL_THRESHOLD 3008
>
> #define SQR_FFT_FULL_THRESHOLD 3520
>
> #define MULLOW_BASECASE_THRESHOLD 7
> #define MULLOW_DC_THRESHOLD 30
> #define MULLOW_MUL_THRESHOLD 4525
>
> #define MULHIGH_BASECASE_THRESHOLD 10
> #define MULHIGH_DC_THRESHOLD 27
> #define MULHIGH_MUL_THRESHOLD 2966
>
> #define MULMOD_2EXPM1_THRESHOLD 20
>
> #define FAC_UI_THRESHOLD 1590
> #define DC_DIV_QR_THRESHOLD 100
> #define DC_DIVAPPR_Q_N_THRESHOLD 90
> #define INV_DIV_QR_THRESHOLD 465
> #define INV_DIVAPPR_Q_N_THRESHOLD 90
> #define DC_DIV_Q_THRESHOLD 136
> #define INV_DIV_Q_THRESHOLD 5581
> #define DC_DIVAPPR_Q_THRESHOLD 100
> #define INV_DIVAPPR_Q_THRESHOLD 12502
> #define DC_BDIV_QR_THRESHOLD 100
> #define DC_BDIV_Q_THRESHOLD 44
>
> /* fft_tuning -- autogenerated by tune-fft */
>
> #define FFT_TAB \
> { { 4, 3 }, { 3, 2 }, { 3, 2 }, { 2, 1 }, { 1, 0 } }
>
> #define MULMOD_TAB \
> { 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 }
>
> #define FFT_N_NUM 19
>
> #define FFT_MULMOD_2EXPP1_CUTOFF 128
>
>
> /* Tuneup completed successfully, took 125 seconds */
>
>
There might have been some problems with CPU throttling above (look at the
800MHz in the make tune output).
Here is what I get when setting the cpufreq governor to performance (i.e.
2.7GHz).
[jp@jp-x220]% make tune
...
./tuneup
Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h
Using: CPU cycle counter, supplemented by microsecond getrusage()
speed_precision 1000000, speed_unittime 3.70e-10 secs, CPU freq 2701.00 MHz
DEFAULT_MAX_SIZE 1000, fft_max_size 50000
/* Generated by tuneup.c, 2012-10-24, gcc 4.7 */
#define MUL_KARATSUBA_THRESHOLD 16
#define MUL_TOOM3_THRESHOLD 105
#define MUL_TOOM4_THRESHOLD 246
#define MUL_TOOM8H_THRESHOLD 327
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_KARATSUBA_THRESHOLD 31
#define SQR_TOOM3_THRESHOLD 61
#define SQR_TOOM4_THRESHOLD 178
#define SQR_TOOM8_THRESHOLD 240
#define POWM_THRESHOLD 138
#define HGCD_THRESHOLD 42
#define GCD_DC_THRESHOLD 2770
#define GCDEXT_DC_THRESHOLD 1788
#define JACOBI_BASE_METHOD 1
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define USE_PREINV_DIVREM_1 1 /* native */
#define USE_PREINV_MOD_1 1
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
#define MOD_1_1_THRESHOLD 7
#define MOD_1_2_THRESHOLD 7
#define MOD_1_3_THRESHOLD 23
#define DIVREM_HENSEL_QR_1_THRESHOLD 31
#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 5
#define DIVREM_EUCLID_HENSEL_THRESHOLD 15
#define ROOTREM_THRESHOLD 6
#define GET_STR_DC_THRESHOLD 16
#define GET_STR_PRECOMPUTE_THRESHOLD 23
#define SET_STR_DC_THRESHOLD 6915
#define SET_STR_PRECOMPUTE_THRESHOLD 6915
#define MUL_FFT_FULL_THRESHOLD 3008
#define SQR_FFT_FULL_THRESHOLD 3520
#define MULLOW_BASECASE_THRESHOLD 7
#define MULLOW_DC_THRESHOLD 30
#define MULLOW_MUL_THRESHOLD 4570
#define MULHIGH_BASECASE_THRESHOLD 10
#define MULHIGH_DC_THRESHOLD 27
#define MULHIGH_MUL_THRESHOLD 2966
#define MULMOD_2EXPM1_THRESHOLD 20
#define FAC_UI_THRESHOLD 1605
#define DC_DIV_QR_THRESHOLD 100
#define DC_DIVAPPR_Q_N_THRESHOLD 91
#define INV_DIV_QR_THRESHOLD 465
#define INV_DIVAPPR_Q_N_THRESHOLD 91
#define DC_DIV_Q_THRESHOLD 130
#define INV_DIV_Q_THRESHOLD 5581
#define DC_DIVAPPR_Q_THRESHOLD 102
#define INV_DIVAPPR_Q_THRESHOLD 12637
#define DC_BDIV_QR_THRESHOLD 100
#define DC_BDIV_Q_THRESHOLD 42
/* fft_tuning -- autogenerated by tune-fft */
#define FFT_TAB \
{ { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } }
#define MULMOD_TAB \
{ 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1 }
#define FFT_N_NUM 23
#define FFT_MULMOD_2EXPP1_CUTOFF 128
/* Tuneup completed successfully, took 137 seconds */
These are slightly different, but no more so than when I rerun make tune
again with the CPU held at its maximum frequency; see below:
[jp@jp-x220]% make tune
...
./tuneup
Parameters for ./mpn/x86_64/sandybridge/gmp-mparam.h
Using: CPU cycle counter, supplemented by microsecond getrusage()
speed_precision 1000000, speed_unittime 3.70e-10 secs, CPU freq 2701.00 MHz
DEFAULT_MAX_SIZE 1000, fft_max_size 50000
/* Generated by tuneup.c, 2012-10-24, gcc 4.7 */
#define MUL_KARATSUBA_THRESHOLD 16
#define MUL_TOOM3_THRESHOLD 105
#define MUL_TOOM4_THRESHOLD 244
#define MUL_TOOM8H_THRESHOLD 303
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_KARATSUBA_THRESHOLD 31
#define SQR_TOOM3_THRESHOLD 95
#define SQR_TOOM4_THRESHOLD 250
#define SQR_TOOM8_THRESHOLD 351
#define POWM_THRESHOLD 138
#define HGCD_THRESHOLD 37
#define GCD_DC_THRESHOLD 2587
#define GCDEXT_DC_THRESHOLD 1788
#define JACOBI_BASE_METHOD 1
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define USE_PREINV_DIVREM_1 1 /* native */
#define USE_PREINV_MOD_1 1
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
#define MOD_1_1_THRESHOLD 7
#define MOD_1_2_THRESHOLD 7
#define MOD_1_3_THRESHOLD 23
#define DIVREM_HENSEL_QR_1_THRESHOLD 31
#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 5
#define DIVREM_EUCLID_HENSEL_THRESHOLD 121
#define ROOTREM_THRESHOLD 6
#define GET_STR_DC_THRESHOLD 17
#define GET_STR_PRECOMPUTE_THRESHOLD 23
#define SET_STR_DC_THRESHOLD 6915
#define SET_STR_PRECOMPUTE_THRESHOLD 8097
#define MUL_FFT_FULL_THRESHOLD 3008
#define SQR_FFT_FULL_THRESHOLD 3520
#define MULLOW_BASECASE_THRESHOLD 7
#define MULLOW_DC_THRESHOLD 30
#define MULLOW_MUL_THRESHOLD 4525
#define MULHIGH_BASECASE_THRESHOLD 10
#define MULHIGH_DC_THRESHOLD 30
#define MULHIGH_MUL_THRESHOLD 2966
#define MULMOD_2EXPM1_THRESHOLD 20
#define FAC_UI_THRESHOLD 1590
#define DC_DIV_QR_THRESHOLD 100
#define DC_DIVAPPR_Q_N_THRESHOLD 90
#define INV_DIV_QR_THRESHOLD 465
#define INV_DIVAPPR_Q_N_THRESHOLD 90
#define DC_DIV_Q_THRESHOLD 39
#define INV_DIV_Q_THRESHOLD 5581
#define DC_DIVAPPR_Q_THRESHOLD 104
#define INV_DIVAPPR_Q_THRESHOLD 14091
#define DC_BDIV_QR_THRESHOLD 100
#define DC_BDIV_Q_THRESHOLD 44
/* fft_tuning -- autogenerated by tune-fft */
#define FFT_TAB \
{ { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } }
#define MULMOD_TAB \
{ 4, 3, 3, 4, 4, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1 }
#define FFT_N_NUM 19
#define FFT_MULMOD_2EXPP1_CUTOFF 128
/* Tuneup completed successfully, took 124 seconds */
--
You received this message because you are subscribed to the Google Groups
"mpir-devel" group.
To view this discussion on the web visit
https://groups.google.com/d/msg/mpir-devel/-/jDgob3Czm8UJ.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/mpir-devel?hl=en.