In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/6b0260474df579e9412f57249519747ab8bb5c2b?hp=6bc3ceb8e17fdabd5d840e82376879dbae92483b>
- Log ----------------------------------------------------------------- commit 6b0260474df579e9412f57249519747ab8bb5c2b Author: Yves Orton <[email protected]> Date: Tue Dec 6 23:48:52 2016 +0100 use a hybrid hash function, OAATH for short keys, Siphash 1-3 for longer ones Switch to an optimized/unrolled variant of OAATH for keys 16 bytes and less, and use Siphash 1-3 for longer keys. (On 64 bit builds, 32 bit is untouched.) I've done a bunch of benchmarking with Porting/bench.pl to see at what point the 8 byte reads of Siphash 1-3 start to win over the low overhead costs of OAATH. It seems to be around 16 bytes. At the same time, we unroll OAATH so that we can save some instructions per character. The net result is all key lengths get faster as measured by Porting/bench.pl, and hashing longer keys becomes *much* faster. Interestingly the actual crossover point of OAATH being slower than Siphash 1-3 is much higher than might be guessed from bulk testing either in a raw benchmark. For instance, basic benchmarks of the hash function alone predicts that Siphash 1-3 should "win" at around 4 bytes. However, in practice it is four times longer. I believe this is because setting up Siphash blows away a fair amount of CPU state compared to OAATH, which is more important when the hash is going to be used to manage a data structure. So it requires a correspondingly longer string before the larger sized read starts to win. Summarized stats (higher is better): AVERAGE blead yves ------ ------ Ir 100.00 104.47 Dr 100.00 103.97 Dw 100.00 107.63 COND 100.00 112.16 IND 100.00 75.07 COND_m 100.00 125.75 IND_m 100.00 97.42 Ir_m1 100.00 137.11 Dr_m1 100.00 100.10 Dw_m1 100.00 99.62 Ir_mm 100.00 100.00 Dr_mm 100.00 100.00 Dw_mm 100.00 100.00 M hv_func.h commit 86f791091e14932363af3abc0e78844b973b86b5 Author: Yves Orton <[email protected]> Date: Tue Dec 6 23:27:38 2016 +0100 Add some tests for hash keys of different sizes. We test every key length from 1..24, and then various larger sizes. We expect most of our keys to be short so we check each length to see how performance is affected as key length increases. M t/perf/benchmarks commit 4044748b2797a9228c7309bce401eb5241f8f8a7 Author: Yves Orton <[email protected]> Date: Tue Dec 6 23:16:44 2016 +0100 allow two-step benchmarks with Porting/bench.pl with this patch you can time blead, and save the results: bench.pl --tests=/whatever/ --write=blead.time -- ./perl=blead then iterate over hacking the code and running bench.pl --read=blead.time --write=last.time -- ./perl=hacked and seeing the results of the --tests defined in the first run, and the results from the save file 'blead.time' compared against the modified version of perl in the most recent run. At the same time the merged data is written as last.time, allowing the usual post-test analysis of the results. M Porting/bench.pl ----------------------------------------------------------------------- Summary of changes: Porting/bench.pl | 85 ++++++++++--------- hv_func.h | 240 +++++++++++++++++++++++++++++++++++++++++------------- t/perf/benchmarks | 15 +++- 3 files changed, 240 insertions(+), 100 deletions(-) diff --git a/Porting/bench.pl b/Porting/bench.pl index be46c0e07c..03fd5cc621 100755 --- a/Porting/bench.pl +++ b/Porting/bench.pl @@ -25,6 +25,15 @@ perls. bench.pl --action=selftest + # Run bench on blead, which is then modified and timed again + + bench.pl [options] --write=blead.time -- ./perl=blead + # hack hack hack + bench.pl [options] --read=blead.time -- ./perl=hacked + + # You can also combine --read with --write + bench.pl [options] --read=blead.time --write=last.time -- ./perl=hacked + =head1 DESCRIPTION By default, F<bench.pl> will run code snippets found in @@ -42,10 +51,14 @@ measurements, such as instruction reads, conditional branch misses etc. There are options to write the raw data to a file, and to read it back. This means that you can view the same run data in different views with -different selection and sort options. +different selection and sort options. You can also use this mechanism +to save the results of timing one perl, and then read it back while timing +a modification, so that you dont have rerun the same tests on the same +perl over and over, or have two perls built at the same time. The optional C<=label> after each perl executable is used in the display -output. +output. If you are doing a two step benchmark then you should provide +a label for at least the "base" perl. =head1 OPTIONS @@ -264,7 +277,9 @@ Display progress information. --write=I<file> Save the raw data to the specified file. It can be read back later with -C<--read>. +C<--read>. If combined with C<--read> then the output file will be +the merge of the file read and any additional perls added on the command +line. Requires C<JSON::PP> to be available. @@ -385,10 +400,6 @@ my %OPTS = ( usage if $OPTS{help}; - if (defined $OPTS{read} and defined $OPTS{write}) { - die "Error: can't specify both --read and --write options\n"; - } - if (defined $OPTS{read} or defined $OPTS{write}) { # fail early if it's not present require JSON::PP; @@ -438,21 +449,6 @@ my %OPTS = ( die "Error: Can't specify both --bisect and --write\n" if defined $OPTS{write}; } - elsif (defined $OPTS{read}) { - if (@ARGV) { - die "Error: no perl executables may be specified with --read\n" - } - } - elsif ($OPTS{raw}) { - unless (@ARGV) { - die "Error: at least one perl executable must be specified\n"; - } - } - else { - unless (@ARGV >= 2) { - die "Error: at least two perl executables must be specified\n"; - } - } if ($OPTS{action} eq 'grind') { do_grind(\@ARGV); @@ -494,6 +490,7 @@ sub filter_tests { delete $tests->{$_} unless exists $t{$_}; } } + die "Error: no tests to run\n" unless %$tests; } @@ -550,8 +547,9 @@ sub select_a_perl { # 'perl-under-test's (PUTs) sub process_puts { + my $read_perls= shift; my @res_puts; # returned, each item is [ perlexe, label, @putargs ] - my %seen; + my %seen= map { $_->[1] => 1 } @$read_perls; my @putargs; # collect not-perls into args per PUT for my $p (reverse @_) { @@ -674,9 +672,9 @@ sub do_grind { if $bisect_min > $bisect_max; } - if (defined $OPTS{read}) { + if ($OPTS{read}) { open my $in, '<:encoding(UTF-8)', $OPTS{read} - or die " Error: can't open $OPTS{read} for reading: $!\n"; + or die " Error: can't open '$OPTS{read}' for reading: $!\n"; my $data = do { local $/; <$in> }; close $in; @@ -695,21 +693,23 @@ sub do_grind { $order = [ sort keys %$tests ]; } } - else { - # How many times to execute the loop for the two trials. The lower - # value is intended to do the loop enough times that branch - # prediction has taken hold; the higher loop allows us to see the - # branch misses after that - $loop_counts = [10, 20]; - - ($tests, $order) = read_tests_file($OPTS{benchfile}); - die "Error: only a single test may be specified with --bisect\n" - if defined $OPTS{bisect} and keys %$tests != 1; - - $perls = [ process_puts(@$perl_args) ]; + if (@$perl_args) { + unless ($loop_counts) { + # How many times to execute the loop for the two trials. The lower + # value is intended to do the loop enough times that branch + # prediction has taken hold; the higher loop allows us to see the + # branch misses after that + $loop_counts = [10, 20]; + + ($tests, $order) = read_tests_file($OPTS{benchfile}); + die "Error: only a single test may be specified with --bisect\n" + if defined $OPTS{bisect} and keys %$tests != 1; + } - $results = grind_run($tests, $order, $perls, $loop_counts); + my @run_perls= process_puts($perls, @$perl_args); + push @$perls, @run_perls; + $results = grind_run($tests, $order, \@run_perls, $loop_counts, $results); } # now that we have a list of perls, use it to process the @@ -740,7 +740,7 @@ sub do_grind { print $out $json or die "Error: writing to file '$OPTS{write}': $!\n"; close $out or die "Error: closing file '$OPTS{write}': $!\n"; } - else { + if (@$perls>1) { my ($processed, $averages) = grind_process($results, $perls, $loop_counts); @@ -773,7 +773,7 @@ sub do_grind { # Return a hash ref suitable for input to grind_process() sub grind_run { - my ($tests, $order, $perls, $counts) = @_; + my ($tests, $order, $perls, $counts, $results) = @_; # Build a list of all the jobs to run @@ -837,7 +837,6 @@ sub grind_run { my $running = 0; # count of executing jobs my %pids; # map pids to jobs my %fds; # map fds to jobs - my %results; my $select = IO::Select->new(); while (@jobs or $running) { @@ -938,7 +937,7 @@ sub grind_run { . "Output\n$o"; } - $results{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}] + $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}] = parse_cachegrind($output, $j->{id}, $j->{perl}); } @@ -961,7 +960,7 @@ sub grind_run { } } - return \%results; + return $results; } diff --git a/hv_func.h b/hv_func.h index 57b1ed1375..8e0329f3c8 100644 --- a/hv_func.h +++ b/hv_func.h @@ -14,6 +14,8 @@ #if !( 0 \ || defined(PERL_HASH_FUNC_SIPHASH) \ + || defined(PERL_HASH_FUNC_SIPHASH13) \ + || defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) \ || defined(PERL_HASH_FUNC_SDBM) \ || defined(PERL_HASH_FUNC_DJB2) \ || defined(PERL_HASH_FUNC_SUPERFAST) \ @@ -24,13 +26,25 @@ || defined(PERL_HASH_FUNC_MURMUR_HASH_64A) \ || defined(PERL_HASH_FUNC_MURMUR_HASH_64B) \ ) +#ifdef HAS_QUAD +#define PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13 +#else #define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD #endif +#endif #if defined(PERL_HASH_FUNC_SIPHASH) # define PERL_HASH_FUNC "SIPHASH_2_4" # define PERL_HASH_SEED_BYTES 16 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_SIPHASH13) +# define PERL_HASH_FUNC "SIPHASH_1_3" +# define PERL_HASH_SEED_BYTES 16 +# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_1_3((seed),(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) +# define PERL_HASH_FUNC "HYBRID_OAATHU_SIPHASH_1_3" +# define PERL_HASH_SEED_BYTES 24 +# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_oaathu_siphash_1_3((seed),(U8*)(str),(len)) #elif defined(PERL_HASH_FUNC_SUPERFAST) # define PERL_HASH_FUNC "SUPERFAST" # define PERL_HASH_SEED_BYTES 4 @@ -192,72 +206,89 @@ ((U64)((p)[7]) << 56)) #define SIPROUND \ - do { \ + STMT_START { \ v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \ v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \ v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \ v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \ - } while(0) + } STMT_END /* SipHash-2-4 */ -PERL_STATIC_INLINE U32 -S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { - /* "somepseudorandomlygeneratedbytes" */ - U64 v0 = UINT64_C(0x736f6d6570736575); - U64 v1 = UINT64_C(0x646f72616e646f6d); - U64 v2 = UINT64_C(0x6c7967656e657261); - U64 v3 = UINT64_C(0x7465646279746573); - - U64 b; - U64 k0 = ((const U64*)seed)[0]; - U64 k1 = ((const U64*)seed)[1]; - U64 m; - const int left = inlen & 7; - const U8 *end = in + inlen - left; - - b = ( ( U64 )(inlen) ) << 56; - v3 ^= k1; - v2 ^= k0; - v1 ^= k1; - v0 ^= k0; - - for ( ; in != end; in += 8 ) - { - m = U8TO64_LE( in ); - v3 ^= m; - SIPROUND; - SIPROUND; - v0 ^= m; - } - - switch( left ) - { - case 7: b |= ( ( U64 )in[ 6] ) << 48; - case 6: b |= ( ( U64 )in[ 5] ) << 40; - case 5: b |= ( ( U64 )in[ 4] ) << 32; - case 4: b |= ( ( U64 )in[ 3] ) << 24; - case 3: b |= ( ( U64 )in[ 2] ) << 16; - case 2: b |= ( ( U64 )in[ 1] ) << 8; - case 1: b |= ( ( U64 )in[ 0] ); break; - case 0: break; - } - - v3 ^= b; - SIPROUND; - SIPROUND; - v0 ^= b; - - v2 ^= 0xff; - SIPROUND; - SIPROUND; - SIPROUND; - SIPROUND; - b = v0 ^ v1 ^ v2 ^ v3; - return (U32)(b & U32_MAX); + +#define PERL_SIPHASH_FNC(FNC,SIP_ROUNDS,SIP_FINAL_ROUNDS) \ +PERL_STATIC_INLINE U32 \ +FNC(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { \ + /* "somepseudorandomlygeneratedbytes" */ \ + U64 v0 = UINT64_C(0x736f6d6570736575); \ + U64 v1 = UINT64_C(0x646f72616e646f6d); \ + U64 v2 = UINT64_C(0x6c7967656e657261); \ + U64 v3 = UINT64_C(0x7465646279746573); \ + \ + U64 b; \ + U64 k0 = ((const U64*)seed)[0]; \ + U64 k1 = ((const U64*)seed)[1]; \ + U64 m; \ + const int left = inlen & 7; \ + const U8 *end = in + inlen - left; \ + \ + b = ( ( U64 )(inlen) ) << 56; \ + v3 ^= k1; \ + v2 ^= k0; \ + v1 ^= k1; \ + v0 ^= k0; \ + \ + for ( ; in != end; in += 8 ) \ + { \ + m = U8TO64_LE( in ); \ + v3 ^= m; \ + \ + SIP_ROUNDS; \ + \ + v0 ^= m; \ + } \ + \ + switch( left ) \ + { \ + case 7: b |= ( ( U64 )in[ 6] ) << 48; \ + case 6: b |= ( ( U64 )in[ 5] ) << 40; \ + case 5: b |= ( ( U64 )in[ 4] ) << 32; \ + case 4: b |= ( ( U64 )in[ 3] ) << 24; \ + case 3: b |= ( ( U64 )in[ 2] ) << 16; \ + case 2: b |= ( ( U64 )in[ 1] ) << 8; \ + case 1: b |= ( ( U64 )in[ 0] ); break; \ + case 0: break; \ + } \ + \ + v3 ^= b; \ + \ + SIP_ROUNDS; \ + \ + v0 ^= b; \ + \ + v2 ^= 0xff; \ + \ + SIP_FINAL_ROUNDS \ + \ + b = v0 ^ v1 ^ v2 ^ v3; \ + return (U32)(b & U32_MAX); \ } + +PERL_SIPHASH_FNC( + S_perl_hash_siphash_1_3 + ,SIPROUND; + ,SIPROUND;SIPROUND;SIPROUND; +) + +PERL_SIPHASH_FNC( + S_perl_hash_siphash_2_4 + ,SIPROUND;SIPROUND; + ,SIPROUND;SIPROUND;SIPROUND;SIPROUND; +) + #endif /* defined(HAS_QUAD) */ + /* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in * (http://burtleburtle.net/bob/hash/doobs.html) * It is by Paul Hsieh (c) 2004 and is analysed here @@ -550,6 +581,102 @@ S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned return (hash + (hash << 15)); } +#ifdef HAS_QUAD +/* For short strings, 16 bytes or shorter, we use an optimised variant + * of One At A Time Hard, and for longer strings, we use siphash_1_3. + */ +PERL_STATIC_INLINE U32 +S_perl_hash_oaathu_siphash_1_3(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { + U32 hash = *((const U32*)seed) + (U32)len; + switch (len) { + case 16: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[15]; + case 15: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[14]; + case 14: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[13]; + case 13: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[12]; + case 12: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[11]; + case 11: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[10]; + case 10: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[9]; + case 9: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[8]; + case 8: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[7]; + case 7: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[6]; + case 6: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[5]; + case 5: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[4]; + case 4: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[3]; + case 3: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[2]; + case 2: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[1]; + case 1: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += str[0]; + case 0: + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[4]; + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[5]; + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[6]; + hash += (hash << 10); + hash ^= (hash >> 6); + hash += seed[7]; + hash += (hash << 10); + hash ^= (hash >> 6); + + hash += (hash << 3); + hash ^= (hash >> 11); + return (hash + (hash << 15)); + } + return S_perl_hash_siphash_1_3(seed+8, str, len); +} +#endif /* defined(HAS_QUAD) */ + PERL_STATIC_INLINE U32 S_perl_hash_old_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { const unsigned char * const end = (const unsigned char *)str + len; @@ -692,6 +819,7 @@ S_perl_hash_murmur_hash_64b (const unsigned char * const seed, const unsigned ch } #endif + /* legacy - only mod_perl should be doing this. */ #ifdef PERL_HASH_INTERNAL_ACCESS #define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 92411a23c9..778a3d475c 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -1284,5 +1284,18 @@ setup => 'my $i = 0;', code => 'while (++$i % 4) {}', }, - + ( + map { + sprintf('hash::set1k::len_%04d',$_) => { + desc => 'hash keys length '. $_, + setup => 'my $i = "A" x ' . $_ . '; my @s= map { $i++ } 1..1000;', + code => 'my %h; @h{@s}=();', + }, + } ( + 1..24, + 50, + 100, + 1000, + ) + ), ]; -- Perl5 Master Repository
