In perl.git, the branch yves/hv_h_split has been updated <http://perl5.git.perl.org/perl.git/commitdiff/66c5e7c96ab95ab7fba6b5eeb1f7175bd8138b05?hp=c2669ab8fb2498011780806aa2073f809a3d61ab>
- Log ----------------------------------------------------------------- commit 66c5e7c96ab95ab7fba6b5eeb1f7175bd8138b05 Author: Yves Orton <[email protected]> Date: Mon Feb 18 10:18:48 2013 +0100 randomize bucket split insertion in hsplit() M hv.c commit a4c171affab792d1ea2c4b05cc4a305c42a43f17 Author: Yves Orton <[email protected]> Date: Mon Feb 18 09:36:35 2013 +0100 turn the ptr_hash inline code into a true sub Borrowed from autobox, which is released under the same terms as Perl. M embed.fnc M embed.h M hv.c M proto.h ----------------------------------------------------------------------- Summary of changes: embed.fnc | 1 + embed.h | 1 + hv.c | 82 ++++++++++++++++++++++++++++++++++++++---------------------- proto.h | 1 + 4 files changed, 55 insertions(+), 30 deletions(-) diff --git a/embed.fnc b/embed.fnc index 31ce911..40e6583 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1777,6 +1777,7 @@ sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags rs |void |hv_notallowed |int flags|NN const char *key|I32 klen|NN const char *msg +sn |U32|ptr_hash|PTRV u sn |struct xpvhv_aux*|hv_auxinit|NN HV *hv sM |SV* |hv_delete_common|NULLOK HV *hv|NULLOK SV *keysv \ |NULLOK const char *key|STRLEN klen|int k_flags|I32 d_flags \ diff --git a/embed.h b/embed.h index 0460505..aeeda26 100644 --- a/embed.h +++ b/embed.h @@ -1382,6 +1382,7 @@ #define hv_magic_check S_hv_magic_check #define hv_notallowed(a,b,c,d) S_hv_notallowed(aTHX_ a,b,c,d) #define new_he() S_new_he(aTHX) +#define ptr_hash S_ptr_hash #define refcounted_he_value(a) S_refcounted_he_value(aTHX_ a) #define save_hek_flags S_save_hek_flags #define share_hek_flags(a,b,c,d) S_share_hek_flags(aTHX_ a,b,c,d) diff --git a/hv.c b/hv.c index 98be634..87c34f6 100644 --- a/hv.c +++ b/hv.c @@ -1092,6 +1092,7 @@ S_hsplit(pTHX_ HV *hv) const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */ I32 newsize = oldsize * 2; I32 i; + U32 bucket_rand; char *a = (char*) HvARRAY(hv); HE **aep; @@ -1139,6 +1140,13 @@ S_hsplit(pTHX_ HV *hv) HvARRAY(hv) = (HE**) a; aep = (HE**)a; + /* the idea of this is that we create a "random" value by hashing the address of + * the array, we then use the low bit to decide if we insert at the top, or insert + * second from top. After each such insert we rotate the hashed value. So we can + * use the same hashed value over and over, and in normal build environments use + * very few ops to do so. ROTL32() should produce a single machine operation. */ + bucket_rand= ptr_hash((PTRV)a); + for (i=0; i<oldsize; i++,aep++) { HE **oentry = aep; HE *entry = *aep; @@ -1150,17 +1158,21 @@ S_hsplit(pTHX_ HV *hv) do { if ((HeHASH(entry) & newsize) != (U32)i) { *oentry = HeNEXT(entry); - HeNEXT(entry) = *bep; - *bep = entry; + if (!*bep || (bucket_rand & 1)) { + HeNEXT(entry) = *bep; + *bep = entry; + } else { + HE *tmp= HeNEXT(*bep); + HeNEXT(*bep)= entry; + HeNEXT(entry)= tmp; + } + ROTL32(bucket_rand,1); } else { oentry = &HeNEXT(entry); } entry = *oentry; } while (entry); - /* I think we don't actually need to keep track of the longest length, - merely flag if anything is too long. But for the moment while - developing this code I'll track it. */ } } @@ -1816,6 +1828,40 @@ Perl_hv_fill(pTHX_ HV const *const hv) return count; } +/* hash a pointer to a U32 - Used in the hash traversal randomization + * and bucket order randomization code + * + * this code was derived from Sereal, which was derived from autobox. + */ + +PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) { +#if PTRSIZE == 8 + /* + * This is one of Thomas Wang's hash functions for 64-bit integers from: + * http://www.concentric.net/~Ttwang/tech/inthash.htm + */ + u = (~u) + (u << 18); + u = u ^ (u >> 31); + u = u * 21; + u = u ^ (u >> 11); + u = u + (u << 6); + u = u ^ (u >> 22); +#else + /* + * This is one of Bob Jenkins' hash functions for 32-bit integers + * from: http://burtleburtle.net/bob/hash/integer.html + */ + u = (u + 0x7ed55d16) + (u << 12); + u = (u ^ 0xc761c23c) ^ (u >> 19); + u = (u + 0x165667b1) + (u << 5); + u = (u + 0xd3a2646c) ^ (u << 9); + u = (u + 0xfd7046c5) + (u << 3); + u = (u ^ 0xb55a4f09) ^ (u >> 16); +#endif + return (U32)u; +} + + static struct xpvhv_aux* S_hv_auxinit(HV *hv) { struct xpvhv_aux *iter; @@ -1832,31 +1878,7 @@ S_hv_auxinit(HV *hv) { + sizeof(struct xpvhv_aux), char); } if (!HvRAND(hv)) { - PTRV u= (PTRV)array; -#if PTRSIZE == 8 - /* - * This is one of Thomas Wang's hash functions for 64-bit integers from: - * http://www.concentric.net/~Ttwang/tech/inthash.htm - */ - u = (~u) + (u << 18); - u = u ^ (u >> 31); - u = u * 21; - u = u ^ (u >> 11); - u = u + (u << 6); - u = u ^ (u >> 22); -#else - /* - * This is one of Bob Jenkins' hash functions for 32-bit integers - * from: http://burtleburtle.net/bob/hash/integer.html - */ - u = (u + 0x7ed55d16) + (u << 12); - u = (u ^ 0xc761c23c) ^ (u >> 19); - u = (u + 0x165667b1) + (u << 5); - u = (u + 0xd3a2646c) ^ (u << 9); - u = (u + 0xfd7046c5) + (u << 3); - u = (u ^ 0xb55a4f09) ^ (u >> 16); -#endif - HvRAND(hv)= (U32)u; + HvRAND(hv)= ptr_hash((PTRV)array); } HvARRAY(hv) = (HE**) array; diff --git a/proto.h b/proto.h index a2cf682..ae36298 100644 --- a/proto.h +++ b/proto.h @@ -5732,6 +5732,7 @@ STATIC HE* S_new_he(pTHX) __attribute__malloc__ __attribute__warn_unused_result__; +STATIC U32 S_ptr_hash(PTRV u); STATIC SV * S_refcounted_he_value(pTHX_ const struct refcounted_he *he) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_REFCOUNTED_HE_VALUE \ -- Perl5 Master Repository
