Change 21471 by [EMAIL PROTECTED] on 2003/10/16 21:10:27
Plan C for foiling the algorithmic complexity attack
(based on Chip's plan A (binary compatibility with 5.8.0 and 5.8.1),
Chip's plan B (do something new inside the hv functions)
and introspective sort)
Provides infrastructure for hashes to change their hash function
if necessary, and code in hsplit to detect pathalogical data and
instigate a random rehashing.
Needs refinement. Let's see how much smoke it creates.
Affected files ...
... //depot/perl/embedvar.h#181 edit
... //depot/perl/hv.c#139 edit
... //depot/perl/hv.h#43 edit
... //depot/perl/intrpvar.h#134 edit
... //depot/perl/perl.c#527 edit
... //depot/perl/perlapi.h#103 edit
... //depot/perl/sv.c#691 edit
... //depot/perl/sv.h#149 edit
... //depot/perl/util.c#405 edit
Differences ...
==== //depot/perl/embedvar.h#181 (text+w) ====
Index: perl/embedvar.h
--- perl/embedvar.h#180~20565~ Fri Aug 8 11:59:40 2003
+++ perl/embedvar.h Thu Oct 16 14:10:27 2003
@@ -320,6 +320,8 @@
#define PL_multi_open (vTHX->Imulti_open)
#define PL_multi_start (vTHX->Imulti_start)
#define PL_multiline (vTHX->Imultiline)
+#define PL_new_hash_seed (vTHX->Inew_hash_seed)
+#define PL_new_hash_seed_set (vTHX->Inew_hash_seed_set)
#define PL_nexttoke (vTHX->Inexttoke)
#define PL_nexttype (vTHX->Inexttype)
#define PL_nextval (vTHX->Inextval)
@@ -624,6 +626,8 @@
#define PL_Imulti_open PL_multi_open
#define PL_Imulti_start PL_multi_start
#define PL_Imultiline PL_multiline
+#define PL_Inew_hash_seed PL_new_hash_seed
+#define PL_Inew_hash_seed_set PL_new_hash_seed_set
#define PL_Inexttoke PL_nexttoke
#define PL_Inexttype PL_nexttype
#define PL_Inextval PL_nextval
==== //depot/perl/hv.c#139 (text) ====
Index: perl/hv.c
--- perl/hv.c#138~21469~ Thu Oct 16 12:31:19 2003
+++ perl/hv.c Thu Oct 16 14:10:27 2003
@@ -274,7 +274,11 @@
}
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -445,7 +449,9 @@
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
@@ -628,7 +634,12 @@
if (flags)
HvHASKFLAGS_on((SV*)hv);
- if (!hash)
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
if (!xhv->xhv_array /* !HvARRAY(hv) */)
@@ -798,7 +809,12 @@
HvHASKFLAGS_on((SV*)hv);
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
@@ -950,7 +966,11 @@
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -1107,8 +1127,11 @@
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -1255,7 +1278,11 @@
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
#ifdef DYNAMIC_ENV_FETCH
if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*);
@@ -1359,7 +1386,9 @@
if (key != keysave)
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
#ifdef DYNAMIC_ENV_FETCH
@@ -1415,6 +1444,8 @@
register HE **bep;
register HE *entry;
register HE **oentry;
+ int longest_chain = 0;
+ int was_shared;
PL_nomemok = TRUE;
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
@@ -1445,6 +1476,9 @@
aep = (HE**)a;
for (i=0; i<oldsize; i++,aep++) {
+ int left_length = 0;
+ int right_length = 0;
+
if (!*aep) /* non-existent */
continue;
bep = aep+oldsize;
@@ -1455,14 +1489,90 @@
if (!*bep)
xhv->xhv_fill++; /* HvFILL(hv)++ */
*bep = entry;
+ right_length++;
continue;
}
- else
+ else {
oentry = &HeNEXT(entry);
+ left_length++;
+ }
}
if (!*aep) /* everything moved */
xhv->xhv_fill--; /* HvFILL(hv)-- */
+ /* I think we don't actually need to keep track of the longest length,
+ merely flag if anything is too long. But for the moment while
+ developing this code I'll track it. */
+ if (left_length > longest_chain)
+ longest_chain = left_length;
+ if (right_length > longest_chain)
+ longest_chain = right_length;
+ }
+
+
+ /* Pick your policy for "hashing isn't working" here: */
+ if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv)
+ || HvREHASH(hv)) {
+ return;
+ }
+
+ if (hv == PL_strtab) {
+ /* Urg. Someone is doing something nasty to the string table.
+ Can't win. */
+ return;
+ }
+
+ /* Awooga. Awooga. Pathological data. */
+ /*PerlIO_printf(PerlIO_stderr(), "Awooga %d of %d with %d/%d buckets\n",
+ longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/
+
+ ++newsize;
+ Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+ was_shared = HvSHAREKEYS(hv);
+
+ xhv->xhv_fill = 0;
+ HvSHAREKEYS_off(hv);
+ HvREHASH_on(hv);
+
+ aep = (HE **) xhv->xhv_array;
+
+ for (i=0; i<newsize; i++,aep++) {
+ entry = *aep;
+ while (entry) {
+ /* We're going to trash this HE's next pointer when we chain it
+ into the new hash below, so store where we go next. */
+ HE *next = HeNEXT(entry);
+ UV hash;
+
+ /* Rehash it */
+ PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry));
+
+ if (was_shared) {
+ /* Unshare it. */
+ HEK *new_hek
+ = save_hek_flags(HeKEY(entry), HeKLEN(entry),
+ hash, HeKFLAGS(entry));
+ unshare_hek (HeKEY_hek(entry));
+ HeKEY_hek(entry) = new_hek;
+ } else {
+ /* Not shared, so simply write the new hash in. */
+ HeHASH(entry) = hash;
+ }
+ /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/
+ HEK_REHASH_on(HeKEY_hek(entry));
+ /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/
+
+ /* Copy oentry to the correct new chain. */
+ bep = ((HE**)a) + (hash & (I32) xhv->xhv_max);
+ if (!*bep)
+ xhv->xhv_fill++; /* HvFILL(hv)++ */
+ HeNEXT(entry) = *bep;
+ *bep = entry;
+
+ entry = next;
+ }
}
+ Safefree (xhv->xhv_array);
+ xhv->xhv_array = a; /* HvARRAY(hv) = a */
}
void
@@ -1566,6 +1676,7 @@
#ifndef NODEFAULT_SHAREKEYS
HvSHAREKEYS_on(hv); /* key-sharing on by default */
#endif
+
xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */
xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */
@@ -2039,7 +2150,17 @@
sv = newSVpvn ((char*)as_utf8, utf8_len);
SvUTF8_on (sv);
Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */
- } else {
+ } else if (flags & HVhek_REHASH) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK. This hv is using custom a hasing
+ algorithm. Hence we can't return a shared string scalar, as
+ that would contain the (wrong) hash value, and might get passed
+ into an hv routine with a regular hash */
+
+ sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
+ if (HEK_UTF8(hek))
+ SvUTF8_on (sv);
+ } else {
sv = newSVpvn_share(HEK_KEY(hek),
(HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)),
HEK_HASH(hek));
==== //depot/perl/hv.h#43 (text) ====
Index: perl/hv.h
--- perl/hv.h#42~21198~ Fri Sep 12 10:59:25 2003
+++ perl/hv.h Thu Oct 16 14:10:27 2003
@@ -89,6 +89,24 @@
(hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
} STMT_END
+#ifdef PERL_IN_HV_C
+#define PERL_HASH_INTERNAL(hash,str,len) \
+ STMT_START { \
+ register const char *s_PeRlHaSh_tmp = str; \
+ register const unsigned char *s_PeRlHaSh = (const unsigned char
*)s_PeRlHaSh_tmp; \
+ register I32 i_PeRlHaSh = len; \
+ register U32 hash_PeRlHaSh = PL_new_hash_seed; \
+ while (i_PeRlHaSh--) { \
+ hash_PeRlHaSh += *s_PeRlHaSh++; \
+ hash_PeRlHaSh += (hash_PeRlHaSh << 10); \
+ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \
+ } \
+ hash_PeRlHaSh += (hash_PeRlHaSh << 3); \
+ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
+ (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
+ } STMT_END
+#endif
+
/*
=head1 Hash Manipulation Functions
@@ -203,6 +221,10 @@
#define HvLAZYDEL_on(hv) (SvFLAGS(hv) |= SVphv_LAZYDEL)
#define HvLAZYDEL_off(hv) (SvFLAGS(hv) &= ~SVphv_LAZYDEL)
+#define HvREHASH(hv) (SvFLAGS(hv) & SVphv_REHASH)
+#define HvREHASH_on(hv) (SvFLAGS(hv) |= SVphv_REHASH)
+#define HvREHASH_off(hv) (SvFLAGS(hv) &= ~SVphv_REHASH)
+
/* Maybe amagical: */
/* #define HV_AMAGICmb(hv) (SvFLAGS(hv) & (SVpgv_badAM | SVpgv_AM)) */
@@ -224,6 +246,7 @@
#define HeKLEN(he) HEK_LEN(HeKEY_hek(he))
#define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he))
#define HeKWASUTF8(he) HEK_WASUTF8(HeKEY_hek(he))
+#define HeKREHASH(he) HEK_REHASH(HeKEY_hek(he))
#define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))
#define HeKFLAGS(he) HEK_FLAGS(HeKEY_hek(he))
#define HeVAL(he) (he)->hent_val
@@ -254,6 +277,7 @@
#define HVhek_UTF8 0x01 /* Key is utf8 encoded. */
#define HVhek_WASUTF8 0x02 /* Key is bytes here, but was supplied as utf8. */
+#define HVhek_REHASH 0x04 /* This key is in an hv using a custom HASH . */
#define HVhek_FREEKEY 0x100 /* Internal flag to say key is malloc()ed. */
#define HVhek_PLACEHOLD 0x200 /* Internal flag to create placeholder.
* (may change, but Storable is a core module) */
@@ -265,6 +289,8 @@
#define HEK_WASUTF8(hek) (HEK_FLAGS(hek) & HVhek_WASUTF8)
#define HEK_WASUTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_WASUTF8)
#define HEK_WASUTF8_off(hek) (HEK_FLAGS(hek) &= ~HVhek_WASUTF8)
+#define HEK_REHASH(hek) (HEK_FLAGS(hek) & HVhek_REHASH)
+#define HEK_REHASH_on(hek) (HEK_FLAGS(hek) |= HVhek_REHASH)
/* calculate HV array allocation */
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
==== //depot/perl/intrpvar.h#134 (text) ====
Index: perl/intrpvar.h
--- perl/intrpvar.h#133~20263~ Mon Jul 28 05:13:24 2003
+++ perl/intrpvar.h Thu Oct 16 14:10:27 2003
@@ -531,6 +531,10 @@
PERLVARI(Icv_has_eval, I32, 0) /* PL_compcv includes an entereval or similar */
+PERLVARI(Inew_hash_seed, UV, 0) /* 582 hash initializer */
+
+PERLVARI(Inew_hash_seed_set, bool, FALSE) /* 582 hash initialized? */
+
/* New variables must be added to the very end, before this comment,
* for binary compatibility (the offsets of the old members must not change).
* (Don't forget to add your variable also to perl_clone()!)
==== //depot/perl/perl.c#527 (text) ====
Index: perl/perl.c
--- perl/perl.c#526~21470~ Thu Oct 16 13:03:44 2003
+++ perl/perl.c Thu Oct 16 14:10:27 2003
@@ -918,7 +918,7 @@
* it is your responsibility to provide a good random seed!
* You can also define PERL_HASH_SEED in compile time, see hv.h. */
if (!PL_hash_seed_set)
- PL_hash_seed = get_hash_seed();
+ PL_new_hash_seed = get_hash_seed();
{
char *s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
@@ -927,7 +927,7 @@
if (i == 1)
PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n",
- PL_hash_seed);
+ PL_new_hash_seed);
}
}
#endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */
==== //depot/perl/perlapi.h#103 (text+w) ====
Index: perl/perlapi.h
--- perl/perlapi.h#102~20565~ Fri Aug 8 11:59:40 2003
+++ perl/perlapi.h Thu Oct 16 14:10:27 2003
@@ -398,6 +398,10 @@
#define PL_multi_start (*Perl_Imulti_start_ptr(aTHX))
#undef PL_multiline
#define PL_multiline (*Perl_Imultiline_ptr(aTHX))
+#undef PL_new_hash_seed
+#define PL_new_hash_seed (*Perl_Inew_hash_seed_ptr(aTHX))
+#undef PL_new_hash_seed_set
+#define PL_new_hash_seed_set (*Perl_Inew_hash_seed_set_ptr(aTHX))
#undef PL_nexttoke
#define PL_nexttoke (*Perl_Inexttoke_ptr(aTHX))
#undef PL_nexttype
==== //depot/perl/sv.c#691 (text) ====
Index: perl/sv.c
--- perl/sv.c#690~21420~ Tue Oct 7 12:51:35 2003
+++ perl/sv.c Thu Oct 16 14:10:27 2003
@@ -11349,6 +11349,7 @@
PL_glob_index = proto_perl->Iglob_index;
PL_srand_called = proto_perl->Isrand_called;
PL_hash_seed = proto_perl->Ihash_seed;
+ PL_new_hash_seed = proto_perl->Inew_hash_seed;
PL_uudmap['M'] = 0; /* reinits on demand */
PL_bitcount = Nullch; /* reinits on demand */
==== //depot/perl/sv.h#149 (text) ====
Index: perl/sv.h
--- perl/sv.h#148~21468~ Thu Oct 16 12:00:14 2003
+++ perl/sv.h Thu Oct 16 14:10:27 2003
@@ -213,6 +213,7 @@
#define SVrepl_EVAL 0x40000000 /* Replacement part of s///e */
+#define SVphv_REHASH 0x10000000 /* HV is recalculating hash values */
#define SVphv_SHAREKEYS 0x20000000 /* keys live on shared string table */
#define SVphv_LAZYDEL 0x40000000 /* entry in xhv_eiter must be deleted */
#define SVphv_HASKFLAGS 0x80000000 /* keys have flag byte after hash */
==== //depot/perl/util.c#405 (text) ====
Index: perl/util.c
--- perl/util.c#404~21397~ Thu Oct 2 00:48:51 2003
+++ perl/util.c Thu Oct 16 14:10:27 2003
@@ -4427,7 +4427,7 @@
Perl_croak(aTHX_ "Your random numbers are not that random");
}
}
- PL_hash_seed_set = TRUE;
+ PL_new_hash_seed_set = TRUE;
return myseed;
}
End of Patch.