Change 21471 by [EMAIL PROTECTED] on 2003/10/16 21:10:27

        Plan C for foiling the algorithmic complexity attack
        (based on Chip's plan A (binary compatibility with 5.8.0 and 5.8.1),
         Chip's plan B (do something new inside the hv functions)
         and introspective sort)
        Provides infrastructure for hashes to change their hash function
        if necessary, and code in hsplit to detect pathalogical data and
        instigate a random rehashing.
        Needs refinement. Let's see how much smoke it creates.

Affected files ...

... //depot/perl/embedvar.h#181 edit
... //depot/perl/hv.c#139 edit
... //depot/perl/hv.h#43 edit
... //depot/perl/intrpvar.h#134 edit
... //depot/perl/perl.c#527 edit
... //depot/perl/perlapi.h#103 edit
... //depot/perl/sv.c#691 edit
... //depot/perl/sv.h#149 edit
... //depot/perl/util.c#405 edit

Differences ...

==== //depot/perl/embedvar.h#181 (text+w) ====
Index: perl/embedvar.h
--- perl/embedvar.h#180~20565~  Fri Aug  8 11:59:40 2003
+++ perl/embedvar.h     Thu Oct 16 14:10:27 2003
@@ -320,6 +320,8 @@
 #define PL_multi_open          (vTHX->Imulti_open)
 #define PL_multi_start         (vTHX->Imulti_start)
 #define PL_multiline           (vTHX->Imultiline)
+#define PL_new_hash_seed       (vTHX->Inew_hash_seed)
+#define PL_new_hash_seed_set   (vTHX->Inew_hash_seed_set)
 #define PL_nexttoke            (vTHX->Inexttoke)
 #define PL_nexttype            (vTHX->Inexttype)
 #define PL_nextval             (vTHX->Inextval)
@@ -624,6 +626,8 @@
 #define PL_Imulti_open         PL_multi_open
 #define PL_Imulti_start                PL_multi_start
 #define PL_Imultiline          PL_multiline
+#define PL_Inew_hash_seed      PL_new_hash_seed
+#define PL_Inew_hash_seed_set  PL_new_hash_seed_set
 #define PL_Inexttoke           PL_nexttoke
 #define PL_Inexttype           PL_nexttype
 #define PL_Inextval            PL_nextval

==== //depot/perl/hv.c#139 (text) ====
Index: perl/hv.c
--- perl/hv.c#138~21469~        Thu Oct 16 12:31:19 2003
+++ perl/hv.c   Thu Oct 16 14:10:27 2003
@@ -274,7 +274,11 @@
         }
     }
 
-    PERL_HASH(hash, key, klen);
+    if (HvREHASH(hv)) {
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else {
+       PERL_HASH(hash, key, klen);
+    }
 
     /* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
     entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -445,7 +449,9 @@
             flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
     }
 
-    if (!hash) {
+    if (HvREHASH(hv)) {
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else if (!hash) {
         if SvIsCOW_shared_hash(keysv) {
             hash = SvUVX(keysv);
         } else {
@@ -628,7 +634,12 @@
     if (flags)
         HvHASKFLAGS_on((SV*)hv);
 
-    if (!hash)
+    if (HvREHASH(hv)) {
+       /* We don't have a pointer to the hv, so we have to replicate the
+          flag into every HEK, so that hv_iterkeysv can see it.  */
+       flags |= HVhek_REHASH;
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else if (!hash)
        PERL_HASH(hash, key, klen);
 
     if (!xhv->xhv_array /* !HvARRAY(hv) */)
@@ -798,7 +809,12 @@
         HvHASKFLAGS_on((SV*)hv);
     }
 
-    if (!hash) {
+    if (HvREHASH(hv)) {
+       /* We don't have a pointer to the hv, so we have to replicate the
+          flag into every HEK, so that hv_iterkeysv can see it.  */
+       flags |= HVhek_REHASH;
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else if (!hash) {
         if SvIsCOW_shared_hash(keysv) {
             hash = SvUVX(keysv);
         } else {
@@ -950,7 +966,11 @@
             k_flags |= HVhek_FREEKEY;
     }
 
-    PERL_HASH(hash, key, klen);
+    if (HvREHASH(hv)) {
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else {
+       PERL_HASH(hash, key, klen);
+    }
 
     /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
     oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -1107,8 +1127,11 @@
             k_flags |= HVhek_FREEKEY;
     }
 
-    if (!hash)
+    if (HvREHASH(hv)) {
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else if (!hash) {
        PERL_HASH(hash, key, klen);
+    }
 
     /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
     oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -1255,7 +1278,11 @@
             k_flags |= HVhek_FREEKEY;
     }
 
-    PERL_HASH(hash, key, klen);
+    if (HvREHASH(hv)) {
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else {
+       PERL_HASH(hash, key, klen);
+    }
 
 #ifdef DYNAMIC_ENV_FETCH
     if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*);
@@ -1359,7 +1386,9 @@
         if (key != keysave)
             k_flags |= HVhek_FREEKEY;
     }
-    if (!hash)
+    if (HvREHASH(hv)) {
+       PERL_HASH_INTERNAL(hash, key, klen);
+    } else if (!hash)
        PERL_HASH(hash, key, klen);
 
 #ifdef DYNAMIC_ENV_FETCH
@@ -1415,6 +1444,8 @@
     register HE **bep;
     register HE *entry;
     register HE **oentry;
+    int longest_chain = 0;
+    int was_shared;
 
     PL_nomemok = TRUE;
 #if defined(STRANGE_MALLOC) || defined(MYMALLOC)
@@ -1445,6 +1476,9 @@
     aep = (HE**)a;
 
     for (i=0; i<oldsize; i++,aep++) {
+       int left_length = 0;
+       int right_length = 0;
+
        if (!*aep)                              /* non-existent */
            continue;
        bep = aep+oldsize;
@@ -1455,14 +1489,90 @@
                if (!*bep)
                    xhv->xhv_fill++; /* HvFILL(hv)++ */
                *bep = entry;
+               right_length++;
                continue;
            }
-           else
+           else {
                oentry = &HeNEXT(entry);
+               left_length++;
+           }
        }
        if (!*aep)                              /* everything moved */
            xhv->xhv_fill--; /* HvFILL(hv)-- */
+       /* I think we don't actually need to keep track of the longest length,
+          merely flag if anything is too long. But for the moment while
+          developing this code I'll track it.  */
+       if (left_length > longest_chain)
+           longest_chain = left_length;
+       if (right_length > longest_chain)
+           longest_chain = right_length;
+    }
+
+
+    /* Pick your policy for "hashing isn't working" here:  */
+    if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv)
+       || HvREHASH(hv)) {
+       return;
+    }
+
+    if (hv == PL_strtab) {
+       /* Urg. Someone is doing something nasty to the string table.
+          Can't win.  */
+       return;
+    }
+
+    /* Awooga. Awooga. Pathological data.  */
+    /*PerlIO_printf(PerlIO_stderr(), "Awooga %d of %d with %d/%d buckets\n",
+      longest_chain, HvTOTALKEYS(hv), HvFILL(hv),  1+HvMAX(hv));*/
+
+    ++newsize;
+    Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+    was_shared = HvSHAREKEYS(hv);
+
+    xhv->xhv_fill = 0;
+    HvSHAREKEYS_off(hv);
+    HvREHASH_on(hv);
+
+    aep = (HE **) xhv->xhv_array;
+
+    for (i=0; i<newsize; i++,aep++) {
+       entry = *aep;
+       while (entry) {
+           /* We're going to trash this HE's next pointer when we chain it
+              into the new hash below, so store where we go next.  */
+           HE *next = HeNEXT(entry);
+           UV hash;
+
+           /* Rehash it */
+           PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry));
+
+           if (was_shared) {
+               /* Unshare it.  */
+               HEK *new_hek
+                   = save_hek_flags(HeKEY(entry), HeKLEN(entry),
+                                    hash, HeKFLAGS(entry));
+               unshare_hek (HeKEY_hek(entry));
+               HeKEY_hek(entry) = new_hek;
+           } else {
+               /* Not shared, so simply write the new hash in. */
+               HeHASH(entry) = hash;
+           }
+           /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/
+           HEK_REHASH_on(HeKEY_hek(entry));
+           /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/
+
+           /* Copy oentry to the correct new chain.  */
+           bep = ((HE**)a) + (hash & (I32) xhv->xhv_max);
+           if (!*bep)
+                   xhv->xhv_fill++; /* HvFILL(hv)++ */
+           HeNEXT(entry) = *bep;
+           *bep = entry;
+
+           entry = next;
+       }
     }
+    Safefree (xhv->xhv_array);
+    xhv->xhv_array = a;                /* HvARRAY(hv) = a */
 }
 
 void
@@ -1566,6 +1676,7 @@
 #ifndef NODEFAULT_SHAREKEYS
     HvSHAREKEYS_on(hv);         /* key-sharing on by default */
 #endif
+
     xhv->xhv_max    = 7;       /* HvMAX(hv) = 7 (start with 8 buckets) */
     xhv->xhv_fill   = 0;       /* HvFILL(hv) = 0 */
     xhv->xhv_pmroot = 0;       /* HvPMROOT(hv) = 0 */
@@ -2039,7 +2150,17 @@
             sv = newSVpvn ((char*)as_utf8, utf8_len);
             SvUTF8_on (sv);
            Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */
-        } else {
+       } else if (flags & HVhek_REHASH) {
+           /* We don't have a pointer to the hv, so we have to replicate the
+              flag into every HEK. This hv is using custom a hasing
+              algorithm. Hence we can't return a shared string scalar, as
+              that would contain the (wrong) hash value, and might get passed
+              into an hv routine with a regular hash  */
+
+            sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
+           if (HEK_UTF8(hek))
+               SvUTF8_on (sv);
+       } else {
             sv = newSVpvn_share(HEK_KEY(hek),
                                 (HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)),
                                 HEK_HASH(hek));

==== //depot/perl/hv.h#43 (text) ====
Index: perl/hv.h
--- perl/hv.h#42~21198~ Fri Sep 12 10:59:25 2003
+++ perl/hv.h   Thu Oct 16 14:10:27 2003
@@ -89,6 +89,24 @@
        (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
     } STMT_END
 
+#ifdef PERL_IN_HV_C
+#define PERL_HASH_INTERNAL(hash,str,len) \
+     STMT_START        { \
+       register const char *s_PeRlHaSh_tmp = str; \
+       register const unsigned char *s_PeRlHaSh = (const unsigned char 
*)s_PeRlHaSh_tmp; \
+       register I32 i_PeRlHaSh = len; \
+       register U32 hash_PeRlHaSh = PL_new_hash_seed; \
+       while (i_PeRlHaSh--) { \
+           hash_PeRlHaSh += *s_PeRlHaSh++; \
+           hash_PeRlHaSh += (hash_PeRlHaSh << 10); \
+           hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \
+       } \
+       hash_PeRlHaSh += (hash_PeRlHaSh << 3); \
+       hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
+       (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
+    } STMT_END
+#endif
+
 /*
 =head1 Hash Manipulation Functions
 
@@ -203,6 +221,10 @@
 #define HvLAZYDEL_on(hv)       (SvFLAGS(hv) |= SVphv_LAZYDEL)
 #define HvLAZYDEL_off(hv)      (SvFLAGS(hv) &= ~SVphv_LAZYDEL)
 
+#define HvREHASH(hv)           (SvFLAGS(hv) & SVphv_REHASH)
+#define HvREHASH_on(hv)                (SvFLAGS(hv) |= SVphv_REHASH)
+#define HvREHASH_off(hv)       (SvFLAGS(hv) &= ~SVphv_REHASH)
+
 /* Maybe amagical: */
 /* #define HV_AMAGICmb(hv)      (SvFLAGS(hv) & (SVpgv_badAM | SVpgv_AM)) */
 
@@ -224,6 +246,7 @@
 #define HeKLEN(he)             HEK_LEN(HeKEY_hek(he))
 #define HeKUTF8(he)  HEK_UTF8(HeKEY_hek(he))
 #define HeKWASUTF8(he)  HEK_WASUTF8(HeKEY_hek(he))
+#define HeKREHASH(he)  HEK_REHASH(HeKEY_hek(he))
 #define HeKLEN_UTF8(he)  (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))
 #define HeKFLAGS(he)  HEK_FLAGS(HeKEY_hek(he))
 #define HeVAL(he)              (he)->hent_val
@@ -254,6 +277,7 @@
 
 #define HVhek_UTF8     0x01 /* Key is utf8 encoded. */
 #define HVhek_WASUTF8  0x02 /* Key is bytes here, but was supplied as utf8. */
+#define HVhek_REHASH   0x04 /* This key is in an hv using a custom HASH . */
 #define HVhek_FREEKEY  0x100 /* Internal flag to say key is malloc()ed.  */
 #define HVhek_PLACEHOLD        0x200 /* Internal flag to create placeholder.
                                * (may change, but Storable is a core module) */
@@ -265,6 +289,8 @@
 #define HEK_WASUTF8(hek)       (HEK_FLAGS(hek) & HVhek_WASUTF8)
 #define HEK_WASUTF8_on(hek)    (HEK_FLAGS(hek) |= HVhek_WASUTF8)
 #define HEK_WASUTF8_off(hek)   (HEK_FLAGS(hek) &= ~HVhek_WASUTF8)
+#define HEK_REHASH(hek)                (HEK_FLAGS(hek) & HVhek_REHASH)
+#define HEK_REHASH_on(hek)     (HEK_FLAGS(hek) |= HVhek_REHASH)
 
 /* calculate HV array allocation */
 #if defined(STRANGE_MALLOC) || defined(MYMALLOC)

==== //depot/perl/intrpvar.h#134 (text) ====
Index: perl/intrpvar.h
--- perl/intrpvar.h#133~20263~  Mon Jul 28 05:13:24 2003
+++ perl/intrpvar.h     Thu Oct 16 14:10:27 2003
@@ -531,6 +531,10 @@
 
 PERLVARI(Icv_has_eval, I32, 0) /* PL_compcv includes an entereval or similar */
 
+PERLVARI(Inew_hash_seed, UV, 0)                /* 582 hash initializer */
+
+PERLVARI(Inew_hash_seed_set, bool, FALSE)      /* 582 hash initialized? */
+
 /* New variables must be added to the very end, before this comment,
  * for binary compatibility (the offsets of the old members must not change).
  * (Don't forget to add your variable also to perl_clone()!)

==== //depot/perl/perl.c#527 (text) ====
Index: perl/perl.c
--- perl/perl.c#526~21470~      Thu Oct 16 13:03:44 2003
+++ perl/perl.c Thu Oct 16 14:10:27 2003
@@ -918,7 +918,7 @@
      * it is your responsibility to provide a good random seed!
      * You can also define PERL_HASH_SEED in compile time, see hv.h. */
     if (!PL_hash_seed_set)
-        PL_hash_seed = get_hash_seed();
+        PL_new_hash_seed = get_hash_seed();
     {
         char *s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
 
@@ -927,7 +927,7 @@
 
              if (i == 1)
                   PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n",
-                                PL_hash_seed);
+                                PL_new_hash_seed);
         }
     }
 #endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */

==== //depot/perl/perlapi.h#103 (text+w) ====
Index: perl/perlapi.h
--- perl/perlapi.h#102~20565~   Fri Aug  8 11:59:40 2003
+++ perl/perlapi.h      Thu Oct 16 14:10:27 2003
@@ -398,6 +398,10 @@
 #define PL_multi_start         (*Perl_Imulti_start_ptr(aTHX))
 #undef  PL_multiline
 #define PL_multiline           (*Perl_Imultiline_ptr(aTHX))
+#undef  PL_new_hash_seed
+#define PL_new_hash_seed       (*Perl_Inew_hash_seed_ptr(aTHX))
+#undef  PL_new_hash_seed_set
+#define PL_new_hash_seed_set   (*Perl_Inew_hash_seed_set_ptr(aTHX))
 #undef  PL_nexttoke
 #define PL_nexttoke            (*Perl_Inexttoke_ptr(aTHX))
 #undef  PL_nexttype

==== //depot/perl/sv.c#691 (text) ====
Index: perl/sv.c
--- perl/sv.c#690~21420~        Tue Oct  7 12:51:35 2003
+++ perl/sv.c   Thu Oct 16 14:10:27 2003
@@ -11349,6 +11349,7 @@
     PL_glob_index      = proto_perl->Iglob_index;
     PL_srand_called    = proto_perl->Isrand_called;
     PL_hash_seed       = proto_perl->Ihash_seed;
+    PL_new_hash_seed   = proto_perl->Inew_hash_seed;
     PL_uudmap['M']     = 0;            /* reinits on demand */
     PL_bitcount                = Nullch;       /* reinits on demand */
 

==== //depot/perl/sv.h#149 (text) ====
Index: perl/sv.h
--- perl/sv.h#148~21468~        Thu Oct 16 12:00:14 2003
+++ perl/sv.h   Thu Oct 16 14:10:27 2003
@@ -213,6 +213,7 @@
 
 #define SVrepl_EVAL    0x40000000      /* Replacement part of s///e */
 
+#define SVphv_REHASH   0x10000000      /* HV is recalculating hash values */
 #define SVphv_SHAREKEYS 0x20000000     /* keys live on shared string table */
 #define SVphv_LAZYDEL  0x40000000      /* entry in xhv_eiter must be deleted */
 #define SVphv_HASKFLAGS        0x80000000      /* keys have flag byte after hash */

==== //depot/perl/util.c#405 (text) ====
Index: perl/util.c
--- perl/util.c#404~21397~      Thu Oct  2 00:48:51 2003
+++ perl/util.c Thu Oct 16 14:10:27 2003
@@ -4427,7 +4427,7 @@
                  Perl_croak(aTHX_ "Your random numbers are not that random");
          }
      }
-     PL_hash_seed_set = TRUE;
+     PL_new_hash_seed_set = TRUE;
 
      return myseed;
 }
End of Patch.

Reply via email to