Changeset: f520fd3fd555 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f520fd3fd555
Modified Files:
        gdk/gdk_atoms.c
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        sql/test/BugTracker-2023/Tests/misc-crashes-7390.test
Branch: Dec2023
Log Message:

Floating point types should not use the bit patterns for hashing.
0.0 and -0.0 compare equal but have different bit representations, so
we can't use the int/lng hashes to calculate hashes for flt/dbl.  Also,
there are multiple representations for NaN (which we use for NULL), and
they all have to map to the same hash value.


diffs (175 lines):

diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c
--- a/gdk/gdk_atoms.c
+++ b/gdk/gdk_atoms.c
@@ -124,6 +124,26 @@ hgeHash(const hge *v)
 }
 #endif
 
+static BUN /* hash a flt such that all nil values share one hash, and 0.0 and -0.0 share one hash */
+fltHash(const flt *v)
+{
+       if (is_flt_nil(*v)) /* any nil value, whatever its bit pattern, gets the same fixed hash */
+               return (BUN) mix_int(GDK_int_min);
+       if (*v == 0) /* compares true for both 0.0 and -0.0, which differ in their bits */
+               return (BUN) mix_int(0);
+       return (BUN) mix_int(*(const unsigned int *) v); /* otherwise hash the bit pattern; NOTE(review): reads a flt object through an unsigned int lvalue -- confirm the build permits this type punning */
+}
+
+static BUN /* hash a dbl such that all nil values share one hash, and 0.0 and -0.0 share one hash */
+dblHash(const dbl *v)
+{
+       if (is_dbl_nil(*v)) /* any nil value, whatever its bit pattern, gets the same fixed hash */
+               return (BUN) mix_lng(GDK_lng_min);
+       if (*v == 0) /* compares true for both 0.0 and -0.0, which differ in their bits */
+               return (BUN) mix_lng(0);
+       return (BUN) mix_lng(*(const ulng *) v); /* otherwise hash the bit pattern; NOTE(review): reads a dbl object through a ulng lvalue -- confirm the build permits this type punning */
+}
+
 /*
  * @+ Standard Atoms
  */
@@ -1786,7 +1806,7 @@ atomDesc BATatoms[MAXATOMS] = {
                .atomRead = (void *(*)(void *, size_t *, stream *, size_t)) 
fltRead,
                .atomWrite = (gdk_return (*)(const void *, stream *, size_t)) 
fltWrite,
                .atomCmp = (int (*)(const void *, const void *)) fltCmp,
-               .atomHash = (BUN (*)(const void *)) intHash,
+               .atomHash = (BUN (*)(const void *)) fltHash,
        },
        [TYPE_dbl] = {
                .name = "dbl",
@@ -1799,7 +1819,7 @@ atomDesc BATatoms[MAXATOMS] = {
                .atomRead = (void *(*)(void *, size_t *, stream *, size_t)) 
dblRead,
                .atomWrite = (gdk_return (*)(const void *, stream *, size_t)) 
dblWrite,
                .atomCmp = (int (*)(const void *, const void *)) dblCmp,
-               .atomHash = (BUN (*)(const void *)) lngHash,
+               .atomHash = (BUN (*)(const void *)) dblHash,
        },
        [TYPE_lng] = {
                .name = "lng",
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -102,10 +102,12 @@ HASHclear(Hash *h)
        memset(h->Bckt, 0xFF, h->nbucket * h->width);
 }
 
-#define HASH_VERSION           5
-/* this is only for the change of hash function of the UUID type and MBR
- * type; if HASH_VERSION is increased again from 5, the code associated
- * with HASH_VERSION_NOUUID and HASH_VERSION_NOMBR must be deleted */
+#define HASH_VERSION           6
+/* this is only for the change of hash function of the floating point
+ * types, the UUID type and the MBR type; if HASH_VERSION is increased
+ * again from 6, the code associated with HASH_VERSION_FLOAT,
+ * HASH_VERSION_NOUUID and HASH_VERSION_NOMBR must be deleted */
+#define HASH_VERSION_FLOAT     5
 #define HASH_VERSION_NOMBR     4
 #define HASH_VERSION_NOUUID    3
 #define HASH_HEADER_SIZE       7       /* nr of size_t fields in header */
@@ -509,6 +511,8 @@ BATcheckhash(BAT *b)
                                                         ((size_t) 1 << 24) |
 #endif
                                                         HASH_VERSION_NOUUID) &&
+                                                strcmp(ATOMname(b->ttype), 
"flt") != 0 &&
+                                                strcmp(ATOMname(b->ttype), 
"dbl") != 0 &&
                                                 strcmp(ATOMname(b->ttype), 
"uuid") != 0 &&
                                                 strcmp(ATOMname(b->ttype), 
"mbr") != 0)
 #endif
@@ -519,8 +523,20 @@ BATcheckhash(BAT *b)
                                                         ((size_t) 1 << 24) |
 #endif
                                                         HASH_VERSION_NOMBR) &&
+                                                strcmp(ATOMname(b->ttype), 
"flt") != 0 &&
+                                                strcmp(ATOMname(b->ttype), 
"dbl") != 0 &&
                                                 strcmp(ATOMname(b->ttype), 
"mbr") != 0)
 #endif
+#ifdef HASH_VERSION_FLOAT
+                                            /* if not floating point, also 
allow previous version */
+                                            || (hdata[0] == (
+#ifdef PERSISTENTHASH
+                                                        ((size_t) 1 << 24) |
+#endif
+                                                        HASH_VERSION_FLOAT) &&
+                                                strcmp(ATOMname(b->ttype), 
"flt") != 0 &&
+                                                strcmp(ATOMname(b->ttype), 
"dbl") != 0)
+#endif
                                                    ) &&
                                            hdata[1] > 0 &&
                                            (
diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h
--- a/gdk/gdk_hash.h
+++ b/gdk/gdk_hash.h
@@ -210,8 +210,8 @@ HASHgetlink(const Hash *h, BUN i)
 #define hash_oid(H,V)  hash_lng(H,V)
 #endif
 
-#define hash_flt(H,V)  hash_int(H,V)
-#define hash_dbl(H,V)  hash_lng(H,V)
+#define hash_flt(H,V)  HASHbucket(H, ATOMhash(TYPE_flt, (V)))
+#define hash_dbl(H,V)  HASHbucket(H, ATOMhash(TYPE_dbl, (V)))
 
 static inline BUN __attribute__((__pure__))
 mix_uuid(const uuid *u)
diff --git a/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test 
b/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test
--- a/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test
+++ b/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test
@@ -322,7 +322,7 @@ DROP TABLE v0
 statement ok
 CREATE TABLE v0 (v1 INTEGER PRIMARY KEY)
 
-query I nosort
+query R nosort
 SELECT 67 + 0 + -1 + 96 + 46463082.000000 + 30 AS v2 FROM v0 WHERE 255 = v1 
LIMIT 66 OFFSET 16
 ----
 
@@ -347,16 +347,16 @@ CREATE TABLE v0(v1 FLOAT)
 statement ok
 INSERT INTO v0 VALUES (0),(67),(127),(-1),(NULL),(NULL),(NULL),(NULL)
 
-query IT nosort
+query RT nosort
 SELECT * , 'x' FROM v0 WHERE (SELECT 39 WHERE (v1 + -32768 NOT IN (14, 255))) 
* 87 + 2147483647
 ----
-0
+0.000
 x
-67
+67.000
 x
-127
+127.000
 x
--1
+-1.000
 x
 
 statement ok
@@ -367,20 +367,11 @@ SELECT count(*) FROM v0
 ----
 131080
 
--- after the second insert, rerun same query as above, now it crashes server 
with assertion failure
--- gdk/gdk_bat.c:2098: BATsetcount: Assertion `b->batCapacity >= cnt' failed.
-skipif knownfail
-query IT nosort
+-- after the second insert, rerun same query as above, since it used to crash 
the server
+query RT nosort
 SELECT * , 'x' FROM v0 WHERE (SELECT 39 WHERE (v1 + -32768 NOT IN (14, 255))) 
* 87 + 2147483647
 ----
-0
-x
-67
-x
-127
-x
--1
-x
+131080 values hashing to 6b7d0af1bd7606b28955e7b023646d9d
 
 -- mul_bte_bte_bte: ERROR: 22003!overflow in calculation 87*39.
 
@@ -576,3 +567,4 @@ DELETE FROM v0 WHERE EXISTS ( SELECT 2 F
 
 statement ok
 DROP TABLE v0
+
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to