Changeset: 6115f4070cd0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/6115f4070cd0
Modified Files:
        gdk/gdk.h
        gdk/gdk_atoms.h
        gdk/gdk_batop.c
        gdk/gdk_group.c
        gdk/gdk_join.c
        gdk/gdk_select.c
        gdk/gdk_unique.c
Branch: ustr
Log Message:

Use vkey property in bat iterator when we can use offsets instead of strings.


diffs (158 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -551,6 +551,12 @@ gdk_export void BBPtmunlock(void);
 
 gdk_export BAT *BBPquickdesc(bat b);
 
+typedef var_t stridx_t;
+#define SIZEOF_STRIDX_T SIZEOF_VAR_T
+#define GDK_VARALIGN SIZEOF_STRIDX_T
+
+#include "gdk_atoms.h"
+
 /* BAT iterator, also protects use of BAT heaps with reference counts.
  *
  * A BAT iterator has to be used with caution, but it does have to be
@@ -657,7 +663,11 @@ bat_iterator_nolock(BAT *b)
                        .maxpos = isview ? BUN_NONE : b->tmaxpos,
                        .unique_est = b->tunique_est,
                        .key = b->tkey,
-                       .vkey = b->tvkey || (b->tvheap && BBP_desc(b->tvheap->parentid)->tvkey),
+                       .vkey = (b->tvheap &&
+                                (b->tvkey ||
+                                 BBP_desc(b->tvheap->parentid)->tvkey ||
+                                 (ATOMstorage(b->ttype) == TYPE_str &&
+                                  GDK_ELIMDOUBLES(b->tvheap)))),
                        .nonil = b->tnonil,
                        .nil = b->tnil,
                        .sorted = b->tsorted,
@@ -828,11 +838,6 @@ gdk_export BUN ORDERfndlast(BAT *b, Heap
 
 gdk_export BUN BUNfnd(BAT *b, const void *right);
 
-typedef var_t stridx_t;
-#define SIZEOF_STRIDX_T SIZEOF_VAR_T
-#define GDK_VARALIGN SIZEOF_STRIDX_T
-
-#include "gdk_atoms.h"
 #include "gdk_cand.h"
 
 __attribute__((__pure__))
diff --git a/gdk/gdk_atoms.h b/gdk/gdk_atoms.h
--- a/gdk/gdk_atoms.h
+++ b/gdk/gdk_atoms.h
@@ -376,8 +376,8 @@ ATOMreplaceVAR(BAT *b, var_t *dst, const
 #define GDK_STRHASHMASK                (GDK_STRHASHTABLE-1)
 #define GDK_STRHASHSIZE                (GDK_STRHASHTABLE * sizeof(stridx_t))
 #define GDK_ELIMPOWER          16      /* 64KiB is the threshold */
+#define GDK_ELIMLIMIT          (1<<GDK_ELIMPOWER)      /* equivalently: ELIMBASE == 0 */
 #define GDK_ELIMDOUBLES(h)     ((h)->free < GDK_ELIMLIMIT)
-#define GDK_ELIMLIMIT          (1<<GDK_ELIMPOWER)      /* equivalently: ELIMBASE == 0 */
 #define GDK_ELIMBASE(x)                (((x) >> GDK_ELIMPOWER) << GDK_ELIMPOWER)
 #define GDK_VAROFFSET          ((var_t) GDK_STRHASHSIZE)
 
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -3347,11 +3347,15 @@ BATcount_no_nil(BAT *b, BAT *s)
                         * not, return BATcount(b), else count offsets
                         * != nil offset */
                }
-               if (GDK_ELIMDOUBLES(bi.vh)) {
-                       off = strLocate(bi.vh, str_nil);
-                       if (off == (var_t) -2) {
-                               cnt = ci.ncand;
-                               break;
+               if (bi.vkey) {
+                       if (GDK_ELIMDOUBLES(bi.vh)) {
+                               off = strLocate(bi.vh, str_nil);
+                               if (off == (var_t) -2) {
+                                       cnt = ci.ncand;
+                                       break;
+                               }
+                       } else {
+                               off = 0;
                        }
                        switch (bi.width) {
                        case 1:
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -901,7 +901,7 @@ BATgroup_internal(BAT **groups, BAT **ex
        /* for strings we can use the offset instead of the actual
         * string values if we know that the strings in the string
         * heap are unique */
-       if (t == TYPE_str && GDK_ELIMDOUBLES(bi.vh)) {
+       if (t == TYPE_str && bi.vkey) {
                switch (bi.width) {
                case 1:
                        t = TYPE_bte;
@@ -1185,7 +1185,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                hs->heaplink.parentid = b->batCacheid;
                if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshgrpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
                    snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshgrpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename) ||
-                   HASHnew(hs, bi.type, BATcount(b), nbucket, BUN_NONE, false) != GDK_SUCCEED) {
+                   HASHnew(hs, t, BATcount(b), nbucket, BUN_NONE, false) != GDK_SUCCEED) {
                        GDKfree(hs);
                        hs = NULL;
                        GDKerror("cannot allocate hash table\n");
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2804,7 +2804,8 @@ vkeyjoin(BAT **r1p, BAT **r2p, BAT **r3p
        BATiter li = bat_iterator(l);
        BATiter ri = bat_iterator(r);
 
-       var_t niloff = GDK_ELIMDOUBLES(li.vh) ? strLocate(li.vh, str_nil) : 0;
+       var_t niloff = (ATOMstorage(li.type) == TYPE_str &&
+                       GDK_ELIMDOUBLES(li.vh)) ? strLocate(li.vh, str_nil) : 0;
        assert(niloff != (var_t) -2);
 
        bit defmark = 0;
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -616,15 +616,14 @@ fullscan_str(BATiter *bi, struct candite
        QryCtx *qry_ctx = MT_thread_get_qry_ctx();
 
        if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) &&
-           strcmp(tl, str_nil) != 0 &&
-           strLocate(bi->vh, str_nil) == (var_t) -2) {
+           !strNil(tl) && strLocate(bi->vh, str_nil) == (var_t) -2) {
                /* anti-equi select for non-nil value, and there are no
                 * nils, so we can use fast path; trigger by setting
                 * nonil */
                bi->nonil = true;
        }
        if (!((equi ||
-              (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) &&
+              (anti && tl == th && (bi->nonil || strNil(tl)))) &&
              GDK_ELIMDOUBLES(bi->vh)))
                return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
                                    nil_matches, lval, hval, lnil, cnt, hseq,
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -108,9 +108,7 @@ BATunique(BAT *b, BAT *s)
        hseq = b->hseqbase;
 
        if (ATOMbasetype(bi.type) == TYPE_bte ||
-           (bi.width == 1 &&
-            ATOMstorage(bi.type) == TYPE_str &&
-            (GDK_ELIMDOUBLES(bi.vh) || bi.vkey))) {
+           (bi.width == 1 && bi.vkey)) {
                uint8_t val;
 
                algomsg = "unique: byte-sized atoms";
@@ -134,9 +132,7 @@ BATunique(BAT *b, BAT *s)
                TIMEOUT_CHECK(qry_ctx,
                              GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
        } else if (ATOMbasetype(bi.type) == TYPE_sht ||
-                  (bi.width == 2 &&
-                   ATOMstorage(bi.type) == TYPE_str &&
-                   (GDK_ELIMDOUBLES(bi.vh) || bi.vkey))) {
+                  (bi.width == 2 && bi.vkey)) {
                uint16_t val;
 
                algomsg = "unique: short-sized atoms";
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to