Changeset: 6115f4070cd0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/6115f4070cd0
Modified Files:
gdk/gdk.h
gdk/gdk_atoms.h
gdk/gdk_batop.c
gdk/gdk_group.c
gdk/gdk_join.c
gdk/gdk_select.c
gdk/gdk_unique.c
Branch: ustr
Log Message:
Use vkey property in bat iterator when we can use offsets instead of strings.
diffs (158 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -551,6 +551,12 @@ gdk_export void BBPtmunlock(void);
gdk_export BAT *BBPquickdesc(bat b);
+typedef var_t stridx_t;
+#define SIZEOF_STRIDX_T SIZEOF_VAR_T
+#define GDK_VARALIGN SIZEOF_STRIDX_T
+
+#include "gdk_atoms.h"
+
/* BAT iterator, also protects use of BAT heaps with reference counts.
*
* A BAT iterator has to be used with caution, but it does have to be
@@ -657,7 +663,11 @@ bat_iterator_nolock(BAT *b)
.maxpos = isview ? BUN_NONE : b->tmaxpos,
.unique_est = b->tunique_est,
.key = b->tkey,
- .vkey = b->tvkey || (b->tvheap && BBP_desc(b->tvheap->parentid)->tvkey),
+ .vkey = (b->tvheap &&
+ (b->tvkey ||
+ BBP_desc(b->tvheap->parentid)->tvkey ||
+ (ATOMstorage(b->ttype) == TYPE_str &&
+ GDK_ELIMDOUBLES(b->tvheap)))),
.nonil = b->tnonil,
.nil = b->tnil,
.sorted = b->tsorted,
@@ -828,11 +838,6 @@ gdk_export BUN ORDERfndlast(BAT *b, Heap
gdk_export BUN BUNfnd(BAT *b, const void *right);
-typedef var_t stridx_t;
-#define SIZEOF_STRIDX_T SIZEOF_VAR_T
-#define GDK_VARALIGN SIZEOF_STRIDX_T
-
-#include "gdk_atoms.h"
#include "gdk_cand.h"
__attribute__((__pure__))
diff --git a/gdk/gdk_atoms.h b/gdk/gdk_atoms.h
--- a/gdk/gdk_atoms.h
+++ b/gdk/gdk_atoms.h
@@ -376,8 +376,8 @@ ATOMreplaceVAR(BAT *b, var_t *dst, const
#define GDK_STRHASHMASK (GDK_STRHASHTABLE-1)
#define GDK_STRHASHSIZE (GDK_STRHASHTABLE * sizeof(stridx_t))
#define GDK_ELIMPOWER 16 /* 64KiB is the threshold */
+#define GDK_ELIMLIMIT (1<<GDK_ELIMPOWER) /* equivalently: ELIMBASE == 0 */
#define GDK_ELIMDOUBLES(h) ((h)->free < GDK_ELIMLIMIT)
-#define GDK_ELIMLIMIT (1<<GDK_ELIMPOWER) /* equivalently: ELIMBASE == 0 */
#define GDK_ELIMBASE(x) (((x) >> GDK_ELIMPOWER) << GDK_ELIMPOWER)
#define GDK_VAROFFSET ((var_t) GDK_STRHASHSIZE)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -3347,11 +3347,15 @@ BATcount_no_nil(BAT *b, BAT *s)
* not, return BATcount(b), else count offsets
* != nil offset */
}
- if (GDK_ELIMDOUBLES(bi.vh)) {
- off = strLocate(bi.vh, str_nil);
- if (off == (var_t) -2) {
- cnt = ci.ncand;
- break;
+ if (bi.vkey) {
+ if (GDK_ELIMDOUBLES(bi.vh)) {
+ off = strLocate(bi.vh, str_nil);
+ if (off == (var_t) -2) {
+ cnt = ci.ncand;
+ break;
+ }
+ } else {
+ off = 0;
}
switch (bi.width) {
case 1:
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -901,7 +901,7 @@ BATgroup_internal(BAT **groups, BAT **ex
/* for strings we can use the offset instead of the actual
* string values if we know that the strings in the string
* heap are unique */
- if (t == TYPE_str && GDK_ELIMDOUBLES(bi.vh)) {
+ if (t == TYPE_str && bi.vkey) {
switch (bi.width) {
case 1:
t = TYPE_bte;
@@ -1185,7 +1185,7 @@ BATgroup_internal(BAT **groups, BAT **ex
hs->heaplink.parentid = b->batCacheid;
if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshgrpl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heaplink.filename) ||
snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshgrpb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs->heapbckt.filename) ||
- HASHnew(hs, bi.type, BATcount(b), nbucket, BUN_NONE, false) != GDK_SUCCEED) {
+ HASHnew(hs, t, BATcount(b), nbucket, BUN_NONE, false) != GDK_SUCCEED) {
GDKfree(hs);
hs = NULL;
GDKerror("cannot allocate hash table\n");
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2804,7 +2804,8 @@ vkeyjoin(BAT **r1p, BAT **r2p, BAT **r3p
BATiter li = bat_iterator(l);
BATiter ri = bat_iterator(r);
- var_t niloff = GDK_ELIMDOUBLES(li.vh) ? strLocate(li.vh, str_nil) : 0;
+ var_t niloff = (ATOMstorage(li.type) == TYPE_str &&
+ GDK_ELIMDOUBLES(li.vh)) ? strLocate(li.vh, str_nil) : 0;
assert(niloff != (var_t) -2);
bit defmark = 0;
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -616,15 +616,14 @@ fullscan_str(BATiter *bi, struct candite
QryCtx *qry_ctx = MT_thread_get_qry_ctx();
if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) &&
- strcmp(tl, str_nil) != 0 &&
- strLocate(bi->vh, str_nil) == (var_t) -2) {
+ !strNil(tl) && strLocate(bi->vh, str_nil) == (var_t) -2) {
/* anti-equi select for non-nil value, and there are no
* nils, so we can use fast path; trigger by setting
* nonil */
bi->nonil = true;
}
if (!((equi ||
- (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) &&
+ (anti && tl == th && (bi->nonil || strNil(tl)))) &&
GDK_ELIMDOUBLES(bi->vh)))
return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
nil_matches, lval, hval, lnil, cnt, hseq,
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -108,9 +108,7 @@ BATunique(BAT *b, BAT *s)
hseq = b->hseqbase;
if (ATOMbasetype(bi.type) == TYPE_bte ||
- (bi.width == 1 &&
- ATOMstorage(bi.type) == TYPE_str &&
- (GDK_ELIMDOUBLES(bi.vh) || bi.vkey))) {
+ (bi.width == 1 && bi.vkey)) {
uint8_t val;
algomsg = "unique: byte-sized atoms";
@@ -134,9 +132,7 @@ BATunique(BAT *b, BAT *s)
TIMEOUT_CHECK(qry_ctx,
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed,
qry_ctx));
} else if (ATOMbasetype(bi.type) == TYPE_sht ||
- (bi.width == 2 &&
- ATOMstorage(bi.type) == TYPE_str &&
- (GDK_ELIMDOUBLES(bi.vh) || bi.vkey))) {
+ (bi.width == 2 && bi.vkey)) {
uint16_t val;
algomsg = "unique: short-sized atoms";
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]