Changeset: ea84acf2dbc1 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ea84acf2dbc1
Modified Files:
gdk/gdk_join.c
Branch: Jul2021
Log Message:
Use hashjoin even for dense right-hand side with bitmask candidate lists.
diffs (134 lines):
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2570,11 +2570,10 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
Hash *restrict hsh = NULL;
bool locked = false;
- assert(!BATtvoid(r));
assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
int t = ATOMbasetype(r->ttype);
- if (r->ttype == TYPE_void || l->ttype == TYPE_void)
+ if (BATtvoid(r) || BATtvoid(l))
t = TYPE_void;
lwidth = l->twidth;
@@ -2644,6 +2643,9 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
"existing hash%s\n",
ALGOBATPAR(r),
swapped ? " (swapped)" : "");
+ } else if (BATtdense(r)) {
+ /* no hash, just dense lookup */
+ MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)"
: "hashjoin on dense");
} else {
/* we need to create a hash on r */
MT_thread_setalgorithm(swapped ? "hashjoin using new hash
(swapped)" : "hashjoin using new hash");
@@ -2654,7 +2656,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
goto bailout;
hsh = r->thash;
}
- assert(hsh != NULL);
+ assert(hsh != NULL || BATtdense(r));
ri = bat_iterator(r);
@@ -2674,7 +2676,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
false, false, __func__,
t0);
}
}
- } else {
+ } else if (!BATtdense(r)) {
for (rb = HASHget(hsh, HASHprobe(hsh, nil));
rb != HASHnil(hsh);
rb = HASHgetlink(hsh, rb)) {
@@ -2715,19 +2717,17 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
HASHJOIN(uuid);
break;
default:
- if (!hash_cand) {
+ if (!hash_cand && hsh) {
MT_rwlock_rdlock(&r->thashlock);
locked = true; /* in case we abandon */
hsh = r->thash; /* re-initialize inside lock */
}
while (lci->next < lci->ncand) {
lo = canditer_next(lci);
- if (BATtvoid(l)) {
- if (BATtdense(l))
- lval = lo - l->hseqbase + l->tseqbase;
- } else {
+ if (BATtdense(l))
+ lval = lo - l->hseqbase + l->tseqbase;
+ else if (l->ttype != TYPE_void)
v = VALUE(l, lo - l->hseqbase);
- }
nr = 0;
if ((!nil_matches || not_in) && cmp(v, nil) == 0) {
/* no match */
@@ -2750,6 +2750,23 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
if (semi && !max_one)
break;
}
+ } else if (hsh == NULL) {
+ assert(BATtdense(r));
+ ro = *(const oid *) v;
+ if (ro >= r->tseqbase &&
+ ro < r->tseqbase + r->batCount) {
+ ro -= r->tseqbase;
+ ro += rseq;
+ if (canditer_contains(rci, ro)) {
+ if (only_misses) {
+ nr++;
+ break;
+ }
+ HASHLOOPBODY();
+ if (semi && !max_one)
+ break;
+ }
+ }
} else if (rci->tpe != cand_dense) {
for (rb = HASHget(hsh, HASHprobe(hsh, v));
rb != HASHnil(hsh);
@@ -2824,7 +2841,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
if (nr > 0 && BATcount(r1) > nr)
r1->trevsorted = false;
}
- if (!hash_cand) {
+ if (!hash_cand && hsh) {
locked = false;
MT_rwlock_rdunlock(&r->thashlock);
}
@@ -3135,13 +3152,18 @@ joincost(BAT *r, struct canditer *lci, s
bat parent;
BAT *b;
- if (rci->nvals > 0) {
- /* if we need to do binary search on candidate
- * list, take that into account */
+ if ((rci->tpe == cand_materialized || rci->tpe == cand_except) &&
+ rci->nvals > 0) {
+ /* if we need to do binary search on candidate list,
+ * take that into account; note checking the other
+ * candidate types is essentially free */
rcost += log2((double) rci->nvals);
}
rcost *= lci->ncand;
- if (rhash) {
+ if (BATtdense(r)) {
+ /* no need for a hash, and lookup is free */
+ rhash = false; /* don't use it, even if it's there */
+ } else if (rhash) {
/* average chain length */
rcost *= (double) BATcount(r) / r->thash->nheads;
} else if ((parent = VIEWtparent(r)) != 0 &&
@@ -3619,8 +3641,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
rc = selectjoin(r1p, r2p, l, r, &lci, &rci,
nil_matches, t0, false, func);
goto doreturn;
- } else if (BATtdense(r) && rci.tpe == cand_dense &&
- lcnt > 0 && rcnt > 0) {
+ } else if (BATtdense(r) && rci.tpe == cand_dense) {
/* use special implementation for dense right-hand side */
rc = mergejoin_void(r1p, r2p, l, r, &lci, &rci,
nil_on_miss, only_misses, t0, false,
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list