Changeset: ad492b0794c2 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ad492b0794c2
Modified Files:
gdk/gdk_join.c
Branch: qcancel
Log Message:
merge with default
diffs (truncated from 2795 to 300 lines):
diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -1384,22 +1384,21 @@ BATmaskedcands(oid hseq, BUN nr, BAT *ma
msks->free = sizeof(ccand_t) + nmask * sizeof(uint32_t);
uint32_t *r = (uint32_t*)(msks->base + sizeof(ccand_t));
if (selected) {
- memcpy(r, Tloc(masked, 0), nmask * sizeof(uint32_t));
+ if (nr <= BATcount(masked))
+ memcpy(r, Tloc(masked, 0), nmask * sizeof(uint32_t));
+ else
+ memcpy(r, Tloc(masked, 0), (BATcount(masked) + 31) / 32 * sizeof(uint32_t));
} else {
const uint32_t *s = (const uint32_t *) Tloc(masked, 0);
- BUN nmask_ = (BATcount(masked) + 31)/32;
+ BUN nmask_ = (BATcount(masked) + 31) / 32;
for (BUN i = 0; i < nmask_; i++)
r[i] = ~s[i];
}
if (nr > BATcount(masked)) {
- BUN rest = BATcount(masked)&31, nmask_ = (BATcount(masked)+31)/32, nrest = nr;
- int v = 0;
- if (nmask_ > nmask)
- nrest = 32-rest;
-
- for (BUN j = rest; j < nrest; j++)
- v |= 1U<<j;
- r[nmask_ -1] |= v;
+ BUN rest = BATcount(masked) & 31;
+ BUN nmask_ = (BATcount(masked) + 31) / 32;
+ if (rest > 0)
+ r[nmask_ -1] |= ((1U << (32 - rest)) - 1) << rest;
for (BUN j = nmask_; j < nmask; j++)
r[j] = ~0;
}
@@ -1432,71 +1431,126 @@ BATmaskedcands(oid hseq, BUN nr, BAT *ma
return bn;
}
+/* convert a masked candidate list to a positive or negative candidate list */
BAT *
BATunmask(BAT *b)
{
- BAT *bn = COLnew(0, TYPE_oid, mask_cand(b) ? BATcount(b) : 1024, TRANSIENT);
- if (bn == NULL)
- return NULL;
-
// assert(!mask_cand(b) || CCAND(b)->mask); /* todo handle negmask case */
BUN cnt;
uint32_t rem;
uint32_t val;
const uint32_t *src;
- oid *dst = (oid *) Tloc(bn, 0);
+ oid *dst;
BUN n = 0;
oid hseq = b->hseqbase;
+ bool negcand = false;
if (mask_cand(b)) {
cnt = ccand_free(b) / sizeof(uint32_t);
rem = 0;
src = (const uint32_t *) ccand_first(b);
hseq -= (oid) CCAND(b)->firstbit;
+ /* create negative candidate list if more than half the
+ * bits are set */
+ negcand = BATcount(b) > cnt * 16;
} else {
cnt = BATcount(b) / 32;
rem = BATcount(b) % 32;
src = (const uint32_t *) Tloc(b, 0);
}
- for (BUN p = 0; p < cnt; p++) {
- if ((val = src[p]) == 0)
- continue;
- for (uint32_t i = 0; i < 32; i++) {
- if (val & (1U << i)) {
- if (n == BATcapacity(bn)) {
- BATsetcount(bn, n);
- if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
- BBPreclaim(bn);
- return NULL;
- }
- dst = (oid *) Tloc(bn, 0);
+ BAT *bn;
+
+ if (negcand) {
+ bn = COLnew(b->hseqbase, TYPE_void, 0, TRANSIENT);
+ if (bn == NULL)
+ return NULL;
+ Heap *dels;
+ if ((dels = GDKzalloc(sizeof(Heap))) == NULL ||
+ strconcat_len(dels->filename, sizeof(dels->filename),
+ BBP_physical(bn->batCacheid), ".theap",
+ NULL) >= sizeof(dels->filename) ||
+ (dels->farmid = BBPselectfarm(TRANSIENT, TYPE_void,
+ varheap)) == -1 ||
+ HEAPalloc(dels,
+ cnt * 32 - BATcount(b)
+ + sizeof(ccand_t) / sizeof(oid),
+ sizeof(oid), 0) != GDK_SUCCEED) {
+ GDKfree(dels);
+ BBPreclaim(bn);
+ return NULL;
+ }
+ dels->parentid = bn->batCacheid;
+ * (ccand_t *) dels->base = (ccand_t) {
+ .type = CAND_NEGOID,
+ };
+ dst = (oid *) (dels->base + sizeof(ccand_t));
+ for (BUN p = 0, v = 0; p < cnt; p++, v += 32) {
+ if ((val = src[p]) == ~UINT32_C(0))
+ continue;
+ for (uint32_t i = 0; i < 32; i++) {
+ if ((val & (1U << i)) == 0) {
+ if (v + i >= b->batCount + n)
+ break;
+ dst[n++] = hseq + v + i;
}
- dst[n++] = hseq + p * 32 + i;
}
}
- }
- /* the last partial mask word */
- if (rem > 0 && (val = src[cnt]) != 0) {
- for (uint32_t i = 0; i < rem; i++) {
- if (val & (1U << i)) {
- if (n == BATcapacity(bn)) {
- BATsetcount(bn, n);
- if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
- BBPreclaim(bn);
- return NULL;
+ if (n == 0) {
+ /* didn't need it after all */
+ HEAPfree(dels, true);
+ } else {
+ ATOMIC_INIT(&dels->refs, 1);
+ bn->tvheap = dels;
+ bn->tvheap->free = sizeof(ccand_t) + n * sizeof(oid);
+ }
+ BATsetcount(bn, BATcount(b));
+ bn->tseqbase = hseq;
+ } else {
+ bn = COLnew(b->hseqbase, TYPE_oid, mask_cand(b) ? BATcount(b) : 1024, TRANSIENT);
+ if (bn == NULL)
+ return NULL;
+ dst = (oid *) Tloc(bn, 0);
+ for (BUN p = 0; p < cnt; p++) {
+ if ((val = src[p]) == 0)
+ continue;
+ for (uint32_t i = 0; i < 32; i++) {
+ if (val & (1U << i)) {
+ if (n == BATcapacity(bn)) {
+ BATsetcount(bn, n);
+ if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
+ BBPreclaim(bn);
+ return NULL;
+ }
+ dst = (oid *) Tloc(bn, 0);
}
- dst = (oid *) Tloc(bn, 0);
+ dst[n++] = hseq + p * 32 + i;
}
- dst[n++] = hseq + cnt * 32 + i;
}
}
+ /* the last partial mask word */
+ if (rem > 0 && (val = src[cnt]) != 0) {
+ for (uint32_t i = 0; i < rem; i++) {
+ if (val & (1U << i)) {
+ if (n == BATcapacity(bn)) {
+ BATsetcount(bn, n);
+ if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
+ BBPreclaim(bn);
+ return NULL;
+ }
+ dst = (oid *) Tloc(bn, 0);
+ }
+ dst[n++] = hseq + cnt * 32 + i;
+ }
+ }
+ }
+ BATsetcount(bn, n);
}
- BATsetcount(bn, n);
- bn->hseqbase = b->hseqbase;
bn->tkey = true;
bn->tsorted = true;
bn->trevsorted = n <= 1;
bn->tnil = false;
bn->tnonil = true;
- return virtualize(bn);
+ bn = virtualize(bn);
+ TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n", ALGOBATPAR(b), ALGOBATPAR(bn));
+ return bn;
}
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3232,10 +3232,12 @@ joincost(BAT *r, struct canditer *lci, s
#endif
rcost += BATcount(r) * 2.0;
}
- if (rci->ncand != BATcount(r)) {
+ if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
/* instead of using the hash on r (cost in rcost), we
* can build a new hash on r taking the candidate list
- * into account */
+ * into account; don't do this for masked candidate
+ * since the searching of the candidate list
+ * (canditer_idx) will kill us */
double rccost;
PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
if (prop) {
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -792,9 +792,7 @@ BATprojectchain(BAT **bats)
for (n = 0; bats[n]; n++) {
b = bats[n];
if (b->ttype == TYPE_msk || mask_cand(b)) {
- BAT *nb = b;
-
- if ((b = BATunmask(nb)) == NULL) {
+ if ((b = BATunmask(b)) == NULL) {
goto bunins_failed;
}
tobedeleted[ndelete++] = b;
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -3817,17 +3817,7 @@ STRbatsubstring_2nd_3rd_cst(Client cntxt
}
bailout:
GDKfree(buf);
- if (bn && !msg) {
- BATsetcount(bn, q);
- bn->tnil = nils;
- bn->tnonil = !nils;
- bn->tkey = BATcount(bn) <= 1;
- bn->tsorted = b->tsorted;
- bn->trevsorted = b->trevsorted;
- bn->theap->dirty = true;
- BBPkeepref(*res = bn->batCacheid);
- } else if (bn)
- BBPreclaim(bn);
+ finalize_ouput(res, bn, msg, nils, q);
unfix_inputs(2, b, bs);
return msg;
}
diff --git a/monetdb5/optimizer/opt_mergetable.c b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -2051,6 +2051,22 @@ OPTmergetableImplementation(Client cntxt
bats = nr_of_bats(mb, p);
nilbats = nr_of_nilbats(mb, p);
+ /* left joins can match at isMatJoinOp, so run this check beforehand */
+ if (match > 0 && isMatLeftJoinOp(p) && p->argc >= 5 && p->retc == 2 &&
+ (match == 1 || match == 2) && bats+nilbats == 4) {
+ m = is_a_mat(getArg(p,p->retc), &ml);
+ o = is_a_mat(getArg(p,p->retc+2), &ml);
+
+ if ((match == 1 && m >= 0) || (match == 2 && m >= 0 && o >= 0)) {
+ if(mat_join2(mb, p, &ml, m, -1, o, -1)) {
+ msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ goto cleanup;
+ }
+ actions++;
+ continue;
+ }
+ }
+
/* (l,r) Join (L, R, ..)
* 2 -> (l,r) equi/theta joins (l,r)
* 3 -> (l,r) range-joins (l,r1,r2)
@@ -2076,20 +2092,6 @@ OPTmergetableImplementation(Client cntxt
actions++;
continue;
}
- if (match > 0 && isMatLeftJoinOp(p) && p->argc >= 5 && p->retc == 2 &&
- (match == 1 || match == 2) && bats+nilbats == 4) {
- m = is_a_mat(getArg(p,p->retc), &ml);
- o = is_a_mat(getArg(p,p->retc+2), &ml);
-
- if ((match == 1 && m >= 0) || (match == 2 && m >= 0 && o >= 0)) {
- if(mat_join2(mb, p, &ml, m, -1, o, -1)) {
- msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY013) MAL_MALLOC_FAIL);
- goto cleanup;
- }
- actions++;
- continue;
- }
- }
/*
* Aggregate handling is a prime target for optimization.
* The simple cases are dealt with first.
diff --git a/monetdb5/optimizer/opt_support.c b/monetdb5/optimizer/opt_support.c
--- a/monetdb5/optimizer/opt_support.c
+++ b/monetdb5/optimizer/opt_support.c
@@ -674,7 +674,7 @@ inline int isSubJoin(InstrPtr p)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list