Changeset: ad492b0794c2 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ad492b0794c2
Modified Files:
        gdk/gdk_join.c
Branch: qcancel
Log Message:

merge with default


diffs (truncated from 2795 to 300 lines):

diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -1384,22 +1384,21 @@ BATmaskedcands(oid hseq, BUN nr, BAT *ma
        msks->free = sizeof(ccand_t) + nmask * sizeof(uint32_t);
        uint32_t *r = (uint32_t*)(msks->base + sizeof(ccand_t));
        if (selected) {
-               memcpy(r, Tloc(masked, 0), nmask * sizeof(uint32_t));
+               if (nr <= BATcount(masked))
+                       memcpy(r, Tloc(masked, 0), nmask * sizeof(uint32_t));
+               else
+               memcpy(r, Tloc(masked, 0), (BATcount(masked) + 31) / 32 * sizeof(uint32_t));
        } else {
                const uint32_t *s = (const uint32_t *) Tloc(masked, 0);
-               BUN nmask_ = (BATcount(masked) + 31)/32;
+               BUN nmask_ = (BATcount(masked) + 31) / 32;
                for (BUN i = 0; i < nmask_; i++)
                        r[i] = ~s[i];
        }
        if (nr > BATcount(masked)) {
-               BUN rest = BATcount(masked)&31, nmask_ = (BATcount(masked)+31)/32, nrest = nr;
-               int v = 0;
-               if (nmask_ > nmask)
-                       nrest = 32-rest;
-
-               for (BUN j = rest; j < nrest; j++)
-                       v |= 1U<<j;
-               r[nmask_ -1] |= v;
+               BUN rest = BATcount(masked) & 31;
+               BUN nmask_ = (BATcount(masked) + 31) / 32;
+               if (rest > 0)
+                       r[nmask_ -1] |= ((1U << (32 - rest)) - 1) << rest;
                for (BUN j = nmask_; j < nmask; j++)
                        r[j] = ~0;
        }
@@ -1432,71 +1431,126 @@ BATmaskedcands(oid hseq, BUN nr, BAT *ma
        return bn;
 }
 
+/* convert a masked candidate list to a positive or negative candidate list */
 BAT *
 BATunmask(BAT *b)
 {
-       BAT *bn = COLnew(0, TYPE_oid, mask_cand(b) ? BATcount(b) : 1024, TRANSIENT);
-       if (bn == NULL)
-               return NULL;
-
 //     assert(!mask_cand(b) || CCAND(b)->mask); /* todo handle negmask case */
        BUN cnt;
        uint32_t rem;
        uint32_t val;
        const uint32_t *src;
-       oid *dst = (oid *) Tloc(bn, 0);
+       oid *dst;
        BUN n = 0;
        oid hseq = b->hseqbase;
+       bool negcand = false;
 
        if (mask_cand(b)) {
                cnt = ccand_free(b) / sizeof(uint32_t);
                rem = 0;
                src = (const uint32_t *) ccand_first(b);
                hseq -= (oid) CCAND(b)->firstbit;
+               /* create negative candidate list if more than half the
+                * bits are set */
+               negcand = BATcount(b) > cnt * 16;
        } else {
                cnt = BATcount(b) / 32;
                rem = BATcount(b) % 32;
                src = (const uint32_t *) Tloc(b, 0);
        }
-       for (BUN p = 0; p < cnt; p++) {
-               if ((val = src[p]) == 0)
-                       continue;
-               for (uint32_t i = 0; i < 32; i++) {
-                       if (val & (1U << i)) {
-                               if (n == BATcapacity(bn)) {
-                                       BATsetcount(bn, n);
-                                       if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
-                                               BBPreclaim(bn);
-                                               return NULL;
-                                       }
-                                       dst = (oid *) Tloc(bn, 0);
+       BAT *bn;
+
+       if (negcand) {
+               bn = COLnew(b->hseqbase, TYPE_void, 0, TRANSIENT);
+               if (bn == NULL)
+                       return NULL;
+               Heap *dels;
+               if ((dels = GDKzalloc(sizeof(Heap))) == NULL ||
+                   strconcat_len(dels->filename, sizeof(dels->filename),
+                                 BBP_physical(bn->batCacheid), ".theap",
+                                 NULL) >= sizeof(dels->filename) ||
+                   (dels->farmid = BBPselectfarm(TRANSIENT, TYPE_void,
+                                                 varheap)) == -1 ||
+                   HEAPalloc(dels,
+                             cnt * 32 - BATcount(b)
+                             + sizeof(ccand_t) / sizeof(oid),
+                             sizeof(oid), 0) != GDK_SUCCEED) {
+                       GDKfree(dels);
+                       BBPreclaim(bn);
+                       return NULL;
+               }
+               dels->parentid = bn->batCacheid;
+               * (ccand_t *) dels->base = (ccand_t) {
+                       .type = CAND_NEGOID,
+               };
+               dst = (oid *) (dels->base + sizeof(ccand_t));
+               for (BUN p = 0, v = 0; p < cnt; p++, v += 32) {
+                       if ((val = src[p]) == ~UINT32_C(0))
+                               continue;
+                       for (uint32_t i = 0; i < 32; i++) {
+                               if ((val & (1U << i)) == 0) {
+                                       if (v + i >= b->batCount + n)
+                                               break;
+                                       dst[n++] = hseq + v + i;
                                }
-                               dst[n++] = hseq + p * 32 + i;
                        }
                }
-       }
-       /* the last partial mask word */
-       if (rem > 0 && (val = src[cnt]) != 0) {
-               for (uint32_t i = 0; i < rem; i++) {
-                       if (val & (1U << i)) {
-                               if (n == BATcapacity(bn)) {
-                                       BATsetcount(bn, n);
-                                       if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
-                                               BBPreclaim(bn);
-                                               return NULL;
+               if (n == 0) {
+                       /* didn't need it after all */
+                       HEAPfree(dels, true);
+               } else {
+                       ATOMIC_INIT(&dels->refs, 1);
+                       bn->tvheap = dels;
+                       bn->tvheap->free = sizeof(ccand_t) + n * sizeof(oid);
+               }
+               BATsetcount(bn, BATcount(b));
+               bn->tseqbase = hseq;
+       } else {
+               bn = COLnew(b->hseqbase, TYPE_oid, mask_cand(b) ? BATcount(b) : 1024, TRANSIENT);
+               if (bn == NULL)
+                       return NULL;
+               dst = (oid *) Tloc(bn, 0);
+               for (BUN p = 0; p < cnt; p++) {
+                       if ((val = src[p]) == 0)
+                               continue;
+                       for (uint32_t i = 0; i < 32; i++) {
+                               if (val & (1U << i)) {
+                                       if (n == BATcapacity(bn)) {
+                                               BATsetcount(bn, n);
+                                               if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
+                                                       BBPreclaim(bn);
+                                                       return NULL;
+                                               }
+                                               dst = (oid *) Tloc(bn, 0);
                                        }
-                                       dst = (oid *) Tloc(bn, 0);
+                                       dst[n++] = hseq + p * 32 + i;
                                }
-                               dst[n++] = hseq + cnt * 32 + i;
                        }
                }
+               /* the last partial mask word */
+               if (rem > 0 && (val = src[cnt]) != 0) {
+                       for (uint32_t i = 0; i < rem; i++) {
+                               if (val & (1U << i)) {
+                                       if (n == BATcapacity(bn)) {
+                                               BATsetcount(bn, n);
+                                               if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
+                                                       BBPreclaim(bn);
+                                                       return NULL;
+                                               }
+                                               dst = (oid *) Tloc(bn, 0);
+                                       }
+                                       dst[n++] = hseq + cnt * 32 + i;
+                               }
+                       }
+               }
+               BATsetcount(bn, n);
        }
-       BATsetcount(bn, n);
-       bn->hseqbase = b->hseqbase;
        bn->tkey = true;
        bn->tsorted = true;
        bn->trevsorted = n <= 1;
        bn->tnil = false;
        bn->tnonil = true;
-       return virtualize(bn);
+       bn = virtualize(bn);
+       TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n", ALGOBATPAR(b), ALGOBATPAR(bn));
+       return bn;
 }
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3232,10 +3232,12 @@ joincost(BAT *r, struct canditer *lci, s
 #endif
                        rcost += BATcount(r) * 2.0;
        }
-       if (rci->ncand != BATcount(r)) {
+       if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
                /* instead of using the hash on r (cost in rcost), we
                 * can build a new hash on r taking the candidate list
-                * into account */
+                * into account; don't do this for masked candidate
+                * since the searching of the candidate list
+                * (canditer_idx) will kill us */
                double rccost;
                PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
                if (prop) {
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -792,9 +792,7 @@ BATprojectchain(BAT **bats)
        for (n = 0; bats[n]; n++) {
                b = bats[n];
                if (b->ttype == TYPE_msk || mask_cand(b)) {
-                       BAT *nb = b;
-
-                       if ((b = BATunmask(nb)) == NULL) {
+                       if ((b = BATunmask(b)) == NULL) {
                                goto bunins_failed;
                        }
                        tobedeleted[ndelete++] = b;
diff --git a/monetdb5/modules/kernel/batstr.c b/monetdb5/modules/kernel/batstr.c
--- a/monetdb5/modules/kernel/batstr.c
+++ b/monetdb5/modules/kernel/batstr.c
@@ -3817,17 +3817,7 @@ STRbatsubstring_2nd_3rd_cst(Client cntxt
        }
 bailout:
        GDKfree(buf);
-       if (bn && !msg) {
-               BATsetcount(bn, q);
-               bn->tnil = nils;
-               bn->tnonil = !nils;
-               bn->tkey = BATcount(bn) <= 1;
-               bn->tsorted = b->tsorted;
-               bn->trevsorted = b->trevsorted;
-               bn->theap->dirty = true;
-               BBPkeepref(*res = bn->batCacheid);
-       } else if (bn)
-               BBPreclaim(bn);
+       finalize_ouput(res, bn, msg, nils, q);
        unfix_inputs(2, b, bs);
        return msg;
 }
diff --git a/monetdb5/optimizer/opt_mergetable.c b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -2051,6 +2051,22 @@ OPTmergetableImplementation(Client cntxt
                bats = nr_of_bats(mb, p);
                nilbats = nr_of_nilbats(mb, p);
 
+               /* left joins can match at isMatJoinOp, so run this check beforehand */
+               if (match > 0 && isMatLeftJoinOp(p) && p->argc >= 5 && p->retc == 2 &&
+                       (match == 1 || match == 2) && bats+nilbats == 4) {
+                       m = is_a_mat(getArg(p,p->retc), &ml);
+                       o = is_a_mat(getArg(p,p->retc+2), &ml);
+
+                       if ((match == 1 && m >= 0) || (match == 2 && m >= 0 && o >= 0)) {
+                               if(mat_join2(mb, p, &ml, m, -1, o, -1)) {
+                                       msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY013) MAL_MALLOC_FAIL);
+                                       goto cleanup;
+                               }
+                               actions++;
+                               continue;
+                       }
+               }
+
                /* (l,r) Join (L, R, ..)
                 * 2 -> (l,r) equi/theta joins (l,r)
                 * 3 -> (l,r) range-joins (l,r1,r2)
@@ -2076,20 +2092,6 @@ OPTmergetableImplementation(Client cntxt
                        actions++;
                        continue;
                }
-               if (match > 0 && isMatLeftJoinOp(p) && p->argc >= 5 && p->retc == 2 &&
-                               (match == 1 || match == 2) && bats+nilbats == 4) {
-                       m = is_a_mat(getArg(p,p->retc), &ml);
-                       o = is_a_mat(getArg(p,p->retc+2), &ml);
-
-                       if ((match == 1 && m >= 0) || (match == 2 && m >= 0 && o >= 0)) {
-                               if(mat_join2(mb, p, &ml, m, -1, o, -1)) {
-                                       msg = createException(MAL,"optimizer.mergetable",SQLSTATE(HY013) MAL_MALLOC_FAIL);
-                                       goto cleanup;
-                               }
-                               actions++;
-                               continue;
-                       }
-               }
                /*
                 * Aggregate handling is a prime target for optimization.
                 * The simple cases are dealt with first.
diff --git a/monetdb5/optimizer/opt_support.c b/monetdb5/optimizer/opt_support.c
--- a/monetdb5/optimizer/opt_support.c
+++ b/monetdb5/optimizer/opt_support.c
@@ -674,7 +674,7 @@ inline int isSubJoin(InstrPtr p)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to