Changeset: 8a6820190a59 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8a6820190a59
Modified Files:
        clients/Tests/MAL-signatures-hge.test
        clients/Tests/MAL-signatures.test
        clients/Tests/exports.stable.out
        gdk/gdk_subquery.c
        monetdb5/modules/kernel/algebra.c
        monetdb5/optimizer/opt_mergetable.c
        monetdb5/optimizer/opt_prelude.c
        monetdb5/optimizer/opt_prelude.h
        monetdb5/optimizer/opt_support.c
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/sql_scenario.c
        sql/backends/monet5/sql_statement.c
        sql/backends/monet5/sql_statement.h
        sql/server/rel_dump.c
        sql/server/rel_optimize_sel.c
        sql/server/rel_optimizer.c
        sql/server/rel_unnest.c
        sql/server/sql_atom.c
        sql/server/sql_parser.y
        sql/server/sql_scan.c
        sql/test/bugs/Tests/rtrim_bug.test
Branch: groupjoin
Log Message:

groupjoin


diffs (truncated from 1287 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test 
b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -3494,6 +3494,11 @@ command algebra.likeselect(X_0:bat[:str]
 PCRElikeselect;
 Select all head values of the first input BAT for which the@tail value is 
"like" the given (SQL-style) pattern and for@which the head value occurs in the 
tail of the second input@BAT.@Input is a dense-headed BAT, output is a 
dense-headed BAT with in@the tail the head value of the input BAT for which 
the@relationship holds.  The output BAT is sorted on the tail value.
 algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit]) 
+ALGmarkjoin;
+Left mark join with candidate lists, produces left output and mark flag; 
+algebra
 not_like
 command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit 
 PCREnotlike;
diff --git a/clients/Tests/MAL-signatures.test 
b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -2929,6 +2929,11 @@ command algebra.likeselect(X_0:bat[:str]
 PCRElikeselect;
 Select all head values of the first input BAT for which the@tail value is 
"like" the given (SQL-style) pattern and for@which the head value occurs in the 
tail of the second input@BAT.@Input is a dense-headed BAT, output is a 
dense-headed BAT with in@the tail the head value of the input BAT for which 
the@relationship holds.  The output BAT is sorted on the tail value.
 algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid], 
X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit]) 
+ALGmarkjoin;
+Left mark join with candidate lists, produces left output and mark flag; 
+algebra
 not_like
 command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit 
 PCREnotlike;
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1051,6 +1051,8 @@ const char *mal_version(void);
 stream *maleventstream;
 const char *manifoldRef;
 const char *mapiRef;
+const char *markjoinRef;
+const char *markselectRef;
 const char *maskRef;
 const char *matRef;
 const char *maxRef;
@@ -1127,6 +1129,7 @@ void opt_pipes_reset(void);
 str optimizeMALBlock(Client cntxt, MalBlkPtr mb);
 const char *optimizerRef;
 const char *outerjoinRef;
+const char *outerselectRef;
 const char *packIncrementRef;
 const char *packRef;
 const char *parametersRef;
diff --git a/gdk/gdk_subquery.c b/gdk/gdk_subquery.c
--- a/gdk/gdk_subquery.c
+++ b/gdk/gdk_subquery.c
@@ -448,8 +448,9 @@ BATanyequal_grp(BAT *l, BAT *r, BAT *g, 
                res->tkey = BATcount(res) <= 1;
                res->tsorted = BATcount(res) <= 1;
                res->trevsorted = BATcount(res) <= 1;
-               res->tnil = hasnil != 0;
-               res->tnonil = hasnil == 0;
+               (void)hasnil;
+               res->tnil = false ;//hasnil != 0;
+               res->tnonil = false ;//hasnil == 0;
        }
 
        TRC_DEBUG(ALGO, "l=" ALGOBATFMT ",r=" ALGOBATFMT ",g=" ALGOBATFMT
@@ -500,7 +501,7 @@ BATallnotequal_grp(BAT *l, BAT *r, BAT *
                if ((res = COLnew(min, TYPE_bit, ngrp, TRANSIENT)) == NULL)
                        goto alloc_fail;
                ret = (bit *) Tloc(res, 0);
-               memset(ret, FALSE, ngrp * sizeof(bit));
+               memset(ret, TRUE, ngrp * sizeof(bit));
 
                if (!g || BATtdense(g))
                        gids = NULL;
diff --git a/monetdb5/modules/kernel/algebra.c 
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -377,6 +377,180 @@ ALGthetaselect2(bat *result, const bat *
        return MAL_SUCCEED;
 }
 
+#include <gdk_subquery.h>
+/*
+ * algebra.markselect: group the rows of gid and compute one mark bit per
+ * group.
+ *
+ * Sketch of the computation:
+ *   g, e = group.done(gid)
+ *   m = anyequal(l, r, g, e, NULL); or allnotequal
+ *   li = project(e, gid);
+ *   return li, m
+ *
+ * r1   (out) BATproject(e, gid): one gid value per group
+ * r2   (out) per-group result of the any-equal / all-not-equal aggregate
+ * gid  BAT that is grouped
+ * mid  optional bit BAT (NULL pointer or bat_nil when absent); when present
+ *      it is converted to a temporary oid BAT (bit_nil -> oid_nil, any other
+ *      value -> 0) and handed to the *_grp2 aggregate variants
+ * lid, rid  the two value BATs compared by the aggregate
+ * any  true selects BATanyequal_grp[2], false BATallnotequal_grp[2]
+ *
+ * Returns MAL_SUCCEED, or an exception when an input BAT cannot be fetched
+ * (HY002), an allocation fails (HY013) or a GDK operation fails.  All
+ * intermediate BATs are released on every path.
+ */
+static str
+ALGmarkselect(bat *r1, bat *r2, const bat *gid, const bat *mid, const bat *lid, const bat *rid, const bit *any)
+{
+       BAT *li = NULL, *mask = NULL, *l = NULL, *r = NULL;
+       BAT *g = NULL, *e = NULL, *ids = NULL, *m = NULL, *res = NULL;
+       str msg = MAL_SUCCEED;
+
+       if ((li = BATdescriptor(*gid)) == NULL ||
+           (mid && !is_bat_nil(*mid) && (mask = BATdescriptor(*mid)) == NULL) ||
+           (l = BATdescriptor(*lid)) == NULL ||
+           (r = BATdescriptor(*rid)) == NULL) {
+               msg = createException(MAL, "algebra.markselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+               goto bailout;
+       }
+       if (BATgroup(&g, &e, NULL, li, NULL, NULL, NULL, NULL) != GDK_SUCCEED) {
+               msg = createException(MAL, "algebra.markselect", GDK_EXCEPTION);
+               goto bailout;
+       }
+
+       if (mask) {
+               /* ugh temp convert into oid bat ! */
+               BUN nr = BATcount(mask);
+
+               if ((ids = COLnew(0, TYPE_oid, nr, TRANSIENT)) == NULL) {
+                       msg = createException(MAL, "algebra.markselect", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+                       goto bailout;
+               }
+               oid *o = Tloc(ids, 0);
+               const bit *ma = Tloc(mask, 0);
+               for (BUN i = 0; i < nr; i++)
+                       o[i] = (ma[i] == bit_nil) ? oid_nil : 0;
+               BATsetcount(ids, nr);
+               /* nothing is known about order, uniqueness or nils */
+               ids->tsorted = 0;
+               ids->tkey = 0;
+               ids->tnil = 0;
+               ids->tnonil = 0;
+               if (*any)
+                       m = BATanyequal_grp2(l, r, ids, g, e, NULL);
+               else
+                       m = BATallnotequal_grp2(l, r, ids, g, e, NULL);
+       } else {
+               if (*any)
+                       m = BATanyequal_grp(l, r, g, e, NULL);
+               else
+                       m = BATallnotequal_grp(l, r, g, e, NULL);
+       }
+       /* both the aggregate and the projection can fail; never hand out NULL */
+       if (m == NULL || (res = BATproject(e, li)) == NULL) {
+               msg = createException(MAL, "algebra.markselect", GDK_EXCEPTION);
+               goto bailout;
+       }
+
+       *r1 = res->batCacheid;
+       *r2 = m->batCacheid;
+       BBPkeepref(res);
+       BBPkeepref(m);
+       /* ownership of the results moved to the caller */
+       res = NULL;
+       m = NULL;
+
+bailout:
+       /* BBPreclaim is called with NULL for every BAT that was never obtained */
+       BBPreclaim(res);
+       BBPreclaim(m);
+       BBPreclaim(ids);
+       BBPreclaim(g);
+       BBPreclaim(e);
+       BBPreclaim(r);
+       BBPreclaim(l);
+       BBPreclaim(mask);
+       BBPreclaim(li);
+       return msg;
+}
+
+/*
+ * algebra.outerselect: for each l-cand in lid, return at least one row; if
+ * rid == nil, return nil else pid.
+ *
+ * lid  oid BAT, asserted sorted; consecutive equal values form one group
+ * rid  bit BAT read per row of lid; bit_nil stands for "empty"
+ * pid  bit BAT read per row of lid; TRUE where the predicate holds
+ * r1   (out) oids of the emitted rows, counted from lid's hseqbase
+ * r2   (out) mark per emitted row
+ *
+ * A row is emitted when its rid value is nil or its pid value is TRUE; the
+ * mark is bit_nil in the first case and TRUE in the second.  A group without
+ * any emitted row contributes a single row (the oid of its last row) with
+ * mark bit_nil.
+ *
+ * Returns MAL_SUCCEED, or an exception when an input BAT cannot be fetched
+ * (HY002), rid/pid are shorter than lid (42000) or an allocation fails
+ * (HY013).
+ */
+static str
+ALGouterselect(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *pid)
+{
+       BAT *l = BATdescriptor(*lid); /* oid */
+       BAT *r = BATdescriptor(*rid); /* bit, nil for empty */
+       BAT *p = BATdescriptor(*pid); /* bit */
+       BAT *res1 = NULL, *res2 = NULL;
+
+       if (!l || !r || !p) {
+               BBPreclaim(l);
+               BBPreclaim(r);
+               BBPreclaim(p);
+               throw(MAL, "algebra.outerselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+       }
+       BUN nr = BATcount(l), q = 0;
+
+       /* r and p are indexed with the row number of l below */
+       if (BATcount(r) < nr || BATcount(p) < nr) {
+               BBPreclaim(l);
+               BBPreclaim(r);
+               BBPreclaim(p);
+               throw(MAL, "algebra.outerselect", SQLSTATE(42000) "rid and pid must have at least as many rows as lid");
+       }
+       if ((res1 = COLnew(0, TYPE_oid, nr, TRANSIENT)) == NULL || (res2 = COLnew(0, TYPE_bit, nr, TRANSIENT)) == NULL) {
+               BBPreclaim(l);
+               BBPreclaim(r);
+               BBPreclaim(p);
+               BBPreclaim(res1);
+               throw(MAL, "algebra.outerselect", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+       }
+       assert(l->tsorted);
+       /* TODO handle void cases */
+       oid *ri1 = Tloc(res1, 0);
+       bit *ri2 = Tloc(res2, 0);
+       const oid *li = Tloc(l, 0);
+       const bit *ri = Tloc(r, 0);
+       const bit *pi = Tloc(p, 0);
+
+       if (!li) { /* void case ? */
+               /* every row is its own group: always emit exactly one row */
+               oid c = l->hseqbase;
+               for (BUN n = 0; n < nr; n++, c++) {
+                       ri1[q] = c;
+                       ri2[q] = (ri[n] != bit_nil && pi[n] == TRUE) ? TRUE : bit_nil;
+                       q++;
+               }
+       } else {
+               oid c = l->hseqbase;
+               oid cur = nr ? li[0] : oid_nil;
+               bool used = false;      /* did the current group emit a row yet? */
+
+               for (BUN n = 0; n < nr; n++, c++) {
+                       if (cur != li[n]) {
+                               /* group boundary: pad the previous group if it stayed empty */
+                               if (!used) {
+                                       ri1[q] = c - 1;
+                                       ri2[q] = bit_nil;
+                                       q++;
+                               }
+                               used = false;
+                               cur = li[n];
+                       }
+                       if (ri[n] == bit_nil || pi[n] == TRUE) {
+                               ri1[q] = c;
+                               ri2[q] = ri[n] == bit_nil ? bit_nil : TRUE;
+                               used = true;
+                               q++;
+                       }
+               }
+               /* pad the last group as well */
+               if (nr && !used) {
+                       ri1[q] = c - 1;
+                       ri2[q] = bit_nil;
+                       q++;
+               }
+       }
+       BATsetcount(res1, q);
+       BATsetcount(res2, q);
+       /* oids are emitted in strictly increasing order */
+       res1->tsorted = true;
+       res1->tkey = true;
+       res1->trevsorted = false;
+       res1->tnil = false;
+       res1->tnonil = true;
+       /* marks repeat and may be nil: claim no order, uniqueness or non-nil-ness */
+       res2->tsorted = false;
+       res2->trevsorted = false;
+       res2->tkey = false;
+       res2->tnonil = false;
+
+       BBPreclaim(l);
+       BBPreclaim(r);
+       BBPreclaim(p);
+
+       BBPkeepref(res1);
+       BBPkeepref(res2);
+       *r1 = res1->batCacheid;
+       *r2 = res2->batCacheid;
+       return MAL_SUCCEED;
+}
+
+
 static str
 ALGselectNotNil(bat *result, const bat *bid)
 {
@@ -567,6 +741,86 @@ ALGleftjoin1(bat *r1, const bat *lid, co
                                   BATleftjoin, NULL, NULL, NULL, NULL, NULL, 
NULL, "algebra.leftjoin");
 }
 
+static str ALGcrossproduct2(bat *l, bat *r, const bat *left, const bat *right, 
const bit *max_one);
+static str
+ALGmarkjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, 
const bat *srid,
+                  const bit *any, const lng *estimate)
+{
+       str res = NULL;
+       bit max_one = false;
+       *r1 = *r2 = 0;
+       (void)estimate;
+       /* for now: (left) cross aggr (any-equal) */
+       BAT *rr = BATdescriptor(is_bat_nil(*srid)?*rid:*srid);
+       if (!BATcount(rr)) {
+               BAT *l = NULL;
+               if (is_bat_nil(*slid)) {
+                       BAT *lp = BATdescriptor(*lid);
+
+                       if (lp) {
+                               l = BATdense(lp->hseqbase, lp->hseqbase, 
BATcount(lp));
+                               BBPunfix(lp->batCacheid);
+                       }
+               } else {
+                       l = BATdescriptor(*slid);
+               }
+               bit v = *any?false:true;
+               BAT *m =  BATconstant( l->hseqbase, TYPE_bit, &v, BATcount(l), 
TRANSIENT);
+               BBPkeepref(l);
+               BBPkeepref(m);
+               *r1 = l->batCacheid;
+               *r2 = m->batCacheid;
+               BBPunfix(rr->batCacheid);
+               return MAL_SUCCEED;
+       }
+       BBPunfix(rr->batCacheid);
+       if ((res = ALGcrossproduct2(r1, r2, is_bat_nil(*slid)?lid:slid, 
(*srid)?rid:srid, &max_one)) == MAL_SUCCEED) {
+               BAT *li = BATdescriptor(*r1), *g = NULL, *e = NULL;
+               if (!li) {
+                       BBPrelease(*r1);
+                       BBPrelease(*r2);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to