Changeset: 8b106db8d96c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8b106db8d96c
Modified Files:
        clients/Tests/MAL-signatures-hge.test
        clients/Tests/MAL-signatures.test
        monetdb5/modules/kernel/algebra.c
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/sql_statement.c
        sql/backends/monet5/sql_statement.h
        sql/include/sql_catalog.h
        sql/include/sql_relation.h
        sql/rel.txt
        sql/server/rel_dump.c
        sql/server/rel_exp.c
        sql/server/rel_optimize_exps.c
        sql/server/rel_optimize_proj.c
        sql/server/rel_optimize_sel.c
        sql/server/rel_optimizer.c
        sql/server/rel_rewriter.c
        sql/server/rel_statistics.c
        sql/server/rel_unnest.c
Branch: groupjoin
Log Message:

reimplemented exists, in/not in, any/all.
For expressions which need groupjoins with nil information (in/any/all),
the compare expressions are marked as 'any' (all cases are rewritten into any
and not any).
In the dump output this is seen as a '+' before the = operator.

removed all uses of mark_in/mark_notin

The current implementation uses cross + outer/markselect, i.e. outer/markjoins
still need work.


diffs (truncated from 1476 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test 
b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -3500,7 +3500,7 @@ ALGmarkjoin;
 Left mark join with candidate lists, produces left output and mark flag; 
 algebra
 markselect
-command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:any_1], 
X_3:bat[:any_1], X_4:bit) (X_5:bat[:oid], X_6:bat[:bit]) 
+command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
 ALGmarkselect;
 Group on group-ids, return aggregated anyequal or allnotequal
 algebra
@@ -3530,7 +3530,7 @@ ALGouterjoin;
 Left outer join with candidate lists
 algebra
 outerselect
-command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit]) 
(X_3:bat[:oid], X_4:bat[:bit]) 
+command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
 ALGouterselect;
 Per input lid return atleast one row, if none of the predicates (p) hold, 
return a nil, else 'all' true cases.
 algebra
diff --git a/clients/Tests/MAL-signatures.test 
b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -2935,7 +2935,7 @@ ALGmarkjoin;
 Left mark join with candidate lists, produces left output and mark flag; 
 algebra
 markselect
-command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:any_1], 
X_3:bat[:any_1], X_4:bit) (X_5:bat[:oid], X_6:bat[:bit]) 
+command algebra.markselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
 ALGmarkselect;
 Group on group-ids, return aggregated anyequal or allnotequal
 algebra
@@ -2965,7 +2965,7 @@ ALGouterjoin;
 Left outer join with candidate lists
 algebra
 outerselect
-command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit]) 
(X_3:bat[:oid], X_4:bat[:bit]) 
+command algebra.outerselect(X_0:bat[:oid], X_1:bat[:bit], X_2:bat[:bit], 
X_3:bit) (X_4:bat[:oid], X_5:bat[:bit]) 
 ALGouterselect;
 Per input lid return atleast one row, if none of the predicates (p) hold, 
return a nil, else 'all' true cases.
 algebra
diff --git a/monetdb5/modules/kernel/algebra.c 
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -386,147 +386,171 @@ ALGthetaselect2(bat *result, const bat *
        return MAL_SUCCEED;
 }
 
-#include <gdk_subquery.h>
 static str
-ALGmarkselect(bat *r1, bat *r2, const bat *gid, const bat *mid, const bat 
*lid, const bat *rid, const bit *any)
+ALGmarkselect(bat *r1, bat *r2, const bat *gid, const bat *mid, const bat 
*pid, const bit *Any)
 {
-       /* g, e = group.done(gid)
-        * m = anyequal(l, r,  g, e, NULL); or allnotequal
-        * li = project(e, gid);
-        * return li, m
-        */
-       BAT *li = BATdescriptor(*gid), *g, *e, *mask = NULL;
+       BAT *g = BATdescriptor(*gid); /* oid */
+       BAT *m = BATdescriptor(*mid); /* bit, true: match, nil: empty set, 
false: nil on left */
+       BAT *p = BATdescriptor(*pid); /* bit */
+       BAT *res1 = NULL, *res2 = NULL;
+       bit any = *Any; /* any or normal comparision semantics */
 
-       if (!li)
+       if (!g || !m || !p) {
+               if (g) BBPreclaim(g);
+               if (m) BBPreclaim(m);
+               if (p) BBPreclaim(p);
                throw(MAL, "algebra.markselect", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
-       if (mid && !is_bat_nil(*mid) && (mask = BATdescriptor(*mid)) == NULL) {
-               BBPreclaim(li);
+       }
+       BUN nr = BATcount(g), q = 0;
+
+       if ((res1 = COLnew(0, TYPE_oid, nr, TRANSIENT)) == NULL || (res2 = 
COLnew(0, TYPE_bit, nr, TRANSIENT)) == NULL) {
+               BBPreclaim(g);
+               BBPreclaim(m);
+               BBPreclaim(p);
+               if (res1) BBPreclaim(res1);
                throw(MAL, "algebra.markselect", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
        }
-       if (BATgroup(&g, &e, NULL, li, NULL, NULL, NULL, NULL) == GDK_SUCCEED) {
-               BAT *l = BATdescriptor(*lid);
-               BAT *r = BATdescriptor(*rid);
-               BAT *m = NULL;
+       assert(g->tsorted);
+       oid *ri1 = Tloc(res1, 0);
+       bit *ri2 = Tloc(res2, 0);
+       oid *gi = Tloc(g, 0);
+       bit *mi = Tloc(m, 0);
+       bit *pi = Tloc(p, 0);
+       oid cur = oid_nil;
 
-               if (!l || !r) {
-                       BBPreclaim(li);
-                       BBPreclaim(mask);
-                       BBPreclaim(l);
-                       BBPreclaim(r);
-                       throw(MAL, "algebra.markselect", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
+       if (!gi) { /* void case ? */
+               oid c = g->hseqbase;
+               for (BUN n = 0; n < nr; n++, c++) {
+                       ri1[q] = c;
+                       if (mi[n] == bit_nil) /* empty */
+                               ri2[q] = FALSE;
+                       else if (pi[n] == bit_nil || mi[n] == false)
+                               ri2[q] = bit_nil;
+                       else
+                               ri2[q] = (mi[n] == TRUE && pi[n] == 
TRUE)?TRUE:FALSE;
+                       q++;
                }
-               if (mask) {
-                       /* ugh temp convert into oid bat ! */
-                       BUN nr = BATcount(mask);
-                       BAT *rid = COLnew(0, TYPE_oid, nr, TRANSIENT);
-                       if (!rid) {
-                               BBPreclaim(li);
-                               BBPreclaim(mask);
-                               BBPreclaim(l);
-                               BBPreclaim(r);
-                               throw(MAL, "algebra.markselect", 
SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
-                       }
-                       oid *o = Tloc(rid, 0);
-                       bit *ma = Tloc(mask, 0);
-                       for(BUN i = 0; i < nr; i++ ) {
-                               o[i] = (ma[i] == bit_nil)?oid_nil:0;
+       } else {
+               oid c = g->hseqbase;
+               if (nr)
+                       cur = gi[0];
+               bit m = FALSE;
+               bool has_nil = false;
+               for (BUN n = 0; n < nr; n++, c++) {
+                       if (c && cur != gi[n]) {
+                               ri1[q] = c-1;
+                               ri2[q] = (m == 
TRUE)?TRUE:(has_nil)?bit_nil:FALSE;
+                               q++;
+                               cur = gi[n];
+                               m = FALSE;
+                               has_nil = false;
                        }
-                       BATsetcount(rid, nr);
-                       rid->tsorted = 0;
-                       rid->tkey = 0;
-                       rid->tnil = 0;
-                       rid->tnonil = 0;
-                       if (*any)
-                               m = BATanyequal_grp2(l, r, rid, g, e, NULL);
-                       else
-                               m = BATallnotequal_grp2(l, r, rid, g, e, NULL);
-                       BBPreclaim(rid);
-               } else {
-                       if (*any)
-                               m = BATanyequal_grp(l, r, g, e, NULL);
-                       else
-                               m = BATallnotequal_grp(l, r, g, e, NULL);
+                       if (m == TRUE)
+                               continue;
+                       if (m == FALSE && mi[n] == bit_nil /* empty */) {
+                               m = m;
+                       } else if ((mi[n] == FALSE && pi[n] == TRUE) /* ie has 
nil */ || (any && mi[n] == TRUE && pi[n] == bit_nil)) {
+                               has_nil = true;
+                       } else {
+                               m = (mi[n] == TRUE && pi[n] == TRUE)?TRUE:m;
+                       }
                }
+               if (nr) {
+                       ri1[q] = c-1;
+                       ri2[q] = (m == TRUE)?TRUE:(has_nil)?bit_nil:FALSE;
+               }
+               q++;
+       }
+       BATsetcount(res1, q);
+       BATsetcount(res2, q);
+       res1->tsorted = true;
+       res1->tkey = true;
+       res1->trevsorted = false;
+       res2->tsorted = false;
+       res2->trevsorted = false;
+       res1->tnil = false;
+       res1->tnonil = true;
+       res2->tnonil = false;
 
-               BBPreclaim(mask);
-               BBPreclaim(l);
-               BBPreclaim(r);
-               BBPreclaim(g);
-               l = BATproject(e, li);
-               BBPreclaim(e);
-               BBPreclaim(li);
+       BBPreclaim(g);
+       BBPreclaim(m);
+       BBPreclaim(p);
 
-               BBPkeepref(l);
-               BBPkeepref(m);
-               *r1 = l->batCacheid;
-               *r2 = m->batCacheid;
-               return MAL_SUCCEED;
-       } else {
-               BBPreclaim(li);
-               throw(MAL, "algebra.markselect", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
-       }
+       BBPkeepref(res1);
+       BBPkeepref(res2);
+       *r1 = res1->batCacheid;
+       *r2 = res2->batCacheid;
        return MAL_SUCCEED;
 }
 
 static str
-ALGouterselect(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat 
*pid)
+ALGouterselect(bat *r1, bat *r2, const bat *gid, const bat *mid, const bat 
*pid, const bit *Any)
 {
        /* for each l-cand in lid, return atleast one, if rid == nil, return 
nil else pid  */
-       BAT *l = BATdescriptor(*lid); /* oid */
-       BAT *r = BATdescriptor(*rid); /* bit, nil for empty */
+       BAT *g = BATdescriptor(*gid); /* oid */
+       BAT *m = BATdescriptor(*mid); /* bit, true: match, nil: empty set, 
false: nil on left */
        BAT *p = BATdescriptor(*pid); /* bit */
        BAT *res1 = NULL, *res2 = NULL;
+       bit any = *Any; /* any or normal comparision semantics */
 
-       if (!l || !r || !p) {
-               if (l) BBPreclaim(l);
-               if (r) BBPreclaim(r);
+       if (!g || !m || !p) {
+               if (g) BBPreclaim(g);
+               if (m) BBPreclaim(m);
                if (p) BBPreclaim(p);
                throw(MAL, "algebra.outerselect", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
        }
-       BUN nr = BATcount(l), q = 0;
+       BUN nr = BATcount(g), q = 0;
 
        if ((res1 = COLnew(0, TYPE_oid, nr, TRANSIENT)) == NULL || (res2 = 
COLnew(0, TYPE_bit, nr, TRANSIENT)) == NULL) {
-               BBPreclaim(l);
-               BBPreclaim(r);
+               BBPreclaim(g);
+               BBPreclaim(m);
                BBPreclaim(p);
                if (res1) BBPreclaim(res1);
                throw(MAL, "algebra.outerselect", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
        }
-       assert(l->tsorted);
-       /* TODO handle void cases */
+       assert(g->tsorted);
        oid *ri1 = Tloc(res1, 0);
        bit *ri2 = Tloc(res2, 0);
-       oid *li = Tloc(l, 0);
-       bit *ri = Tloc(r, 0);
+       oid *gi = Tloc(g, 0);
+       bit *mi = Tloc(m, 0);
        bit *pi = Tloc(p, 0);
        oid cur = oid_nil;
 
-       if (!li) { /* void case ? */
-               oid c = l->hseqbase;
+       if (!gi) { /* void case ? */
+               oid c = g->hseqbase;
                for (BUN n = 0; n < nr; n++, c++) {
                        ri1[q] = c;
-                       ri2[q] = (ri[n]!=bit_nil && pi[n] == TRUE)?TRUE:bit_nil;
+                       ri2[q] = (mi[n]==bit_nil || pi[n] == FALSE || (!any && 
pi[n] == bit_nil))?bit_nil:(pi[n] == TRUE)?TRUE:FALSE;
                        q++;
                }
        } else {
-               oid c = l->hseqbase;
+               oid c = g->hseqbase;
                if (nr)
-                       cur = li[0];
+                       cur = gi[0];
                bool used = false;
                for (BUN n = 0; n < nr; n++, c++) {
-                       if (c && cur != li[n]) {
+                       if (c && cur != gi[n]) {
                                if (!used) {
                                        ri1[q] = c-1;
                                        ri2[q] = bit_nil;
                                        q++;
                                }
                                used = false;
-                               cur = li[n];
+                               cur = gi[n];
                        }
-                       if (ri[n] == bit_nil || pi[n] == TRUE) {
+                       if (mi[n] == TRUE && pi[n] == TRUE) {
+                               ri1[q] = c;
+                               ri2[q] = TRUE;
+                               used = true;
+                               q++;
+                       } else if (mi[n] == bit_nil) { /* empty */
                                ri1[q] = c;
-                               ri2[q] = (ri[n] == bit_nil)?bit_nil:TRUE;
-                               cur = li[n];
+                               ri2[q] = bit_nil;
+                               used = true;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to