Changeset: 752a1e1157c2 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/752a1e1157c2
Modified Files:
        gdk/gdk_cross.c
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/sql_statement.c
        sql/backends/monet5/sql_statement.h
        sql/rel.txt
        sql/server/rel_optimize_proj.c
        sql/server/rel_optimize_sel.c
        sql/server/rel_optimizer.c
        sql/server/rel_unnest.c
        sql/test/sql_dump/Tests/dump.test
Branch: groupjoin
Log Message:

use groupjoin also for exists (groupjoins have an attribute result list: for 
any/all/in we add a boolean result atom expression; for exists this expression 
is marked with 'need no nil')

the gdk/mal level outercross is now a left-outercross (like the outerjoin)


diffs (truncated from 466 to 300 lines):

diff --git a/gdk/gdk_cross.c b/gdk/gdk_cross.c
--- a/gdk/gdk_cross.c
+++ b/gdk/gdk_cross.c
@@ -151,6 +151,7 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
        return BATcrossci(r1p, r2p, &ci1, &ci2);
 }
 
+/* [left] outer cross */
 gdk_return
 BAToutercross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool 
max_one)
 {
@@ -164,26 +165,18 @@ BAToutercross(BAT **r1p, BAT **r2p, BAT 
        }
 
        if (ci1.ncand == 0) {
-               BAT *bn = COLnew(0, TYPE_void, ci2.ncand, TRANSIENT);
+               BAT *bn = COLnew(0, TYPE_void, 0, TRANSIENT);
                if (bn == NULL)
                        return GDK_FAIL;
                BATtseqbase(bn, oid_nil);
-               BATsetcount(bn, ci2.ncand);
                *r1p = bn;
                if (r2p) {
-                       if (ci2.ncand == 0) {
-                               bn = COLnew(0, TYPE_void, ci2.ncand, TRANSIENT);
-                               if (bn != NULL) {
-                                       BATtseqbase(bn, oid_nil);
-                                       BATsetcount(bn, ci2.ncand);
-                               }
-                       } else {
-                               bn = canditer_slice(&ci2, 0, ci2.ncand);
-                       }
+                       bn = COLnew(0, TYPE_void, 0, TRANSIENT);
                        if (bn == NULL) {
                                BBPreclaim(*r1p);
                                return GDK_FAIL;
                        }
+                       BATtseqbase(bn, oid_nil);
                        *r2p = bn;
                }
                return GDK_SUCCEED;
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -2826,6 +2826,7 @@ rel2bin_groupjoin(backend *be, sql_rel *
        list *l;
        node *n , *en;
        stmt *left = NULL, *right = NULL, *join = NULL, *jl, *jr, *res;
+       bool need_project = false;
 
        if (rel->l) /* first construct the left sub relation */
                left = subrel_bin(be, rel->l, refs);
@@ -2838,7 +2839,6 @@ rel2bin_groupjoin(backend *be, sql_rel *
        left = row2cols(be, left);
        right = row2cols(be, right);
 
-       assert(!list_empty(rel->exps));
        list *jexps = mark_joins_last(be->mvc->sa, rel);
 
        en = jexps->h;
@@ -2846,8 +2846,12 @@ rel2bin_groupjoin(backend *be, sql_rel *
                printf("# outer cross\n");
                stmt *l = bin_find_smallest_column(be, left);
                stmt *r = bin_find_smallest_column(be, right);
-
+               if (list_empty(jexps)) {
+                       stmt *limit = stmt_limit(be, r, NULL, NULL, 
stmt_atom_lng(be, 0), stmt_atom_lng(be, 1), 0, 0, 0, 0, 0);
+                       r = stmt_project(be, limit, r);
+               }
                join = stmt_join_cand(be, column(be, l), column(be, r), 
left->cand, NULL/*right->cand*/, 0, cmp_all, 0, 0, false, rel->op == 
op_left?false:true);
+               need_project = true;
        } else if (!list_empty(jexps)) {
                /* markthetajoin()
                 * or left-join followed by markthetaselect */
@@ -2860,7 +2864,6 @@ rel2bin_groupjoin(backend *be, sql_rel *
                 * if no cmp* join exps, the markjoin, ie also handling empty 
righ case
                 */
                en = en->next;
-               assert(en || (e->type == e_cmp && (e->flag == mark_in || 
e->flag == mark_notin)));
                stmt *l = exp_bin(be, e->l, left, NULL, NULL, NULL, NULL, NULL, 
0, 1, 0), *r = NULL;
                bool swap = false;
 
@@ -2882,7 +2885,6 @@ rel2bin_groupjoin(backend *be, sql_rel *
                        l = r;
                        r = t;
                }
-               //join = stmt_markjoin(be, l, r);
                if (en) {
                        int flag = e->flag;
                        if (flag == mark_in)
@@ -2891,8 +2893,21 @@ rel2bin_groupjoin(backend *be, sql_rel *
                                flag = cmp_notequal;
                        assert(!left->cand);
                        join = stmt_join_cand(be, column(be, l), column(be, r), 
left->cand, NULL/*right->cand*/, is_anti(e), (comp_type) e->flag, 0, 
is_semantics(e), false, rel->op == op_left?false:true);
-               } else
-                       join = stmt_join(be, l, r, 0, e->flag, 0, 0, false);
+               } else {
+                       int cmp = e->flag;
+
+                       if (cmp != mark_in && cmp != mark_notin && rel->op == 
op_left) { /* left outer group join */
+                               if (list_length(rel->attr) == 1) {
+                                       sql_exp *e = rel->attr->h->data;
+                                       if (exp_is_atom(e) && exp_is_true(e)) {
+                                               cmp = mark_in;
+                                       } else if (exp_is_atom(e) && 
exp_is_false(e)) {
+                                               cmp = mark_notin;
+                                       }
+                               }
+                       }
+                       join = stmt_join(be, l, r, 0, cmp, 0, 0, false);
+               }
        }
        jl = stmt_result(be, join, 0);
        /* mark result */
@@ -2901,6 +2916,7 @@ rel2bin_groupjoin(backend *be, sql_rel *
        if (en) {
                stmt *sub, *sel = NULL, *osel = NULL;
                list *nl;
+               need_project = false;
 
                /* construct relation */
                nl = sa_list(sql->sa);
@@ -2933,14 +2949,15 @@ rel2bin_groupjoin(backend *be, sql_rel *
                        sql_exp *e = en->data;
                        stmt *l = NULL, *r = NULL;
                        int flag = e->flag;
-                       assert (e->type == e_cmp);
-                       if (flag == cmp_equal || flag == cmp_notequal || flag 
== mark_in || flag == mark_notin){
+                       bool is_semantics = is_semantics(e);
+                       if ((flag == cmp_equal || flag == cmp_notequal || flag 
== mark_in || flag == mark_notin) && (!is_semantics || en->next)){
                                l = exp_bin(be, e->l, sub, NULL, NULL, NULL, 
NULL, NULL, 1, 0, 0);
                                r = exp_bin(be, e->r, sub, NULL, NULL, NULL, 
NULL, NULL, 1, 0, 0);
                        } else {
                                l = exp_bin(be, e, sub, NULL, NULL, NULL, NULL, 
NULL, 0, 0, 0);
                                r = stmt_bool(be, true);
                                flag = cmp_equal;
+                               is_semantics = false;
                        }
 
                        if (!l || !r) {
@@ -2966,15 +2983,27 @@ rel2bin_groupjoin(backend *be, sql_rel *
                        if (en->next) {
                                if (rel->op == op_left) {
                                        /* outerselect(li, rid, [l==r]) */
-                                       join = stmt_outerselect(be, li, jr, l, 
r, flag);
+                                       join = stmt_outerselect(be, li, jr, l, 
r, flag, is_semantics);
                                        sel = stmt_result(be, join, 0);
                                        jr = stmt_result(be, join, 1);
                                } else {
                                        sel = stmt_uselect(be, l, r, flag, sel, 
0, 0);
                                }
                        } else {
-                               assert(e->flag == mark_in || e->flag == 
mark_notin);
-                               join = stmt_markselect(be, li, jr, l, r, 
e->flag);
+                               int cmp = e->flag;
+
+                               if (cmp != mark_in && cmp != mark_notin && 
rel->op == op_left) { /* left outer group join */
+                                       if (list_length(rel->attr) == 1) {
+                                               sql_exp *e = rel->attr->h->data;
+                                               if (exp_is_atom(e) && 
exp_is_true(e)) {
+                                                       cmp = mark_in;
+                                               } else if (exp_is_atom(e) && 
exp_is_false(e)) {
+                                                       cmp = mark_notin;
+                                               }
+                                       }
+                               }
+                               assert(cmp == mark_in || cmp == mark_notin);
+                               join = stmt_markselect(be, li, jr, l, r, cmp);
                        }
                        /* go back to offset in the table */
                        if (sel && osel)
@@ -3000,19 +3029,28 @@ rel2bin_groupjoin(backend *be, sql_rel *
                sql_exp *e = rel->attr->h->data;
                const char *rnme = exp_relname(e);
                const char *nme = exp_name(e);
-               /*
-               stmt *last = l->t->data;
-               sql_subtype *tp = tail_type(last);
-
-               sql_subfunc *isnil = sql_bind_func(sql, "sys", "isnull", tp, 
NULL, F_FUNC, true);
-
-               stmt *s = stmt_unop(be, last, NULL, isnil);
-
-               sql_subtype *bt = sql_bind_localtype("bit");
-               sql_subfunc *not = sql_bind_func(be->mvc, "sys", "not", bt, 
NULL, F_FUNC, true);
-
-               s = stmt_unop(be, s, NULL, not);
-               */
+
+               if (need_project) {
+                       bool exist = true;
+                       if (list_empty(rel->exps) && rel->op == op_left) { /* 
left outer group join */
+                               if (list_length(rel->attr) == 1) {
+                                       sql_exp *e = rel->attr->h->data;
+                                       if (exp_is_atom(e) && exp_is_false(e)) {
+                                               exist = false;
+                                       }
+                               }
+                       }
+                       jr = sql_Nop_(be, "ifthenelse", sql_unop_(be, "isnull", 
jr), stmt_bool(be, !exist), stmt_bool(be, exist), NULL);
+               } else if (list_length(rel->attr) == 1) {
+                       sql_exp *e = rel->attr->h->data;
+                       if (exp_is_atom(e)) {
+                               bool exist = true;
+                               if (exp_is_false(e))
+                                       exist = false;
+                               if (need_no_nil(e))
+                                       jr = sql_Nop_(be, "ifthenelse", 
sql_unop_(be, "isnull", jr), stmt_bool(be, !exist), jr, NULL);
+                       }
+               }
                stmt *s = stmt_alias(be, jr, rnme, nme);
                list_append(l, s);
        }
diff --git a/sql/backends/monet5/sql_statement.c 
b/sql/backends/monet5/sql_statement.c
--- a/sql/backends/monet5/sql_statement.c
+++ b/sql/backends/monet5/sql_statement.c
@@ -2063,13 +2063,15 @@ select2_join2(backend *be, stmt *op1, st
 }
 
 stmt *
-stmt_outerselect(backend *be, stmt *li, stmt *ri, stmt *l, stmt *r, int cmp)
+stmt_outerselect(backend *be, stmt *li, stmt *ri, stmt *l, stmt *r, int cmp, 
bool is_semantics)
 {
        MalBlkPtr mb = be->mb;
        InstrPtr q;
 
        if ((q = multiplex2(mb, calcRef, 
convertMultiplexFcn(cmp==cmp_equal?"=":"!="), l->nr, r->nr, TYPE_bit)) == NULL)
                return NULL;
+       if (is_semantics)
+               q = pushBit(mb, q, TRUE);
        int p = getDestVar(q);
 
        q = newStmtArgs(mb, algebraRef, outerselectRef, 6);
diff --git a/sql/backends/monet5/sql_statement.h 
b/sql/backends/monet5/sql_statement.h
--- a/sql/backends/monet5/sql_statement.h
+++ b/sql/backends/monet5/sql_statement.h
@@ -196,7 +196,7 @@ extern stmt *stmt_uselect(backend *be, s
        */
 extern stmt *stmt_uselect2(backend *be, stmt *op1, stmt *op2, stmt *op3, int 
cmp, stmt *sub, int anti, int symmetric, int reduce);
 extern stmt *stmt_genselect(backend *be, stmt *lops, stmt *rops, sql_subfunc 
*f, stmt *sub, int anti);
-extern stmt *stmt_outerselect(backend *be, stmt *li, stmt *ri, stmt *l, stmt 
*r, int cmp);
+extern stmt *stmt_outerselect(backend *be, stmt *li, stmt *ri, stmt *l, stmt 
*r, int cmp, bool is_semantics);
 extern stmt *stmt_markselect(backend *be, stmt *li, stmt *mask, stmt *l, stmt 
*r, int cmp);
 
 extern stmt *stmt_tunion(backend *be, stmt *op1, stmt *op2);
diff --git a/sql/rel.txt b/sql/rel.txt
--- a/sql/rel.txt
+++ b/sql/rel.txt
@@ -40,6 +40,10 @@ JOIN         (card MULTI)
        -> l            is left relation
        -> r            is right relation
        -> flag         LEFT (keep left order, only used during code generation)
+       -> attr         empty -> normal joins, list of expressions for group 
joins
+                       special case for mark joins if expression is a boolean
+                               true/false anyequal/not(anyequal) behaviour
+                               if need_no_nil set, exist behaviour
 
 SEMI/ANTI 
        (card MULTI)
diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c
--- a/sql/server/rel_optimize_proj.c
+++ b/sql/server/rel_optimize_proj.c
@@ -502,7 +502,7 @@ rel_push_project_up_(visitor *v, sql_rel
                }
                nlexps = list_length(exps);
                /* also handle right hand of join */
-               if (is_join(rel->op) && r->op == op_project && r->l) {
+               if (is_join(rel->op) && r->op == op_project && r->l && 
list_empty(rel->attr)) {
                        /* Here we also check all expressions of r like above
                           but also we need to check for ambigious names. */
 
@@ -520,14 +520,14 @@ rel_push_project_up_(visitor *v, sql_rel
                                        return rel;
                                }
                        }
-               } else if (is_join(rel->op)) {
+               } else if (is_join(rel->op) && list_empty(rel->attr)) {
                        list *r_exps = rel_projections(v->sql, r, NULL, 1, 1);
                        list_merge(exps, r_exps, (fdup)NULL);
-                       if (rel->attr)
-                               append(exps, exp_ref(v->sql, 
rel->attr->h->data));
                }
+               if (!list_empty(rel->attr))
+                       append(exps, exp_ref(v->sql, rel->attr->h->data));
                /* Here we should check for ambigious names ? */
-               if (is_join(rel->op) && r) {
+               if (is_join(rel->op) && r && list_empty(rel->attr)) {
                        t = (l->op == op_project && l->l)?l->l:l;
                        l_exps = rel_projections(v->sql, t, NULL, 1, 1);
                        /* conflict with old right expressions */
diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c
--- a/sql/server/rel_optimize_sel.c
+++ b/sql/server/rel_optimize_sel.c
@@ -1821,16 +1821,15 @@ exps_count(list *exps)
 static list *
 order_join_expressions(mvc *sql, list *dje, list *rels)
 {
+       node *n;
+       int cnt = list_length(dje);
+
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to