Changeset: 6d14c96f3329 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6d14c96f3329
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk_join.c
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/sql_statement.c
        sql/backends/monet5/sql_statement.h
        sql/server/rel_optimizer.c
        sql/server/rel_unnest.c
        
sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
        
sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-1join-view.stable.out
        
sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-2join-view.stable.out
Branch: subquery
Log Message:

improvements for antijoin


diffs (truncated from 333 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -113,7 +113,7 @@ BAT *BATconvert(BAT *b, BAT *s, int tp, 
 BUN BATcount_no_nil(BAT *b);
 gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__));
 BAT *BATdense(oid hseq, oid tseq, BUN cnt) 
__attribute__((__warn_unused_result__));
-BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool 
nil_clears, BUN estimate);
+BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, 
BUN estimate);
 gdk_return BATextend(BAT *b, BUN newcap) 
__attribute__((__warn_unused_result__));
 void BATfakeCommit(BAT *b);
 gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, 
BUN n, bool asc, bool nilslast, bool distinct) 
__attribute__((__warn_unused_result__));
@@ -703,7 +703,7 @@ str ALGcount_bat(lng *result, const bat 
 str ALGcount_nil(lng *result, const bat *bid, const bit *ignore_nils);
 str ALGcount_no_nil(lng *result, const bat *bid);
 str ALGcrossproduct2(bat *l, bat *r, const bat *lid, const bat *rid);
-str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, 
const bat *srid, const bit *nil_matches, const bit *nil_clears, const lng 
*estimate);
+str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, 
const bat *srid, const bit *nil_matches, const bit *not_in, const lng 
*estimate);
 str ALGexist(bit *ret, const bat *bid, const void *val);
 str ALGfetchoid(ptr ret, const bat *bid, const oid *pos);
 str ALGfind(oid *ret, const bat *bid, ptr val);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2572,16 +2572,19 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
        t = ATOMbasetype(r->ttype);
 
        if (not_in && !r->tnonil) {
+               ro = r->hseqbase;
                for (rb = HASHget(hsh, HASHprobe(hsh, nil));
                     rb != HASHnil(hsh);
                     rb = HASHgetlink(hsh, rb)) {
-                       ro = BUNtoid(sr, rb);
+                       if (sr)
+                               ro = BUNtoid(sr, rb);
+                       else
+                               ro = rb;
                        if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
                                return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL,
                                               false, false, "hashjoin", t0);
                        }
                }
-               
        }
 
        BAT *r1 = *r1p;
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -2103,6 +2103,87 @@ rel2bin_join(backend *be, sql_rel *rel, 
        return stmt_list(be, l);
 }
 
+static int
+exp_is_mark(sql_exp *e)
+{ /* Returns 1 if e is a comparison carrying one of the four mark_* flags, else 0. */
+       if (e->type == e_cmp && /* only e_cmp expressions are considered */
+               (e->flag == mark_in || e->flag == mark_notin ||
+                e->flag == mark_exists || e->flag == mark_notexists)) 
+               return 1;
+       return 0;
+}
+
+static stmt *
+rel2bin_antijoin(backend *be, sql_rel *rel, list *refs)
+{ /* Builds the statements for an anti-join as an algebra difference (stmt_tdiff2) and projects the left columns through its result. */
+       mvc *sql = be->mvc;
+       list *l, *jexps = NULL, *mexps = NULL; /* l: result columns; jexps: non-mark comparisons; mexps: mark comparisons */
+       node *en = NULL, *n;
+       stmt *left = NULL, *right = NULL, *join = NULL; /* join: result of stmt_tdiff2, used as projection input below */
+
+       if (rel->l) /* first construct the left sub relation */
+               left = subrel_bin(be, rel->l, refs);
+       if (rel->r) /* first construct the right sub relation */
+               right = subrel_bin(be, rel->r, refs);
+       if (!left || !right) 
+               return NULL;    
+       left = row2cols(be, left);
+       right = row2cols(be, right);
+
+       if (rel->exps) {
+               /* split the expressions: those accepted by exp_is_mark go to mexps, all others to jexps */
+               jexps = sa_list(sql->sa);
+               mexps = sa_list(sql->sa);
+
+               for( en = rel->exps->h; en; en = en->next ) {
+                       sql_exp *e = en->data;
+
+                       if (e->type != e_cmp)
+                               assert(0); /* only comparison expressions are expected here */
+                       if (exp_is_mark(e))
+                               append(mexps, e);
+                       else
+                               append(jexps, e);
+               }
+       }
+       /* join conditions: they replace the mark list only when no mark expression exists; otherwise they are not evaluated in this function */
+       if (!list_empty(jexps)) {
+       //      assert(0);
+               if (list_empty(mexps))
+                       mexps = jexps;
+       }
+       /* handle mark conditions second: exactly one condition is expected, turned into a difference of its two operands */
+       if (!list_empty(mexps)) { 
+               assert(list_length(mexps) == 1);
+               for( en = mexps->h; en; en = en->next ) {
+                       sql_exp *e = en->data;
+                       stmt *ls = exp_bin(be, e->l, left, right, NULL, NULL, 
NULL, NULL);
+                       stmt *rs = exp_bin(be, e->r, left, right, NULL, NULL, 
NULL, NULL);
+                       /* operands with nrcols == 0 are wrapped with stmt_const over the first column of their side; NOTE(review): ls and rs are used unchecked - confirm exp_bin cannot return NULL here */
+                       if (ls->nrcols == 0)
+                               ls = stmt_const(be, bin_first_column(be, left), 
ls);
+                       if (rs->nrcols == 0)
+                               rs = stmt_const(be, bin_first_column(be, 
right), rs);
+                       join = stmt_tdiff2(be, ls, rs); /* may be NULL: stmt_tdiff2 returns NULL when an operand has nr < 0 */
+               }
+       }
+       /* NOTE(review): join is still NULL here when mexps is empty (or stmt_tdiff2 returned NULL), yet it is passed to stmt_project below - confirm callers rule this out */
+       /* construct relation */
+       l = sa_list(sql->sa);
+
+       /* project all the left columns through join, re-applying each column's table and column name */
+       for( n = left->op4.lval->h; n; n = n->next ) {
+               stmt *c = n->data;
+               const char *rnme = table_name(sql->sa, c);
+               const char *nme = column_name(sql->sa, c);
+               stmt *s = stmt_project(be, join, column(be, c));
+
+               s = stmt_alias(be, s, rnme, nme);
+               list_append(l, s);
+       }
+       return stmt_list(be, l);
+}
+
 static stmt *
 rel2bin_semijoin(backend *be, sql_rel *rel, list *refs)
 {
@@ -2111,6 +2192,9 @@ rel2bin_semijoin(backend *be, sql_rel *r
        node *en = NULL, *n;
        stmt *left = NULL, *right = NULL, *join = NULL, *jl, *jr, *c;
 
+       if (rel->op == op_anti && !list_empty(rel->exps) && 
list_length(rel->exps) == 1 && ((sql_exp*)rel->exps->h->data)->flag == 
mark_notin)
+               return rel2bin_antijoin(be, rel, refs);
+
        if (rel->l) /* first construct the left sub relation */
                left = subrel_bin(be, rel->l, refs);
        if (rel->r) /* first construct the right sub relation */
@@ -2124,11 +2208,12 @@ rel2bin_semijoin(backend *be, sql_rel *r
         *      first cheap join(s) (equality or idx) 
         *      second selects/filters 
         */
+       
+#if 0
        if (rel->exps && rel->op == op_anti && need_no_nil(rel)) {
                sql_subtype *lng = sql_bind_localtype("lng");
                stmt *nilcnt = NULL;
 
-               assert(0);
                for( en = rel->exps->h; en; en = en->next ) {
                        sql_exp *e = en->data, *r, *l;
                        stmt *s;
@@ -2181,6 +2266,7 @@ rel2bin_semijoin(backend *be, sql_rel *r
                        left = stmt_list(be, l);
                }
        }
+#endif
        if (rel->exps) {
                int idx = 0;
                list *lje = sa_list(sql->sa);
@@ -2192,7 +2278,7 @@ rel2bin_semijoin(backend *be, sql_rel *r
                        stmt *s = NULL;
 
                        /* only handle simple joins here */             
-                       if (/*list_length(lje) &&*/ (idx || e->type != e_cmp || 
(e->flag != cmp_equal && e->flag != mark_in)))
+                       if (idx || e->type != e_cmp || (e->flag != cmp_equal && 
e->flag != mark_in))
                                break;
                        if ((exp_has_func(e) && get_cmp(e) != cmp_filter) ||
                            (get_cmp(e) == cmp_or)) { 
@@ -2324,7 +2410,7 @@ rel2bin_distinct(backend *be, stmt *s, s
                return s;
 
        /* Use 'all' tid columns */
-       if (0 && (tids = bin_find_columns(be, s, TID)) != NULL) {
+       if (/* DISABLES CODE */ (0) && (tids = bin_find_columns(be, s, TID)) != 
NULL) {
                for (n = tids->h; n; n = n->next) {
                        stmt *t = n->data;
 
diff --git a/sql/backends/monet5/sql_statement.c 
b/sql/backends/monet5/sql_statement.c
--- a/sql/backends/monet5/sql_statement.c
+++ b/sql/backends/monet5/sql_statement.c
@@ -1795,7 +1795,43 @@ stmt_tdiff(backend *be, stmt *op1, stmt 
        q = pushNil(mb, q, TYPE_bat); /* left candidate */
        q = pushNil(mb, q, TYPE_bat); /* right candidate */
        q = pushBit(mb, q, FALSE);    /* nil matches */
-       q = pushBit(mb, q, FALSE);    /* not in */
+       q = pushBit(mb, q, FALSE);    /* do not clear nils */    
+       q = pushNil(mb, q, TYPE_lng); /* estimate */
+
+       if (q) {
+               stmt *s = stmt_create(be->mvc->sa, st_tdiff);
+               if (s == NULL) {
+                       freeInstruction(q);
+                       return NULL;
+               }
+
+               s->op1 = op1;
+               s->op2 = op2;
+               s->nrcols = op1->nrcols;
+               s->key = op1->key;
+               s->aggr = op1->aggr;
+               s->nr = getDestVar(q);
+               s->q = q;
+               return s;
+       }
+       return NULL;
+}
+
+stmt *
+stmt_tdiff2(backend *be, stmt *op1, stmt *op2)
+{
+       InstrPtr q = NULL;
+       MalBlkPtr mb = be->mb;
+
+       if (op1->nr < 0 || op2->nr < 0)
+               return NULL;
+       q = newStmt(mb, algebraRef, differenceRef);
+       q = pushArgument(mb, q, op1->nr); /* left */
+       q = pushArgument(mb, q, op2->nr); /* right */
+       q = pushNil(mb, q, TYPE_bat); /* left candidate */
+       q = pushNil(mb, q, TYPE_bat); /* right candidate */
+       q = pushBit(mb, q, FALSE);    /* nil matches */
+       q = pushBit(mb, q, TRUE);     /* clear nils */
        q = pushNil(mb, q, TYPE_lng); /* estimate */
 
        if (q) {
diff --git a/sql/backends/monet5/sql_statement.h 
b/sql/backends/monet5/sql_statement.h
--- a/sql/backends/monet5/sql_statement.h
+++ b/sql/backends/monet5/sql_statement.h
@@ -178,6 +178,7 @@ extern stmt *stmt_genselect(backend *be,
 
 extern stmt *stmt_tunion(backend *be, stmt *op1, stmt *op2);
 extern stmt *stmt_tdiff(backend *be, stmt *op1, stmt *op2);
+extern stmt *stmt_tdiff2(backend *be, stmt *op1, stmt *op2);
 extern stmt *stmt_tinter(backend *be, stmt *op1, stmt *op2);
 
 extern stmt *stmt_join(backend *be, stmt *op1, stmt *op2, int anti, comp_type 
cmptype);
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -8925,7 +8925,7 @@ optimize_rel(mvc *sql, sql_rel *rel, int
                /* rel_join_order may introduce empty selects */
                rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); 
 
-               if (level <= 0)
+               if (level <= 0 && 0)
                        rel = rewrite(sql, rel, &rel_join_push_exps_down, 
&changes); 
        }
 
diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c
--- a/sql/server/rel_unnest.c
+++ b/sql/server/rel_unnest.c
@@ -574,6 +574,24 @@ move_join_exps(mvc *sql, sql_rel *j, sql
 }
 
 static sql_rel *
+push_up_select_l(mvc *sql, sql_rel *rel) 
+{
+       (void)sql; /* parameter is not used */
+       /* For a join or semijoin rel whose left child is a select for which rel_has_freevar holds and rel_is_ref does not: swap the two so the select ends up on top. Any other input is returned unchanged. */ 
+       if (rel && (is_join(rel->op) || is_semi(rel->op))) {
+               sql_rel *l = rel->l;
+               /* NOTE(review): l is dereferenced without a NULL check - presumably joins always have a left child; confirm */
+               if (is_select(l->op) && rel_has_freevar(l) && !rel_is_ref(l) ) {
+                       /* push up select (above join) */
+                       rel->l = l->l; /* the join now reads the select's former input */
+                       l->l = rel; /* and the select reads the join's output */
+                       return l; /* the select is the new root of this subtree */
+               }
+       }
+       return rel;
+}
+
+static sql_rel *
 push_up_join(mvc *sql, sql_rel *rel) 
 {
        /* input rel is dependent join with on the right a project */ 
@@ -590,6 +608,10 @@ push_up_join(mvc *sql, sql_rel *rel)
                         * */
                        list *rd = NULL, *ld = NULL; 
 
+                       if (is_semi(j->op) && is_select(jl->op) && 
rel_has_freevar(jl) && !rel_is_ref(jl)) {
+                               rel->r = j = push_up_select_l(sql, j);
+                               return rel; /* ie try again */
+                       }
                        rd = (j->op != op_full)?rel_dependent_var(sql, d, 
jr):(list*)1;
                        ld = (((j->op == op_join && rd) || j->op == 
op_right))?rel_dependent_var(sql, d, jl):(list*)1;
 
diff --git 
a/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
 
b/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
--- 
a/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
+++ 
b/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to