Changeset: 6d14c96f3329 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6d14c96f3329
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk_join.c
sql/backends/monet5/rel_bin.c
sql/backends/monet5/sql_statement.c
sql/backends/monet5/sql_statement.h
sql/server/rel_optimizer.c
sql/server/rel_unnest.c
sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-1join-view.stable.out
sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-2join-view.stable.out
Branch: subquery
Log Message:
improvements for antijoin
diffs (truncated from 333 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -113,7 +113,7 @@ BAT *BATconvert(BAT *b, BAT *s, int tp,
BUN BATcount_no_nil(BAT *b);
gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__));
BAT *BATdense(oid hseq, oid tseq, BUN cnt) __attribute__((__warn_unused_result__));
-BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool nil_clears, BUN estimate);
+BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, BUN estimate);
gdk_return BATextend(BAT *b, BUN newcap) __attribute__((__warn_unused_result__));
void BATfakeCommit(BAT *b);
gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct) __attribute__((__warn_unused_result__));
@@ -703,7 +703,7 @@ str ALGcount_bat(lng *result, const bat
str ALGcount_nil(lng *result, const bat *bid, const bit *ignore_nils);
str ALGcount_no_nil(lng *result, const bat *bid);
str ALGcrossproduct2(bat *l, bat *r, const bat *lid, const bat *rid);
-str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const bit *nil_clears, const lng *estimate);
+str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const bit *not_in, const lng *estimate);
str ALGexist(bit *ret, const bat *bid, const void *val);
str ALGfetchoid(ptr ret, const bat *bid, const oid *pos);
str ALGfind(oid *ret, const bat *bid, ptr val);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2572,16 +2572,19 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
t = ATOMbasetype(r->ttype);
if (not_in && !r->tnonil) {
+ ro = r->hseqbase;
for (rb = HASHget(hsh, HASHprobe(hsh, nil));
rb != HASHnil(hsh);
rb = HASHgetlink(hsh, rb)) {
- ro = BUNtoid(sr, rb);
+ if (sr)
+ ro = BUNtoid(sr, rb);
+ else
+ ro = rb;
if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL,
false, false, "hashjoin", t0);
}
}
-
}
BAT *r1 = *r1p;
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -2103,6 +2103,87 @@ rel2bin_join(backend *be, sql_rel *rel,
return stmt_list(be, l);
}
+static int
+exp_is_mark(sql_exp *e)
+{
+ if (e->type == e_cmp &&
+ (e->flag == mark_in || e->flag == mark_notin ||
+ e->flag == mark_exists || e->flag == mark_notexists))
+ return 1;
+ return 0;
+}
+
+static stmt *
+rel2bin_antijoin(backend *be, sql_rel *rel, list *refs)
+{
+ mvc *sql = be->mvc;
+ list *l, *jexps = NULL, *mexps = NULL;
+ node *en = NULL, *n;
+ stmt *left = NULL, *right = NULL, *join = NULL;
+
+ if (rel->l) /* first construct the left sub relation */
+ left = subrel_bin(be, rel->l, refs);
+ if (rel->r) /* first construct the right sub relation */
+ right = subrel_bin(be, rel->r, refs);
+ if (!left || !right)
+ return NULL;
+ left = row2cols(be, left);
+ right = row2cols(be, right);
+
+ if (rel->exps) {
+
+ jexps = sa_list(sql->sa);
+ mexps = sa_list(sql->sa);
+
+ for( en = rel->exps->h; en; en = en->next ) {
+ sql_exp *e = en->data;
+
+ if (e->type != e_cmp)
+ assert(0);
+ if (exp_is_mark(e))
+ append(mexps, e);
+ else
+ append(jexps, e);
+ }
+ }
+ /* handle join-ing conditions first */
+ if (!list_empty(jexps)) {
+ // assert(0);
+ if (list_empty(mexps))
+ mexps = jexps;
+ }
+ /* handle mark conditions second */
+ if (!list_empty(mexps)) {
+ assert(list_length(mexps) == 1);
+ for( en = mexps->h; en; en = en->next ) {
+ sql_exp *e = en->data;
+ stmt *ls = exp_bin(be, e->l, left, right, NULL, NULL, NULL, NULL);
+ stmt *rs = exp_bin(be, e->r, left, right, NULL, NULL, NULL, NULL);
+
+ if (ls->nrcols == 0)
+ ls = stmt_const(be, bin_first_column(be, left), ls);
+ if (rs->nrcols == 0)
+ rs = stmt_const(be, bin_first_column(be, right), rs);
+ join = stmt_tdiff2(be, ls, rs);
+ }
+ }
+
+ /* construct relation */
+ l = sa_list(sql->sa);
+
+ /* project all the left columns */
+ for( n = left->op4.lval->h; n; n = n->next ) {
+ stmt *c = n->data;
+ const char *rnme = table_name(sql->sa, c);
+ const char *nme = column_name(sql->sa, c);
+ stmt *s = stmt_project(be, join, column(be, c));
+
+ s = stmt_alias(be, s, rnme, nme);
+ list_append(l, s);
+ }
+ return stmt_list(be, l);
+}
+
static stmt *
rel2bin_semijoin(backend *be, sql_rel *rel, list *refs)
{
@@ -2111,6 +2192,9 @@ rel2bin_semijoin(backend *be, sql_rel *r
node *en = NULL, *n;
stmt *left = NULL, *right = NULL, *join = NULL, *jl, *jr, *c;
+ if (rel->op == op_anti && !list_empty(rel->exps) && list_length(rel->exps) == 1 && ((sql_exp*)rel->exps->h->data)->flag == mark_notin)
+ return rel2bin_antijoin(be, rel, refs);
+
if (rel->l) /* first construct the left sub relation */
left = subrel_bin(be, rel->l, refs);
if (rel->r) /* first construct the right sub relation */
@@ -2124,11 +2208,12 @@ rel2bin_semijoin(backend *be, sql_rel *r
* first cheap join(s) (equality or idx)
* second selects/filters
*/
+
+#if 0
if (rel->exps && rel->op == op_anti && need_no_nil(rel)) {
sql_subtype *lng = sql_bind_localtype("lng");
stmt *nilcnt = NULL;
- assert(0);
for( en = rel->exps->h; en; en = en->next ) {
sql_exp *e = en->data, *r, *l;
stmt *s;
@@ -2181,6 +2266,7 @@ rel2bin_semijoin(backend *be, sql_rel *r
left = stmt_list(be, l);
}
}
+#endif
if (rel->exps) {
int idx = 0;
list *lje = sa_list(sql->sa);
@@ -2192,7 +2278,7 @@ rel2bin_semijoin(backend *be, sql_rel *r
stmt *s = NULL;
/* only handle simple joins here */
- if (/*list_length(lje) &&*/ (idx || e->type != e_cmp || (e->flag != cmp_equal && e->flag != mark_in)))
+ if (idx || e->type != e_cmp || (e->flag != cmp_equal && e->flag != mark_in))
break;
if ((exp_has_func(e) && get_cmp(e) != cmp_filter) ||
(get_cmp(e) == cmp_or)) {
@@ -2324,7 +2410,7 @@ rel2bin_distinct(backend *be, stmt *s, s
return s;
/* Use 'all' tid columns */
- if (0 && (tids = bin_find_columns(be, s, TID)) != NULL) {
+ if (/* DISABLES CODE */ (0) && (tids = bin_find_columns(be, s, TID)) != NULL) {
for (n = tids->h; n; n = n->next) {
stmt *t = n->data;
diff --git a/sql/backends/monet5/sql_statement.c b/sql/backends/monet5/sql_statement.c
--- a/sql/backends/monet5/sql_statement.c
+++ b/sql/backends/monet5/sql_statement.c
@@ -1795,7 +1795,43 @@ stmt_tdiff(backend *be, stmt *op1, stmt
q = pushNil(mb, q, TYPE_bat); /* left candidate */
q = pushNil(mb, q, TYPE_bat); /* right candidate */
q = pushBit(mb, q, FALSE); /* nil matches */
- q = pushBit(mb, q, FALSE); /* not in */
+ q = pushBit(mb, q, FALSE); /* do not clear nils */
+ q = pushNil(mb, q, TYPE_lng); /* estimate */
+
+ if (q) {
+ stmt *s = stmt_create(be->mvc->sa, st_tdiff);
+ if (s == NULL) {
+ freeInstruction(q);
+ return NULL;
+ }
+
+ s->op1 = op1;
+ s->op2 = op2;
+ s->nrcols = op1->nrcols;
+ s->key = op1->key;
+ s->aggr = op1->aggr;
+ s->nr = getDestVar(q);
+ s->q = q;
+ return s;
+ }
+ return NULL;
+}
+
+stmt *
+stmt_tdiff2(backend *be, stmt *op1, stmt *op2)
+{
+ InstrPtr q = NULL;
+ MalBlkPtr mb = be->mb;
+
+ if (op1->nr < 0 || op2->nr < 0)
+ return NULL;
+ q = newStmt(mb, algebraRef, differenceRef);
+ q = pushArgument(mb, q, op1->nr); /* left */
+ q = pushArgument(mb, q, op2->nr); /* right */
+ q = pushNil(mb, q, TYPE_bat); /* left candidate */
+ q = pushNil(mb, q, TYPE_bat); /* right candidate */
+ q = pushBit(mb, q, FALSE); /* nil matches */
+ q = pushBit(mb, q, TRUE); /* clear nils */
q = pushNil(mb, q, TYPE_lng); /* estimate */
if (q) {
diff --git a/sql/backends/monet5/sql_statement.h b/sql/backends/monet5/sql_statement.h
--- a/sql/backends/monet5/sql_statement.h
+++ b/sql/backends/monet5/sql_statement.h
@@ -178,6 +178,7 @@ extern stmt *stmt_genselect(backend *be,
extern stmt *stmt_tunion(backend *be, stmt *op1, stmt *op2);
extern stmt *stmt_tdiff(backend *be, stmt *op1, stmt *op2);
+extern stmt *stmt_tdiff2(backend *be, stmt *op1, stmt *op2);
extern stmt *stmt_tinter(backend *be, stmt *op1, stmt *op2);
extern stmt *stmt_join(backend *be, stmt *op1, stmt *op2, int anti, comp_type cmptype);
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -8925,7 +8925,7 @@ optimize_rel(mvc *sql, sql_rel *rel, int
/* rel_join_order may introduce empty selects */
rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes);
- if (level <= 0)
+ if (level <= 0 && 0)
rel = rewrite(sql, rel, &rel_join_push_exps_down,
&changes);
}
diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c
--- a/sql/server/rel_unnest.c
+++ b/sql/server/rel_unnest.c
@@ -574,6 +574,24 @@ move_join_exps(mvc *sql, sql_rel *j, sql
}
static sql_rel *
+push_up_select_l(mvc *sql, sql_rel *rel)
+{
+ (void)sql;
+ /* input rel is dependent join with on the right a project */
+ if (rel && (is_join(rel->op) || is_semi(rel->op))) {
+ sql_rel *l = rel->l;
+
+ if (is_select(l->op) && rel_has_freevar(l) && !rel_is_ref(l) ) {
+ /* push up select (above join) */
+ rel->l = l->l;
+ l->l = rel;
+ return l;
+ }
+ }
+ return rel;
+}
+
+static sql_rel *
push_up_join(mvc *sql, sql_rel *rel)
{
/* input rel is dependent join with on the right a project */
@@ -590,6 +608,10 @@ push_up_join(mvc *sql, sql_rel *rel)
* */
list *rd = NULL, *ld = NULL;
+ if (is_semi(j->op) && is_select(jl->op) && rel_has_freevar(jl) && !rel_is_ref(jl)) {
+ rel->r = j = push_up_select_l(sql, j);
+ return rel; /* ie try again */
+ }
rd = (j->op != op_full)?rel_dependent_var(sql, d, jr):(list*)1;
ld = (((j->op == op_join && rd) || j->op == op_right))?rel_dependent_var(sql, d, jl):(list*)1;
diff --git a/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out b/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
--- a/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
+++ b/sql/test/BugTracker-2017/Tests/crash_correlated_subqueries_not_in_select.Bug-6290.stable.out
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list