Changeset: 1b14439a73e7 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/1b14439a73e7 Modified Files: sql/backends/monet5/rel_bin.c sql/include/sql_catalog.h sql/include/sql_relation.h sql/rel.txt sql/server/rel_dump.c sql/server/rel_exp.c sql/server/rel_exp.h sql/server/rel_optimize_others.c sql/server/rel_optimize_proj.c sql/server/rel_optimize_sel.c sql/server/rel_propagate.c sql/server/rel_rel.c sql/server/rel_rel.h sql/server/rel_rewriter.c sql/server/rel_rewriter.h sql/server/rel_select.c sql/server/rel_statistics.c sql/server/rel_unnest.c sql/server/sql_parser.y sql/server/sql_partition.c sql/test/BugTracker-2025/Tests/7615_join_reordering_2.test sql/test/miscellaneous/Tests/groupby_prepare.test Branch: nested Log Message:
merged with default diffs (truncated from 5249 to 300 lines): diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ # ChangeLog file for devel # This file is updated with Maddlog +* Mon Apr 28 2025 Niels Nes <[email protected]> +- Changed the way complex AND and OR expressions are handled. The new + expression tree uses 2 new cmp flag (cmp_con/cmp_dis), both expressions + hold lists of expressions. This structure reduces the need for stack + space, allowing way larger expressions trees to be handled. + diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c --- a/sql/backends/monet5/rel_bin.c +++ b/sql/backends/monet5/rel_bin.c @@ -962,16 +962,15 @@ exp_count_no_nil_arg(sql_exp *e, stmt *e } static stmt * -exp_bin_or(backend *be, sql_exp *e, stmt *left, stmt *right, stmt *grp, stmt *ext, stmt *cnt, stmt *sel, int depth, bool reduce, int push) +exp_bin_conjunctive(backend *be, sql_exp *e, stmt *left, stmt *right, stmt *grp, stmt *ext, stmt *cnt, stmt *sel, int depth, bool reduce, int push) { sql_subtype *bt = sql_bind_localtype("bit"); list *l = e->l; node *n; - stmt *sel1 = NULL, *sel2 = NULL, *s = NULL; + stmt *sel1 = NULL, *s = NULL; int anti = is_anti(e); sel1 = sel; - sel2 = sel; for (n = l->h; n; n = n->next) { sql_exp *c = n->data; stmt *sin = (sel1 && sel1->nrcols)?sel1:NULL; @@ -983,62 +982,33 @@ exp_bin_or(backend *be, sql_exp *e, stmt if (!s) return s; - if (!reduce && sin) { + if (!reduce && sin && sin != sel) { sql_subfunc *f = sql_bind_func(be->mvc, "sys", anti?"or":"and", bt, bt, F_FUNC, true, true); assert(f); s = stmt_binop(be, sin, s, NULL, f); - } else if (!sin && sel1 && sel1->nrcols == 0 && s->nrcols == 0) { + } else if (!reduce && !sin && sel1 && sel1->nrcols == 0 && s->nrcols == 0) { sql_subfunc *f = sql_bind_func(be->mvc, "sys", anti?"or":"and", bt, bt, F_FUNC, true, true); assert(f); s = stmt_binop(be, sel1, s, sin, f); - } else if (sel1 && (sel1->nrcols == 0 || s->nrcols == 0)) { - stmt *predicate = bin_find_smallest_column(be, left); - - predicate = stmt_const(be, predicate, stmt_bool(be, 1)); - if (s->nrcols == 0) + } else if (reduce && ((sel1 && (sel1->nrcols == 0 || s->nrcols == 0)) || c->type != e_cmp)) { + if (s->nrcols) { + if (sel1 && (sel1->nrcols == 0 || s->nrcols == 0)) { + stmt *predicate = bin_find_smallest_column(be, left); + + predicate = stmt_const(be, predicate, stmt_bool(be, 1)); + s = stmt_uselect(be, predicate, sel1, cmp_equal, s, anti, is_semantics(c)); + } else + s = stmt_uselect(be, s, stmt_bool(be, 1), cmp_equal, sel1, anti, is_semantics(c)); + } else { + stmt *predicate = bin_find_smallest_column(be, left); + + predicate = stmt_const(be, predicate, stmt_bool(be, 1)); s = stmt_uselect(be, predicate, s, cmp_equal, sel1, anti, is_semantics(c)); - else - s = stmt_uselect(be, predicate, sel1, cmp_equal, s, anti, is_semantics(c)); + } } sel1 = s; } - l = e->r; - for (n = l->h; n; n = n->next) { - sql_exp *c = n->data; - stmt *sin = (sel2 && sel2->nrcols)?sel2:NULL; - - /* propagate the anti flag */ - if (anti) - set_anti(c); - s = exp_bin(be, c, left, right, grp, ext, cnt, reduce?sin:NULL, depth, reduce, push); - if (!s) - return s; - - if (!reduce && sin) { - sql_subfunc *f = sql_bind_func(be->mvc, "sys", anti?"or":"and", bt, bt, F_FUNC, true, true); - assert(f); - s = stmt_binop(be, sin, s, NULL, f); - } else if (!sin && sel2 && sel2->nrcols == 0 && s->nrcols == 0) { - sql_subfunc *f = sql_bind_func(be->mvc, "sys", anti?"or":"and", bt, bt, F_FUNC, true, true); - assert(f); - s = stmt_binop(be, sel2, s, sin, f); - } else if (sel2 && (sel2->nrcols == 0 || s->nrcols == 0)) { - stmt *predicate = bin_find_smallest_column(be, left); - - predicate = stmt_const(be, predicate, stmt_bool(be, 1)); - if (s->nrcols == 0) - s = stmt_uselect(be, predicate, s, cmp_equal, sel2, anti, 0); - else - s = stmt_uselect(be, predicate, sel2, cmp_equal, s, anti, 0); - } - sel2 = s; - } - if (sel1->nrcols == 0 && sel2->nrcols == 0) { - sql_subfunc *f = sql_bind_func(be->mvc, "sys", anti?"and":"or", bt, bt, F_FUNC, true, true); - assert(f); - return stmt_binop(be, sel1, sel2, NULL, f); - } - if (sel1->nrcols == 0) { + if (sel1->nrcols == 0 && left) { stmt *predicate = bin_find_smallest_column(be, left); if (!reduce) { @@ -1048,25 +1018,54 @@ exp_bin_or(backend *be, sql_exp *e, stmt sel1 = stmt_uselect(be, predicate, sel1, cmp_equal, NULL, 0/*anti*/, 0); } } - if (sel2->nrcols == 0) { - stmt *predicate = bin_find_smallest_column(be, left); - - if (!reduce) { - predicate = stmt_const(be, predicate, sel2); - } else { + return sel1; +} + +static stmt * +exp_bin_disjunctive(backend *be, sql_exp *e, stmt *left, stmt *right, stmt *grp, stmt *ext, stmt *cnt, stmt *sel, int depth, bool reduce, int push) +{ + sql_subtype *bt = sql_bind_localtype("bit"); + list *l = e->l; + node *n; + stmt *s = NULL, *cur = NULL; + int anti = is_anti(e); + + for (n = l->h; n; n = n->next) { + sql_exp *c = n->data; + + /* propagate the anti flag */ + if (anti) + set_anti(c); + s = exp_bin(be, c, left, right, grp, ext, cnt, reduce?sel:NULL, depth, reduce, push); + if (!s) + return s; + + if (reduce && s->nrcols == 0 && left) { + stmt *predicate = bin_find_smallest_column(be, left); predicate = stmt_const(be, predicate, stmt_bool(be, 1)); - sel2 = stmt_uselect(be, predicate, sel2, cmp_equal, NULL, 0/*anti*/, 0); - } - } - if (!reduce) { - sql_subfunc *f = sql_bind_func(be->mvc, "sys", anti?"and":"or", bt, bt, F_FUNC, true, true); - assert(f); - return stmt_binop(be, sel1, sel2, NULL, f); - } - if (anti) - return stmt_project(be, stmt_tinter(be, sel1, sel2, false), sel1); - return stmt_tunion(be, sel1, sel2); -} + s = stmt_uselect(be, predicate, s, cmp_equal, sel, anti, is_semantics(c)); + } else if (reduce && c->type != e_cmp) { + s = stmt_uselect(be, s, stmt_bool(be, 1), cmp_equal, sel, 0, 0); + } + if (cur) { + if (reduce) { + if (anti) + s = stmt_project(be, stmt_tinter(be, s, cur, false), s); + else + s = stmt_tunion(be, s, cur); + } else { + sql_subfunc *f = sql_bind_func(be->mvc, "sys", anti?"and":"or", bt, bt, F_FUNC, true, true); + assert(f); + s = stmt_binop(be, cur, s, NULL, f); + } + } + cur = s; + } + //if (reduce) + // return stmt_uselect(be, cur, stmt_bool(be, 1), cmp_equal, sel, 0/*anti*/, 0); + return cur; +} + static stmt * exp2bin_case(backend *be, sql_exp *fe, stmt *left, stmt *right, stmt *isel, int depth) @@ -2282,11 +2281,10 @@ exp_bin(backend *be, sql_exp *e, stmt *l } if (e->flag == cmp_in || e->flag == cmp_notin) return handle_in_exps(be, e->l, e->r, left, right, grp, ext, cnt, sel, (e->flag == cmp_in), depth, reduce, push); - if (e->flag == cmp_or && (!right || right->nrcols == 1)) - return exp_bin_or(be, e, left, right, grp, ext, cnt, sel, depth, reduce, push); - if (e->flag == cmp_or && right) { /* join */ - assert(0); - } + if (e->flag == cmp_con) + return exp_bin_conjunctive(be, e, left, right, grp, ext, cnt, sel, depth, reduce, push); + if (e->flag == cmp_dis) + return exp_bin_disjunctive(be, e, left, right, grp, ext, cnt, sel, depth, reduce, push); /* mark use of join indices */ if (right && find_prop(e->p, PROP_JOINIDX) != NULL) @@ -2365,7 +2363,10 @@ exp_bin(backend *be, sql_exp *e, stmt *l list_append(args, l); list_append(args, r); list_append(args, stmt_bool(be, 1)); - s = stmt_Nop(be, stmt_list(be, args), sel, f, NULL); + s = stmt_Nop(be, stmt_list(be, args), NULL, f, NULL); + /* TODO cleanup once candidates api has been stabalized */ + if (sel) + list_append(args, sel); } } else { s = stmt_binop(be, l, r, sel, f); @@ -2798,7 +2799,7 @@ exp2bin_args(backend *be, sql_exp *e, li case e_psm: return args; case e_cmp: - if (e->flag == cmp_or || e->flag == cmp_filter) { + if (e->flag == cmp_filter) { args = exps2bin_args(be, e->l, args); args = exps2bin_args(be, e->r, args); } else if (e->flag == cmp_in || e->flag == cmp_notin) { @@ -3688,7 +3689,7 @@ get_equi_joins_first(mvc *sql, list *exp for (node *n = exps->h; n; n = n->next) { sql_exp *e = n->data; - assert(e->type == e_cmp && e->flag != cmp_in && e->flag != cmp_notin && e->flag != cmp_or); + assert(e->type == e_cmp && e->flag != cmp_in && e->flag != cmp_notin); if (is_equi_exp_(e)) list_append(new_exps, e); else @@ -4191,7 +4192,7 @@ rel2bin_semijoin(backend *be, sql_rel *r stmt *s = NULL; /* only handle simple joins here */ - if ((exp_has_func(e) && e->flag != cmp_filter) || e->flag == cmp_or || (e->f && is_anti(e))) { + if ((exp_has_func(e) && e->flag != cmp_filter) || (e->f && is_anti(e))) { if (!join && !list_length(lje)) { stmt *l = bin_find_smallest_column(be, left); stmt *r = bin_find_smallest_column(be, right); @@ -5176,6 +5177,19 @@ rel2bin_project(backend *be, sql_rel *re return NULL; } } + if (!lpiv) { + stmt *orderbycolstmt = pl->h->data; + //stmt *orderbycolstmt = exp_bin(be, orderbycole, sub, psub, NULL, NULL, NULL, NULL, 0, 0, 0); + + if (!orderbycolstmt) + return NULL; + + orderbycolstmt = column(be, orderbycolstmt); + limit = stmt_limit(be, orderbycolstmt, NULL, grp, stmt_atom_lng(be, 0), l, distinct, 0, 0, nr_obe, 1); + lpiv = limit; + if (!lpiv) + return NULL; + } limit = lpiv; if (limit && grp) diff --git a/sql/common/sql_list.c b/sql/common/sql_list.c --- a/sql/common/sql_list.c +++ b/sql/common/sql_list.c @@ -679,14 +679,21 @@ list_position(list *l, void *val) return -1; } -void * -list_fetch(list *l, int pos) +node * +list_fetch_node(list *l, int pos) { node *n = NULL; int i; for (n = l->h, i=0; n && i<pos; n = n->next, i++) ; + return n; +} + +void * +list_fetch(list *l, int pos) +{ + node *n = list_fetch_node(l, pos); if (n) return n->data; return NULL; diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h --- a/sql/include/sql_catalog.h +++ b/sql/include/sql_catalog.h @@ -169,9 +169,11 @@ typedef enum comp_type { cmp_notequal = 5, cmp_filter = 6, - cmp_or = 7, + cmp_in = 8, /* in value list */ _______________________________________________ checkin-list mailing list -- [email protected] To unsubscribe send an email to [email protected]
