Changeset: b132e9477d66 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b132e9477d66 Modified Files: monetdb5/modules/mal/pcre.mal sql/backends/monet5/LSST/Tests/lsst_htmxmatch.sql sql/backends/monet5/rel_bin.c sql/backends/monet5/sql_gencode.c sql/backends/monet5/sql_statement.c sql/backends/monet5/sql_statement.h sql/benchmarks/tpch/Tests/02-plan.stable.out sql/benchmarks/tpch/Tests/09-plan.stable.out.int128 sql/benchmarks/tpch/Tests/13-plan.stable.out sql/benchmarks/tpch/Tests/16-plan.stable.out sql/benchmarks/tpch/Tests/17-explain.stable.out.int128 sql/benchmarks/tpch/Tests/17-plan.stable.out.int128 sql/benchmarks/tpch/Tests/20-explain.stable.out.int128 sql/benchmarks/tpch/Tests/20-plan.stable.out.int128 sql/server/rel_dump.c sql/server/rel_exp.c sql/server/rel_exp.h sql/server/rel_optimizer.c sql/server/rel_select.c sql/server/sql_parser.y sql/test/BugTracker-2009/Tests/primekeyconstraint.SF-2783425.stable.err sql/test/leaks/Tests/check1.stable.out.int128 sql/test/leaks/Tests/check2.stable.out.int128 sql/test/leaks/Tests/check3.stable.out.int128 sql/test/leaks/Tests/check4.stable.out.int128 sql/test/leaks/Tests/check5.stable.out.int128 sql/test/leaks/Tests/select1.stable.out.int128 sql/test/leaks/Tests/select2.stable.out.int128 Branch: default Log Message:
Generalized filter functions (the old version was limited to l filter (r, option), i.e. to like filters). The new syntax is [ l0, l1, .., ln ] Filter_Op [ r0, r1, .., rn ], where l0 and r0 are the columns to join/select over. Extra columns/options can be passed using l1 to ln and r1 to rn. Currently, filter functions can be added to the system using create filter function "name"( scalar types) external name mod.name; In MAL we assume (require) that mod.name functions exist for the subselect and subjoin variations. See the subjoin/select and likesubselect variants. Bit-returning map operators are also needed (for when the select/join isn't the first operator on a relation). These have the form (l0, .., ln, r0, .., rn) := bit. For now the name should be 'mod'.sqlname, where sqlname is the name given to the function within SQL. We will probably clean up this requirement and add some documentation ;-). diffs (truncated from 1839 to 300 lines): diff --git a/monetdb5/modules/mal/pcre.mal b/monetdb5/modules/mal/pcre.mal --- a/monetdb5/modules/mal/pcre.mal +++ b/monetdb5/modules/mal/pcre.mal @@ -118,6 +118,25 @@ address BATPCREnotilike; command batstr.not_ilike(s:bat[:oid,:str], pat:str):bat[:oid,:bit] address BATPCREnotilike2; +command algebra.like(s:str, pat:str, esc:str):bit address PCRElike3; +command algebra.like(s:str, pat:str):bit address PCRElike2; +command algebra.not_like(s:str, pat:str, esc:str):bit address PCREnotlike3; +command algebra.not_like(s:str, pat:str):bit address PCREnotlike2; +command algebra.ilike(s:str, pat:str, esc:str):bit address PCREilike3; +command algebra.ilike(s:str, pat:str):bit address PCREilike2; +command algebra.not_ilike(s:str, pat:str, esc:str):bit address PCREnotilike3; +command algebra.not_ilike(s:str, pat:str):bit address PCREnotilike2; + +module batalgebra; +command batalgebra.like(s:bat[:oid,:str], pat:str, esc:str):bat[:oid,:bit] address BATPCRElike; +command batalgebra.like(s:bat[:oid,:str], pat:str):bat[:oid,:bit] address BATPCRElike2; +command 
batalgebra.not_like(s:bat[:oid,:str], pat:str, esc:str):bat[:oid,:bit] address BATPCREnotlike; +command batalgebra.not_like(s:bat[:oid,:str], pat:str):bat[:oid,:bit] address BATPCREnotlike2; +command batalgebra.ilike(s:bat[:oid,:str], pat:str, esc:str):bat[:oid,:bit] address BATPCREilike; +command batalgebra.ilike(s:bat[:oid,:str], pat:str):bat[:oid,:bit] address BATPCREilike2; +command batalgebra.not_ilike(s:bat[:oid,:str], pat:str, esc:str):bat[:oid,:bit] address BATPCREnotilike; +command batalgebra.not_ilike(s:bat[:oid,:str], pat:str):bat[:oid,:bit] address BATPCREnotilike2; + command algebra.likesubselect(b:bat[:oid,:str], pat:str, esc:str, caseignore:bit, anti:bit) :bat[:oid,:oid] address PCRElikesubselect1 comment "Select all head values for which the tail value is \"like\" diff --git a/sql/backends/monet5/LSST/Tests/lsst_htmxmatch.sql b/sql/backends/monet5/LSST/Tests/lsst_htmxmatch.sql --- a/sql/backends/monet5/LSST/Tests/lsst_htmxmatch.sql +++ b/sql/backends/monet5/LSST/Tests/lsst_htmxmatch.sql @@ -5,9 +5,9 @@ insert into htm values (120), (121), (12 insert into htm values (130), (131), (132), (133); -- select identical pairs -select * from htm a, htm b where a.id xmatch(0) b.id; +select * from htm a, htm b where [a.id] xmatch [b.id,0]; -- select pairs at distance one -select * from htm a, htm b where a.id xmatch(1) b.id; +select * from htm a, htm b where [a.id] xmatch [b.id,1]; drop table htm; diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c --- a/sql/backends/monet5/rel_bin.c +++ b/sql/backends/monet5/rel_bin.c @@ -550,13 +550,54 @@ exp_bin(mvc *sql, sql_exp *e, stmt *left int swapped = 0, is_select = 0; sql_exp *re = e->r, *re2 = e->f; + /* general predicate, select and join */ if (get_cmp(e) == cmp_filter) { - list *r = e->r; - - re2 = NULL; - re = r->h->data; - if (r->h->next) - re2 = r->h->next->data; + list *args; + list *ops; + node *n; + int first = 1; + + ops = sa_list(sql->sa); + args = e->l; + for( n = args->h; n; n = 
n->next ) { + s = NULL; + if (!swapped) + s = exp_bin(sql, n->data, left, NULL, grp, ext, cnt, NULL); + if (!s && (first || swapped)) { + s = exp_bin(sql, n->data, right, NULL, grp, ext, cnt, NULL); + swapped = 1; + } + if (!s) + return s; + if (s->nrcols == 0 && first) + s = stmt_const(sql->sa, bin_first_column(sql->sa, swapped?right:left), s); + list_append(ops, s); + first = 0; + } + l = stmt_list(sql->sa, ops); + ops = sa_list(sql->sa); + args = e->r; + for( n = args->h; n; n = n->next ) { + s = exp_bin(sql, n->data, (swapped || !right)?left:right, NULL, grp, ext, cnt, NULL); + if (!s) + return s; + list_append(ops, s); + } + r = stmt_list(sql->sa, ops); + + if (left && right && exps_card(e->r) > CARD_ATOM) { + sql_subfunc *f = e->f; + stmt *j = stmt_genjoin(sql->sa, l, r, f, swapped); + + if (j && is_anti(e)) + j->flag |= ANTI; + return j; + } + assert(!swapped); + s = stmt_genselect(sql->sa, l, r, e->f, sel); + if (s && is_anti(e)) + s->flag |= ANTI; + return s; } if (e->flag == cmp_in || e->flag == cmp_notin) { return handle_in_exps(sql, e->l, e->r, left, right, grp, ext, cnt, sel, (e->flag == cmp_in), 0); @@ -630,37 +671,12 @@ exp_bin(mvc *sql, sql_exp *e, stmt *left } if (re2) r2 = exp_bin(sql, re2, left, right, grp, ext, cnt, sel); + if (!l || !r || (re2 && !r2)) { assert(0); return NULL; } - /* general predicate, select and join */ - if (get_cmp(e) == cmp_filter) { - list *ops; - - if (l->nrcols == 0) - l = stmt_const(sql->sa, bin_first_column(sql->sa, swapped?right:left), l); - - if (left && right && re->card > CARD_ATOM && !is_select) { - /* find predicate function */ - sql_subfunc *f = e->f; - stmt *j = stmt_joinN(sql->sa, l, r, r2, f, swapped); - - if (j && is_anti(e)) - j->flag |= ANTI; - return j; - } - ops = sa_list(sql->sa); - append(ops, r); - if (r2) - append(ops, r2); - r = stmt_list(sql->sa, ops); - s = stmt_genselect(sql->sa, l, r, e->f, sel); - if (s && is_anti(e)) - s->flag |= ANTI; - return s; - } if (left && right && !is_select && 
((l->nrcols && (r->nrcols || (r2 && r2->nrcols))) || re->card > CARD_ATOM || diff --git a/sql/backends/monet5/sql_gencode.c b/sql/backends/monet5/sql_gencode.c --- a/sql/backends/monet5/sql_gencode.c +++ b/sql/backends/monet5/sql_gencode.c @@ -696,8 +696,8 @@ dump_joinN(backend *sql, MalBlkPtr mb, s { char *mod, *fimp; InstrPtr q; - int op1, op2, op3 = 0; bit swapped = (s->flag & SWAPPED) ? TRUE : FALSE; + node *n; if (backend_create_subfunc(sql, s->op4.funcval, NULL) < 0) return -1; @@ -705,18 +705,23 @@ dump_joinN(backend *sql, MalBlkPtr mb, s fimp = sql_func_imp(s->op4.funcval->func); /* dump left and right operands */ - op1 = _dumpstmt(sql, mb, s->op1); - op2 = _dumpstmt(sql, mb, s->op2); - if (s->op3) - op3 = _dumpstmt(sql, mb, s->op3); + _dumpstmt(sql, mb, s->op1); + _dumpstmt(sql, mb, s->op2); /* filter qualifying tuples, return oids of h and tail */ q = newStmt(mb, mod, fimp); q = pushReturn(mb, q, newTmpVariable(mb, TYPE_any)); - q = pushArgument(mb, q, op1); - q = pushArgument(mb, q, op2); - if (s->op3) - q = pushArgument(mb, q, op3); + for (n = s->op1->op4.lval->h; n; n = n->next) { + stmt *op = n->data; + + q = pushArgument(mb, q, op->nr); + } + + for (n = s->op2->op4.lval->h; n; n = n->next) { + stmt *op = n->data; + + q = pushArgument(mb, q, op->nr); + } s->nr = getDestVar(q); if (swapped) { @@ -1155,10 +1160,10 @@ static int renameVariable(mb, getArg(q, 2), "r2_%d", s->nr); } break; case st_uselect:{ - bit need_not; + bit need_not = FALSE; int l, r, sub, anti; + node *n; - need_not = FALSE; if ((l = _dumpstmt(sql, mb, s->op1)) < 0) return -1; if ((r = _dumpstmt(sql, mb, s->op2)) < 0) @@ -1172,8 +1177,7 @@ static int if (s->op2->nrcols >= 1) { char *mod = calcRef; char *op = "="; - int k; - int op3 = -1; + int k, done = 0; switch (get_cmp(s)) { case cmp_equal: @@ -1194,36 +1198,33 @@ static int case cmp_gte: op = ">="; break; - case cmp_filter:{ - sql_subfunc *f; - char *fname = s->op4.funcval->func->base.name; - stmt *p2 = ((stmt *) 
s->op2->op4.lval->h->data), *p3 = NULL; - + case cmp_filter: + done = 1; op = sql_func_imp(s->op4.funcval->func); mod = sql_func_mod(s->op4.funcval->func); - assert(anti == 0); - r = p2->nr; - if (s->op2->op4.lval->h->next) { - p3 = s->op2->op4.lval->h->next->data; - - op3 = p3->nr; + q = newStmt(mb, "mal", "multiplex"); + setVarType(mb, getArg(q, 0), newBatType(TYPE_oid, TYPE_bit)); + setVarUDFtype(mb, getArg(q, 0)); + q = pushStr(mb, q, convertMultiplexMod(mod, op)); + q = pushStr(mb, q, convertMultiplexFcn(op)); + for (n = s->op1->op4.lval->h; n; n = n->next) { + stmt *op = n->data; + q = pushArgument(mb, q, op->nr); } - if ((!p3 && (f = sql_bind_func(sql->mvc->sa, mvc_bind_schema(sql->mvc, "sys"), fname, tail_type(s->op1), tail_type(p2), F_FUNC)) != NULL) || - (p3 && (f = sql_bind_func3(sql->mvc->sa, mvc_bind_schema(sql->mvc, "sys"), fname, tail_type(s->op1), tail_type(p2), tail_type(p3), F_FUNC)) != NULL)) { - op = sql_func_imp(f->func); - mod = sql_func_mod(f->func); + for (n = s->op2->op4.lval->h; n; n = n->next) { + stmt *op = n->data; + q = pushArgument(mb, q, op->nr); } - } + if (q == NULL) + return -1; break; default: showException(GDKout, SQL, "sql", "Unknown operator"); } - if ((q = multiplex2(mb, mod, convertOperator(op), l, r, TYPE_bit)) == NULL) + if (!done && (q = multiplex2(mb, mod, convertOperator(op), l, r, TYPE_bit)) == NULL) return -1; - if (op3 > 0) - q = pushArgument(mb, q, op3); k = getDestVar(q); q = newStmt1(mb, algebraRef, "subselect"); @@ -1234,7 +1235,7 @@ static int q = pushBit(mb, q, !need_not); q = pushBit(mb, q, TRUE); q = pushBit(mb, q, TRUE); - q = pushBit(mb, q, FALSE); + q = pushBit(mb, q, anti); if (q == NULL) return -1; k = getDestVar(q); @@ -1257,12 +1258,17 @@ static int q = newStmt(mb, mod, convertOperator(fimp)); // push pointer to the SQL structure into the MAL call // allows getting argument names for example - if (LANG_EXT(f->lang)) - q = pushPtr(mb, q, f); - // f->query contains the R code to be run - if (f->lang == 
FUNC_LANG_R) - q = pushStr(mb, q, f->query); - q = pushArgument(mb, q, l); + if (LANG_EXT(f->lang)) + q = pushPtr(mb, q, f); + // f->query contains the R code to be run + if (f->lang == FUNC_LANG_R) + q = pushStr(mb, q, f->query); + + for (n = s->op1->op4.lval->h; n; n = n->next) { + stmt *op = n->data; + + q = pushArgument(mb, q, op->nr); + } if (sub > 0) q = pushArgument(mb, q, sub); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list