Changeset: 328fc2b94488 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/328fc2b94488
Modified Files:
sql/backends/monet5/rel_bin.c
sql/backends/monet5/sql_statement.c
sql/server/rel_optimizer.c
Branch: groupjoin
Log Message:
handle old setjoins now with semi/anti and/or groupjoins
diffs (truncated from 732 to 300 lines):
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1663,6 +1663,18 @@ exp_bin(backend *be, sql_exp *e, stmt *l
}
r = stmt_list(be, ops);
+ if (!reduce) {
+ sql_subfunc *f = e->f;
+ list *ops = sa_list(sql->sa);
+ for (node *n = l->op4.lval->h ; n ; n = n->next)
+ append(ops, n->data);
+ for (node *n = r->op4.lval->h ; n ; n = n->next)
+ append(ops, n->data);
+ if (!(s = stmt_Nop(be, stmt_list(be, ops), sel,
f, NULL)))
+ return NULL;
+ return s;
+ }
+
if (left && right && (exps_card(e->r) != CARD_ATOM ||
!exps_are_atoms(e->r))) {
sql_subfunc *f = e->f;
for (node *n = l->op4.lval->h ; n ; n = n->next)
@@ -2611,84 +2623,90 @@ releqjoin(backend *be, list *l1, list *l
return res;
}
+static bool
+can_join_exp(sql_rel *rel, sql_exp *e, bool anti)
+{
+ bool can_join = 0;
+
+ if (e->type == e_cmp) {
+ int flag = e->flag;
+ /* check if its a select or join expression, ie use only
expressions of one relation left and of the other right (than join) */
+ if (flag < cmp_filter || flag == mark_in || flag == mark_notin)
{ /* theta and range joins */
+ /* join or select ? */
+ sql_exp *l = e->l, *r = e->r, *f = e->f;
+
+ if (f) {
+ int ll = rel_find_exp(rel->l, l) != NULL;
+ int rl = rel_find_exp(rel->r, l) != NULL;
+ int lr = rel_find_exp(rel->l, r) != NULL;
+ int rr = rel_find_exp(rel->r, r) != NULL;
+ int lf = rel_find_exp(rel->l, f) != NULL;
+ int rf = rel_find_exp(rel->r, f) != NULL;
+ int nrcr1 = 0, nrcr2 = 0, nrcl1 = 0, nrcl2 = 0;
+
+ if ((ll && !rl &&
+ ((rr && !lr) || (nrcr1 = r->card ==
CARD_ATOM && exp_is_atom(r))) &&
+ ((rf && !lf) || (nrcr2 = f->card ==
CARD_ATOM && exp_is_atom(f))) && (nrcr1+nrcr2) <= 1) ||
+ (rl && !ll &&
+ ((lr && !rr) || (nrcl1 = r->card ==
CARD_ATOM && exp_is_atom(r))) &&
+ ((lf && !rf) || (nrcl2 = f->card ==
CARD_ATOM && exp_is_atom(f))) && (nrcl1+nrcl2) <= 1)) {
+ can_join = 1;
+ }
+ } else {
+ int ll = 0, lr = 0, rl = 0, rr = 0, cst = 0;
+ if (l->card != CARD_ATOM || !exp_is_atom(l)) {
+ ll = rel_find_exp(rel->l, l) != NULL;
+ rl = rel_find_exp(rel->r, l) != NULL;
+ } else if (anti) {
+ ll = 1;
+ cst = 1;
+ }
+ if (r->card != CARD_ATOM || !exp_is_atom(r)) {
+ lr = rel_find_exp(rel->l, r) != NULL;
+ rr = rel_find_exp(rel->r, r) != NULL;
+ } else if (anti) {
+ rr = cst?0:1;
+ }
+ if ((ll && !lr && !rl && rr) || (!ll && lr &&
rl && !rr))
+ can_join = 1;
+ }
+ } else if (flag == cmp_filter) {
+ list *l = e->l, *r = e->r;
+ int ll = 0, lr = 0, rl = 0, rr = 0;
+
+ for (node *n = l->h ; n ; n = n->next) {
+ sql_exp *ee = n->data;
+
+ if (ee->card != CARD_ATOM || !exp_is_atom(ee)) {
+ ll |= rel_find_exp(rel->l, ee) != NULL;
+ rl |= rel_find_exp(rel->r, ee) != NULL;
+ }
+ }
+ for (node *n = r->h ; n ; n = n->next) {
+ sql_exp *ee = n->data;
+
+ if (ee->card != CARD_ATOM || !exp_is_atom(ee)) {
+ lr |= rel_find_exp(rel->l, ee) != NULL;
+ rr |= rel_find_exp(rel->r, ee) != NULL;
+ }
+ }
+ if ((ll && !lr && !rl && rr) || (!ll && lr && rl &&
!rr))
+ can_join = 1;
+ }
+ }
+ return can_join;
+}
+
static void
split_join_exps(sql_rel *rel, list *joinable, list *not_joinable, bool anti)
{
if (!list_empty(rel->exps)) {
for (node *n = rel->exps->h; n; n = n->next) {
sql_exp *e = n->data;
- int can_join = 0;
/* we can handle thetajoins, rangejoins and filter
joins (like) */
/* ToDo how about atom expressions? */
- if (e->type == e_cmp) {
- int flag = e->flag;
- /* check if its a select or join expression, ie
use only expressions of one relation left and of the other right (than join) */
- if (flag < cmp_filter || flag == mark_in ||
flag == mark_notin) { /* theta and range joins */
- /* join or select ? */
- sql_exp *l = e->l, *r = e->r, *f = e->f;
-
- if (f) {
- int ll = rel_find_exp(rel->l,
l) != NULL;
- int rl = rel_find_exp(rel->r,
l) != NULL;
- int lr = rel_find_exp(rel->l,
r) != NULL;
- int rr = rel_find_exp(rel->r,
r) != NULL;
- int lf = rel_find_exp(rel->l,
f) != NULL;
- int rf = rel_find_exp(rel->r,
f) != NULL;
- int nrcr1 = 0, nrcr2 = 0, nrcl1
= 0, nrcl2 = 0;
-
- if ((ll && !rl &&
- ((rr && !lr) || (nrcr1 =
r->card == CARD_ATOM && exp_is_atom(r))) &&
- ((rf && !lf) || (nrcr2 =
f->card == CARD_ATOM && exp_is_atom(f))) && (nrcr1+nrcr2) <= 1) ||
- (rl && !ll &&
- ((lr && !rr) || (nrcl1 =
r->card == CARD_ATOM && exp_is_atom(r))) &&
- ((lf && !rf) || (nrcl2 =
f->card == CARD_ATOM && exp_is_atom(f))) && (nrcl1+nrcl2) <= 1)) {
- can_join = 1;
- }
- } else {
- int ll = 0, lr = 0, rl = 0, rr
= 0, cst = 0;
-
- if (l->card != CARD_ATOM ||
!exp_is_atom(l)) {
- ll =
rel_find_exp(rel->l, l) != NULL;
- rl =
rel_find_exp(rel->r, l) != NULL;
- } else if (anti) {
- ll = 1;
- cst = 1;
- }
- if (r->card != CARD_ATOM ||
!exp_is_atom(r)) {
- lr =
rel_find_exp(rel->l, r) != NULL;
- rr =
rel_find_exp(rel->r, r) != NULL;
- } else if (anti) {
- rr = cst?0:1;
- }
- if ((ll && !lr && !rl && rr) ||
(!ll && lr && rl && !rr))
- can_join = 1;
- }
- } else if (flag == cmp_filter) {
- list *l = e->l, *r = e->r;
- int ll = 0, lr = 0, rl = 0, rr = 0;
-
- for (node *n = l->h ; n ; n = n->next) {
- sql_exp *ee = n->data;
-
- if (ee->card != CARD_ATOM ||
!exp_is_atom(ee)) {
- ll |=
rel_find_exp(rel->l, ee) != NULL;
- rl |=
rel_find_exp(rel->r, ee) != NULL;
- }
- }
- for (node *n = r->h ; n ; n = n->next) {
- sql_exp *ee = n->data;
-
- if (ee->card != CARD_ATOM ||
!exp_is_atom(ee)) {
- lr |=
rel_find_exp(rel->l, ee) != NULL;
- rr |=
rel_find_exp(rel->r, ee) != NULL;
- }
- }
- if ((ll && !lr && !rl && rr) || (!ll &&
lr && rl && !rr))
- can_join = 1;
- }
- }
- if (can_join) {
+ if (can_join_exp(rel, e, anti)) {
append(joinable, e);
} else {
append(not_joinable, e);
@@ -2697,29 +2715,47 @@ split_join_exps(sql_rel *rel, list *join
}
}
-#define is_equi_exp_(e) ((e)->flag == cmp_equal || (e)->flag == mark_in)
+
+static int
+exp_is_mark(sql_exp *e)
+{
+ if (e->type == e_cmp && (e->flag == mark_in || e->flag == mark_notin))
+ return 1;
+ return 0;
+}
static list *
-get_equi_joins_first(mvc *sql, list *exps, int *equality_only)
+mark_joins_last(sql_allocator *sa, sql_rel *rel)
{
- list *new_exps = sa_list(sql->sa);
-
- for (node *n = exps->h; n; n = n->next) {
- sql_exp *e = n->data;
-
- assert(e->type == e_cmp && e->flag != cmp_in && e->flag !=
cmp_notin && e->flag != cmp_or);
- if (is_equi_exp_(e))
- list_append(new_exps, e);
- else
- *equality_only = 0;
- }
- for (node *n = exps->h; n; n = n->next) {
- sql_exp *e = n->data;
-
- if (!is_equi_exp_(e))
- list_append(new_exps, e);
- }
- return new_exps;
+ list *exps = sa_list(sa);
+
+ if (!list_empty(rel->exps)) {
+ for (node *n = rel->exps->h; n; n = n->next) {
+ sql_exp *e = n->data;
+
+ if (!exp_is_mark(e))
+ append(exps, e);
+ }
+ for (node *n = rel->exps->h; n; n = n->next) {
+ sql_exp *e = n->data;
+
+ if (exp_is_mark(e))
+ append(exps, e);
+ }
+ assert(list_length(exps) == list_length(rel->exps));
+ }
+ return exps;
+}
+
+static bool
+can_outerjoin_exp(sql_rel *rel, sql_exp *e, bool anti)
+{
+ if (can_join_exp(rel, e, anti)) {
+ if (e->flag != cmp_equal && e->flag != mark_in && e->flag !=
mark_notin && rel->op == op_left)
+ return false;
+ return true;
+ }
+ return false;
}
static stmt *
@@ -2727,7 +2763,7 @@ rel2bin_groupjoin(backend *be, sql_rel *
{
mvc *sql = be->mvc;
list *l;
- node *n, *en = rel->exps->h;
+ node *n , *en;
stmt *left = NULL, *right = NULL, *join = NULL, *jl, *jr, *res;
if (rel->l) /* first construct the left sub relation */
@@ -2741,15 +2777,17 @@ rel2bin_groupjoin(backend *be, sql_rel *
left = row2cols(be, left);
right = row2cols(be, right);
- /*
- * split in 2 steps,
- * first cheap join(s) (equality or idx)
- * second selects/filters
- */
assert(!list_empty(rel->exps));
- if (!list_empty(rel->exps)) {
- assert(!list_empty(rel->exps));
-
+ list *jexps = mark_joins_last(be->mvc->sa, rel);
+
+ en = jexps->h;
+ if (list_empty(jexps) || !can_outerjoin_exp(rel, en->data, true)) {
+ printf("# outer cross\n");
+ stmt *l = bin_find_smallest_column(be, left);
+ stmt *r = bin_find_smallest_column(be, right);
+
+ join = stmt_join_cand(be, column(be, l), column(be, r),
left->cand, NULL/*right->cand*/, 0, cmp_all, 0, 0, false, rel->op ==
op_left?false:true);
+ } else if (!list_empty(jexps)) {
/* markthetajoin()
* or left-join followed by markthetaselect */
sql_exp *e = en->data;
@@ -2762,12 +2800,27 @@ rel2bin_groupjoin(backend *be, sql_rel *
*/
en = en->next;
assert(en || (e->type == e_cmp && (e->flag == mark_in ||
e->flag == mark_notin)));
- stmt *l = exp_bin(be, e->l, left, NULL, NULL, NULL, NULL, NULL,
0, 1, 0);
+ stmt *l = exp_bin(be, e->l, left, NULL, NULL, NULL, NULL, NULL,
0, 1, 0), *r = NULL;
+ bool swap = false;
+
+ if (!l) {
+ swap = true;
+ l = exp_bin(be, e->l, right, NULL, NULL, NULL, NULL,
NULL, 0, 1, 0);
+ }
+ if (!l)
+ return NULL;
+ if ((r = exp_bin(be, e->r, left, right, NULL, NULL, NULL, NULL,
0, 1, 0)) == NULL)
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]