Changeset: 5dd880ae4f97 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/5dd880ae4f97
Branch: default
Log Message:
Merges cmp-or-patterns branch
diffs (truncated from 796 to 300 lines):
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -520,7 +520,7 @@ handle_in_tuple_exps(backend *be, sql_ex
else
s = cursel;
}
- if (sel && !(depth || !reduce))
+ if (!depth && reduce)
s = stmt_uselect(be,
s->nrcols == 0?stmt_const(be,
bin_find_smallest_column(be, left), s): s,
stmt_bool(be, 1), cmp_equal, sel, 0, 0);
diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c
--- a/sql/server/rel_optimize_sel.c
+++ b/sql/server/rel_optimize_sel.c
@@ -423,91 +423,366 @@ exps_cse( mvc *sql, list *oexps, list *l
return res;
}
-static int
-are_equality_exps( list *exps, sql_exp **L)
+static inline int
+exp_col_key(sql_exp *e)
+{
+ return e->nid ? e->nid : e->alias.label;
+}
+
+static inline int
+exp_cmp_eq_unique_id(sql_exp *e)
+{
+ return exp_col_key(e->l);
+}
+
+static inline int
+exp_multi_col_key(list *l)
+{
+ int k = exp_col_key(l->h->data);
+ for (node *n = l->h->next; n; n = n->next) {
+ k <<= 4;
+ k ^= exp_col_key(n->data);
+ }
+ return k;
+}
+
+typedef struct exp_eq_col_values {
+ /* we need ->first in order to remove it from the list of cmp_eq exps
+ * in case that we find another occurrence (with a different value)
+ */
+ sql_exp* first;
+ sql_exp* col; /* column */
+ list *vs; /* list of values */
+} eq_cv;
+
+typedef struct exp_eq_multi_col_values {
+ /* we need ->first in order to remove it from the list of multi col
+ * cmp_eq exps in case that we find another occurrence (with different
values)
+ */
+ list *first;
+ list *cols; /* list of col exps */
+ list *lvs; /* list of lists of values */
+} eq_mcv;
+
+static bool
+detect_col_cmp_eqs(mvc *sql, list *eqs, sql_hash *eqh)
{
- sql_exp *l = *L;
-
- if (list_length(exps) == 1) {
- sql_exp *e = exps->h->data, *le = e->l, *re = e->r;
-
- if (e->type == e_cmp && e->flag == cmp_equal && le->card !=
CARD_ATOM && re->card == CARD_ATOM && !is_semantics(e)) {
- if (!l) {
- *L = l = le;
- if (!is_column(le->type))
- return 0;
+ bool col_multivalue_cmp_eq = false;
+ for (node *n = eqs->h; n; n = n->next ) {
+ sql_exp *e = n->data;
+ sql_exp *le = e->l, *re = e->r;
+
+ /* find the le in the hash and append the re in the hash value
(ea->list) */
+ bool found = false;
+
+ int key = eqh->key(le);
+ sql_hash_e *he = eqh->buckets[key&(eqh->size-1)];
+
+ for (;he && !found; he = he->chain) {
+ eq_cv *cv = he->value;
+ if (!exp_equal(le, cv->col)) {
+ cv->vs = append(cv->vs, re);
+ found = col_multivalue_cmp_eq = true;
+ /* remove this and the previous (->first)
occurrence (if exists) from eqs */
+ if (cv->first) {
+ list_remove_data(eqs, NULL, cv->first);
+ cv->first = NULL;
+ }
+ list_remove_node(eqs, NULL, n);
}
- return (exp_match(l, le));
+ }
+
+ if (!found) {
+ eq_cv *cv = SA_NEW(sql->sa, eq_cv);
+ cv->first = e;
+ cv->vs = sa_list(sql->sa);
+ cv->vs = append(cv->vs, re);
+ cv->col = le;
+
+ hash_add(eqh, key, cv);
}
- if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) &&
!is_semantics(e))
- return (are_equality_exps(e->l, L) &&
are_equality_exps(e->r, L));
}
- return 0;
+ return col_multivalue_cmp_eq;
+}
+
+static bool
+detect_multicol_cmp_eqs(mvc *sql, list *mce_ands, sql_hash *meqh)
+{
+ /* we get as input a list of AND associated expressions (hence the
entries are lists themselves)
+ * we need to detect cmp_eq-only AND-associated expressions with the
same columns so we can
+ * group together their values
+ * e.g. [[n = 1, m = 10], [m = 20, k = 100, l = 3000], [m = 20, n = 2]]
has
+ * - (m,k,l) group with a single value (20, 100, 3000)
+ * - (n,k) group with two values (1, 10) and (2, 20)
+ * at the end we return true only if we have at least a group of
columns with more than a single value
+ * e.g. in this example (n,k)
+ */
+ bool multi_multivalue_cmp_eq = false;
+ for (node *n = mce_ands->h; n; n = n->next) {
+ list *l = n->data;
+
+ /* sort the list of the cmp_eq expressions based on the col exp
+ * NOTE: from now on we only work with the sorted list, sl */
+ list *sl = list_sort(l, (fkeyvalue)&exp_cmp_eq_unique_id, NULL);
+ list_append_before(mce_ands, n, sl);
+ list_remove_node(mce_ands, NULL, n);
+
+ /* find the eq exp in the hash and append the values */
+ bool found = false;
+
+ int key = meqh->key(sl);
+ sql_hash_e *he = meqh->buckets[key&(meqh->size-1)];
+
+ for (;he && !found; he = he->chain) {
+ /* compare the values of the hash_entry with the cols
under cmp_eq from the list */
+ bool same_cols = true;
+ eq_mcv *mcv = he->value;
+ for (node *m = sl->h, *k = mcv->cols->h; m && k &&
same_cols; m = m->next, k = k->next) {
+ sql_exp *col_exp = ((sql_exp*)m->data)->l;
+ if (exp_equal(col_exp, k->data))
+ same_cols = false;
+ }
+ if (same_cols) {
+ /* we found the same multi cmp_eq exp in
mce_ands list multiple times! */
+ found = multi_multivalue_cmp_eq = true;
+ /* gather all the values of the list and add
them to the hash entry */
+ list *atms = sa_list(sql->sa);
+ for (node *m = sl->h; m; m = m->next)
+ atms = append(atms,
((sql_exp*)m->data)->r);
+ mcv->lvs = append(mcv->lvs, atms);
+ /* remove this and the previous occurrence
(which means that's the first time
+ * that we found the *same* multi cmp_eq exp)
+ */
+ if (mcv->first) {
+ list_remove_data(mce_ands, NULL,
mcv->first);
+ mcv->first = NULL;
+ }
+ list_remove_data(mce_ands, NULL, sl);
+ }
+ }
+
+ if (!found) {
+ eq_mcv *mcv = SA_NEW(sql->sa, eq_mcv);
+ mcv->first = sl;
+ mcv->cols = sa_list(sql->sa);
+ for (node *m = sl->h; m; m = m->next)
+ mcv->cols = append(mcv->cols,
((sql_exp*)m->data)->l);
+ /* for the list of values (atoms) create a list and
append it to the lvs list */
+ list *atms = sa_list(sql->sa);
+ for (node *m = sl->h; m; m = m->next)
+ atms = append(atms, ((sql_exp*)m->data)->r);
+ mcv->lvs = sa_list(sql->sa);
+ mcv->lvs = append(mcv->lvs, atms);
+
+ hash_add(meqh, key, mcv);
+ }
+ }
+ return multi_multivalue_cmp_eq;
}
static void
-get_exps( list *n, list *l )
+exp_or_chain_groups(mvc *sql, list *exps, list **gen_ands, list **mce_ands,
list **eqs, list **noneq)
{
- sql_exp *e = l->h->data, *re = e->r;
-
- if (e->type == e_cmp && e->flag == cmp_equal && re->card == CARD_ATOM)
- list_append(n, re);
- if (e->type == e_cmp && e->flag == cmp_or) {
- get_exps(n, e->l);
- get_exps(n, e->r);
+ /* identify three different groups
+ * 1. gen_ands: lists of generic expressions (their inner association
is AND)
+ * 2. mce_ands: lists of multi_colum cmp_eq ONLY expressions (same^^^)
+ * 3. eqs: equality expressions
+ * 4. neq: non equality col expressions
+ *
+ * return true if there is an exp with more than one cmp_eq
+ */
+ bool eq_only = true;
+ for (node *n = exps->h; n && eq_only; n = n->next) {
+ sql_exp *e = n->data;
+ sql_exp *le = e->l, *re = e->r;
+ eq_only &= (e->type == e_cmp && e->flag == cmp_equal &&
+ le->card != CARD_ATOM && is_column(le->type) &&
+ re->card == CARD_ATOM && !is_semantics(e));
+ }
+
+ if (list_length(exps) > 1) {
+ if (eq_only)
+ *mce_ands = append(*mce_ands, exps);
+ else
+ *gen_ands = append(*gen_ands, exps);
+ } else if (list_length(exps) == 1) {
+ sql_exp *se = exps->h->data;
+ sql_exp *le = se->l, *re = se->r;
+
+ if (se->type == e_cmp && se->flag == cmp_or && !is_anti(se)) {
+ /* for a cmp_or expression go down the tree */
+ exp_or_chain_groups(sql, (list*)le, gen_ands, mce_ands,
eqs, noneq);
+ exp_or_chain_groups(sql, (list*)re, gen_ands, mce_ands,
eqs, noneq);
+
+ } else if (eq_only) {
+ *eqs = append(*eqs, se);
+ } else {
+ *noneq = append(*noneq, se);
+ }
}
}
-static sql_exp *
-equality_exps_2_in( mvc *sql, sql_exp *ce, list *l, list *r)
+static list *
+generate_single_col_cmp_in(mvc *sql, sql_hash *eqh)
{
- list *nl = new_exp_list(sql->sa);
-
- get_exps(nl, l);
- get_exps(nl, r);
-
- return exp_in( sql->sa, ce, nl, cmp_in);
+ /* from single col cmp_eq with multiple atoms in the hash generate
+ * "e_col in (val0, val1, ...)" (see detect_col_cmp_eqs())
+ */
+ list *ins = new_exp_list(sql->sa);
+ for (int i = 0; i < eqh->size; i++) {
+ sql_hash_e *he = eqh->buckets[i];
+
+ while (he) {
+ eq_cv *cv = he->value;
+ /* NOTE: cmp_eq expressions with a single entry are
still in eqs */
+ if (list_length(cv->vs) > 1)
+ ins = append(ins, exp_in(sql->sa, cv->col,
cv->vs, cmp_in));
+ he = he->chain;
+ }
+ }
+ return ins;
+}
+
+static list *
+generate_multi_col_cmp_in(mvc *sql, sql_hash *meqh)
+{
+ /* from multivalue cmp_eq with multiple lists of atoms in the hash
generate
+ * "(col1, col2, ...) in [(val10, val20, ...), (val11, val21, ...), ...
]"
+ * (see detect_multicol_cmp_eqs())
+ */
+ list *ins = new_exp_list(sql->sa);
+ for (int i = 0; i < meqh->size; i++) {
+ sql_hash_e *he = meqh->buckets[i];
+ while (he) {
+ eq_mcv *mcv = he->value;
+ /* NOTE: multivalue cmp_eq expressions with a single
entry are still in mce_ands */
+ if (list_length(mcv->lvs) > 1) {
+ sql_exp *mc = exp_label(sql->sa,
exp_values(sql->sa, mcv->cols), ++sql->label);
+ for (node *a = mcv->lvs->h; a; a = a->next)
+ a->data = exp_values(sql->sa, a->data);
+ ins = append(ins, exp_in(sql->sa, mc, mcv->lvs,
cmp_in));
+ }
+ he = he->chain;
+ }
+ }
+ return ins;
}
static list *
merge_ors(mvc *sql, list *exps, int *changes)
{
- list *nexps = NULL;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]