Changeset: c69aeb4cb0cb for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/c69aeb4cb0cb
Modified Files:
        sql/server/rel_exp.c
        sql/server/rel_exp.h
        sql/server/rel_optimizer.c
        sql/server/rel_optimizer.h
        sql/server/rel_unnest.c
Branch: antipush
Log Message:

Updated exps_unique to take the new unique property into account. The
batappend-undefined.Bug-7130 test now crashes, but I think that is caused by a
bug in the Jul2021 branch.


diffs (237 lines):

diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c
--- a/sql/server/rel_exp.c
+++ b/sql/server/rel_exp.c
@@ -641,6 +641,19 @@ have_nil(list *exps)
        return has_nil;
 }
 
+int
+have_semantics(list *exps)
+{
+       int has_semantics = 0;
+
+       if (exps)
+               for (node *n = exps->h; n && !has_semantics; n = n->next) {
+                       sql_exp *e = n->data;
+                       has_semantics |= is_compare(e->type) && is_semantics(e);
+               }
+       return has_semantics;
+}
+
 sql_exp *
 exp_column(sql_allocator *sa, const char *rname, const char *cname, 
sql_subtype *t, unsigned int card, int has_nils, int unique, int intern)
 {
diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h
--- a/sql/server/rel_exp.h
+++ b/sql/server/rel_exp.h
@@ -85,6 +85,7 @@ extern sql_exp * exp_values(sql_allocato
 extern list * exp_get_values(sql_exp *e); /* get expression list from the 
values expression */
 extern list * exp_types(sql_allocator *sa, list *exps);
 extern int have_nil(list *exps);
+extern int have_semantics(list *exps);
 
 sql_export sql_exp * exp_column(sql_allocator *sa, const char *rname, const 
char *name, sql_subtype *t, unsigned int card, int has_nils, int unique, int 
intern);
 extern sql_exp * exp_propagate(sql_allocator *sa, sql_exp *ne, sql_exp *oe);
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -2301,17 +2301,16 @@ exp_push_down_prj(mvc *sql, sql_exp *e, 
 }
 
 static int
-rel_is_unique( sql_rel *rel, sql_ukey *k)
+rel_is_unique(sql_rel *rel)
 {
        switch(rel->op) {
-       case op_left:
-       case op_right:
-       case op_full:
-       case op_join:
-               return 0;
        case op_semi:
        case op_anti:
-               return rel_is_unique(rel->l, k);
+       case op_inter:
+       case op_except:
+       case op_topn:
+       case op_sample:
+               return rel_is_unique(rel->l);
        case op_table:
        case op_basetable:
                return 1;
@@ -2320,43 +2319,48 @@ rel_is_unique( sql_rel *rel, sql_ukey *k
        }
 }
 
+/* WARNING exps_unique doesn't check for duplicate NULL values */
 int
 exps_unique(mvc *sql, sql_rel *rel, list *exps)
 {
-       node *n;
-       char *matched = NULL;
-       int nr = 0;
+       int nr = 0, need_check = 0;
        sql_ukey *k = NULL;
 
        if (list_empty(exps))
                return 0;
-       for(n = exps->h; n && !k; n = n->next) {
+       for(node *n = exps->h; n ; n = n->next) {
                sql_exp *e = n->data;
                prop *p;
 
-               if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL)
-                       k = p->value;
-       }
-       if (!k || list_length(k->k.columns) > list_length(exps))
+               if (!is_unique(e)) { /* ignore unique columns */
+                       need_check++;
+                       if (!k && (p = find_prop(e->p, PROP_HASHCOL))) /* at 
the moment, use only one k */
+                               k = p->value;
+               }
+       }
+       if (!need_check) /* all have unique property return */
+               return 1;
+       if (!k || list_length(k->k.columns) != need_check)
                return 0;
        if (rel) {
-               matched = SA_ZNEW_ARRAY(sql->sa, char, 
list_length(k->k.columns));
-               for(n = exps->h; n; n = n->next) {
+               char *matched = SA_ZNEW_ARRAY(sql->sa, char, 
list_length(k->k.columns));
+               fcmp cmp = (fcmp)&kc_column_cmp;
+               for(node *n = exps->h; n; n = n->next) {
                        sql_exp *e = n->data;
-                       fcmp cmp = (fcmp)&kc_column_cmp;
-                       sql_column *c = exp_find_column(rel, e, -2);
+                       sql_column *c;
                        node *m;
 
-                       if (c && (m=list_find(k->k.columns, c, cmp)) != NULL) {
+                       if (is_unique(e))
+                               continue;
+                       if ((c = exp_find_column(rel, e, -2)) != NULL && (m = 
list_find(k->k.columns, c, cmp)) != NULL) {
                                int pos = list_position(k->k.columns, m->data);
                                if (!matched[pos])
                                        nr++;
                                matched[pos] = 1;
                        }
                }
-               if (nr == list_length(k->k.columns)) {
-                       return rel_is_unique(rel, k);
-               }
+               if (nr == list_length(k->k.columns))
+                       return rel_is_unique(rel);
        }
        return 0;
 }
@@ -2400,12 +2404,15 @@ rel_distinct_aggregate_on_unique_values(
 
                        if (exp->type == e_aggr && need_distinct(exp)) {
                                bool all_unique = true;
-
-                               for (node *m = ((list*)exp->l)->h; m && 
all_unique; m = m->next) {
+                               list *l = exp->l;
+
+                               for (node *m = l->h; m && all_unique; m = 
m->next) {
                                        sql_exp *arg = (sql_exp*) m->data;
 
                                        all_unique &= arg->type == e_column && 
is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
                                }
+                               if (!all_unique && exps_card(l) > CARD_ATOM)
+                                       all_unique = exps_unique(v->sql, rel, 
l) && (!is_semantics(exp) || !have_nil(l));
                                if (all_unique) {
                                        set_nodistinct(exp);
                                        v->changes++;
@@ -2555,7 +2562,8 @@ rel_distinct_project2groupby(visitor *v,
        /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
         * into project [ pk ] ( select/semijoin ( table )  */
        if (rel->op == op_project && rel->l && !rel->r /* no order by */ && 
need_distinct(rel) &&
-           (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, 
rel, rel->exps)) {
+           (l->op == op_select || l->op == op_semi) && exps_unique(v->sql, 
rel, rel->exps) &&
+               (!have_semantics(l->exps) || !have_nil(rel->exps))) {
                set_nodistinct(rel);
                v->changes++;
        }
@@ -5469,8 +5477,12 @@ static inline sql_rel *
 rel_push_project_down_union(visitor *v, sql_rel *rel)
 {
        /* first remove distinct if already unique */
-       if (rel->op == op_project && need_distinct(rel) && rel->exps && 
exps_unique(v->sql, rel, rel->exps))
+       if (rel->op == op_project && need_distinct(rel) && rel->exps && 
exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
                set_nodistinct(rel);
+               if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM) 
/* if the projection just contains constants, then no topN is needed */
+                       rel->l = rel_topn(v->sql->sa, rel->l, 
append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
+               v->changes++;
+       }
 
        if (rel->op == op_project && rel->l && rel->exps && !rel->r) {
                int need_distinct = need_distinct(rel);
@@ -5495,8 +5507,8 @@ rel_push_project_down_union(visitor *v, 
                        ur = rel_project(v->sql->sa, ur,
                                rel_projections(v->sql, ur, NULL, 1, 1));
                need_distinct = (need_distinct &&
-                               (!exps_unique(v->sql, ul, ul->exps) ||
-                                !exps_unique(v->sql, ur, ur->exps)));
+                               (!exps_unique(v->sql, ul, ul->exps) || 
have_nil(ul->exps) ||
+                                !exps_unique(v->sql, ur, ur->exps) || 
have_nil(ur->exps)));
                rel_rename_exps(v->sql, u->exps, ul->exps);
                rel_rename_exps(v->sql, u->exps, ur->exps);
 
@@ -5914,18 +5926,6 @@ rel_groupby_distinct(visitor *v, sql_rel
 {
        node *n;
 
-       if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps && 
list_empty(rel->r)) {
-               for (n = rel->exps->h; n; n = n->next) {
-                       sql_exp *e = n->data;
-
-                       if (exp_aggr_is_count(e) && need_distinct(e)) {
-                               /* if count over unique values (ukey/pkey) */
-                               if (e->l && exps_unique(v->sql, rel, e->l))
-                                       set_nodistinct(e);
-                       }
-               }
-       }
-
        if (is_groupby(rel->op)) {
                sql_rel *l = rel->l;
                if (!l || is_groupby(l->op))
@@ -9360,7 +9360,6 @@ rel_remove_union_partitions(visitor *v, 
 static sql_rel *
 rel_first_level_optimizations(visitor *v, sql_rel *rel)
 {
-       rel = rel_distinct_aggregate_on_unique_values(v, rel);
        /* rel_simplify_math optimizer requires to clear the hash, so make sure 
it runs last in this batch */
        if (v->value_based_opt)
                rel = rel_simplify_math(v, rel);
@@ -9488,6 +9487,7 @@ rel_optimize_projections(visitor *v, sql
        rel = rel_push_groupby_down(v, rel);
        rel = rel_groupby_order(v, rel);
        rel = rel_reduce_groupby_exps(v, rel);
+       rel = rel_distinct_aggregate_on_unique_values(v, rel);
        rel = rel_groupby_distinct(v, rel);
        rel = rel_push_count_down(v, rel);
        /* only when value_based_opt is on, ie not for dependency resolution */
diff --git a/sql/server/rel_optimizer.h b/sql/server/rel_optimizer.h
--- a/sql/server/rel_optimizer.h
+++ b/sql/server/rel_optimizer.h
@@ -17,6 +17,7 @@ extern sql_rel *rel_optimizer(mvc *sql, 
 extern int exp_joins_rels(sql_exp *e, list *rels);
 
 extern sql_column *name_find_column(sql_rel *rel, const char *rname, const 
char *name, int pnr, sql_rel **bt);
+/* WARNING exps_unique doesn't check for duplicate NULL values */
 extern int exps_unique(mvc *sql, sql_rel *rel, list *exps);
 
 extern sql_rel *rel_dce(mvc *sql, sql_rel *rel);
diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c
--- a/sql/server/rel_unnest.c
+++ b/sql/server/rel_unnest.c
@@ -86,7 +86,7 @@ static int
 is_distinct_set(mvc *sql, sql_rel *rel, list *ad)
 {
        int distinct = 0;
-       if (ad && exps_unique(sql, rel, ad ))
+       if (ad && exps_unique(sql, rel, ad) && !have_nil(ad))
                return 1;
        if (ad && is_groupby(rel->op) && exp_match_list(rel->r, ad))
                return 1;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to