Changeset: 12ad397d52a5 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/12ad397d52a5
Modified Files:
        sql/server/rel_exp.c
        sql/server/rel_exp.h
        sql/server/rel_optimize_proj.c
        sql/server/rel_rel.c
        sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
        sql/test/miscellaneous/Tests/groupby_error.test
        sql/test/miscellaneous/Tests/groupby_expressions.test
        sql/test/miscellaneous/Tests/simple_selects.test
Branch: default
Log Message:

Move identical grouping columns detection as an optimizer. It fixes query with 
self reference being used as a grouping column


diffs (239 lines):

diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c
--- a/sql/server/rel_exp.c
+++ b/sql/server/rel_exp.c
@@ -1109,20 +1109,15 @@ exp_cmp( sql_exp *e1, sql_exp *e2)
        return (e1 == e2)?0:-1;
 }
 
-#define alias_cmp(e1, e2) \
-       do { \
-               if (e1->alias.rname && e2->alias.rname && 
strcmp(e1->alias.rname, e2->alias.rname) == 0) \
-                       return strcmp(e1->alias.name, e2->alias.name); \
-               if (!e1->alias.rname && !e2->alias.rname && e1->alias.label == 
e2->alias.label && e1->alias.name && e2->alias.name) \
-                       return strcmp(e1->alias.name, e2->alias.name); \
-       } while (0);
-
 int
 exp_equal( sql_exp *e1, sql_exp *e2)
 {
        if (e1 == e2)
                return 0;
-       alias_cmp(e1, e2);
+       if (e1->alias.rname && e2->alias.rname && strcmp(e1->alias.rname, 
e2->alias.rname) == 0)
+               return strcmp(e1->alias.name, e2->alias.name);
+       if (!e1->alias.rname && !e2->alias.rname && e1->alias.label == 
e2->alias.label && e1->alias.name && e2->alias.name)
+               return strcmp(e1->alias.name, e2->alias.name);
        return -1;
 }
 
@@ -1399,27 +1394,6 @@ exps_any_match(list *l, sql_exp *e)
 }
 
 static int
-exp_no_alias(sql_exp *e1, sql_exp *e2)
-{
-       alias_cmp(e1, e2);
-       /* at least one of the expressions don't have an alias, so there's a 
match */
-       return 0;
-}
-
-sql_exp *
-exps_any_match_same_or_no_alias(list *l, sql_exp *e)
-{
-       if (!l)
-               return NULL;
-       for (node *n = l->h; n ; n = n->next) {
-               sql_exp *ne = (sql_exp *) n->data;
-               if ((exp_match(ne, e) || exp_refers(ne, e) || exp_match_exp(ne, 
e)) && exp_no_alias(e, ne) == 0)
-                       return ne;
-       }
-       return NULL;
-}
-
-static int
 exps_are_joins( list *l )
 {
        if (l)
diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h
--- a/sql/server/rel_exp.h
+++ b/sql/server/rel_exp.h
@@ -141,7 +141,6 @@ extern int exp_match( sql_exp *e1, sql_e
 extern sql_exp* exps_find_exp( list *l, sql_exp *e);
 extern int exp_match_exp( sql_exp *e1, sql_exp *e2);
 extern sql_exp* exps_any_match(list *l, sql_exp *e);
-extern sql_exp *exps_any_match_same_or_no_alias(list *l, sql_exp *e);
 /* match just the column (cmp equality) expressions */
 extern int exp_match_col_exps( sql_exp *e, list *l);
 extern int exps_match_col_exps( sql_exp *e1, sql_exp *e2);
diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c
--- a/sql/server/rel_optimize_proj.c
+++ b/sql/server/rel_optimize_proj.c
@@ -1333,6 +1333,55 @@ rel_project_cse(visitor *v, sql_rel *rel
        return rel;
 }
 
+/* remove identical grouping columns */
+static inline sql_rel *
+rel_groupby_cse(visitor *v, sql_rel *rel)
+{
+       if (is_groupby(rel->op) && !list_empty(rel->r)) {
+               sql_rel *l = rel->l;
+               int needed = 0;
+
+               for (node *n=((list*)rel->r)->h; n ; n = n->next) {
+                       sql_exp *e = n->data;
+                       e->used = 0; /* we need to use this flag, clean it 
first */
+               }
+               for (node *n=((list*)rel->r)->h; n ; n = n->next) {
+                       sql_exp *e1 = n->data;
+                       /* TODO maybe cover more cases? Here I only look at the 
left relation */
+                       sql_exp *e3 = e1->type == e_column ? (e1->l ? 
exps_bind_column2(l->exps, e1->l, e1->r, NULL) : exps_bind_column(l->exps, 
e1->r, NULL, NULL, 0)) : NULL;
+
+                       for (node *m=n->next; m; m = m->next) {
+                               sql_exp *e2 = m->data;
+                               sql_exp *e4 = e2->type == e_column ? (e2->l ? 
exps_bind_column2(l->exps, e2->l, e2->r, NULL) : exps_bind_column(l->exps, 
e2->r, NULL, NULL, 0)) : NULL;
+
+                               if (exp_match_exp(e1, e2) || exp_refers(e1, e2) 
|| (e3 && e4 && (exp_match_exp(e3, e4) || exp_refers(e3, e4)))) {
+                                       e2->used = 1; /* flag it as being 
removed */
+                                       needed = 1;
+                               }
+                       }
+               }
+
+               if (!needed)
+                       return rel;
+
+               if (!is_simple_project(l->op) || !list_empty(l->r) || 
rel_is_ref(l) || need_distinct(l) || is_single(l))
+                       rel->l = l = rel_project(v->sql->sa, l, 
rel_projections(v->sql, l, NULL, 1, 1));
+
+               for (node *n=((list*)rel->r)->h; n ; ) {
+                       node *next = n->next;
+                       sql_exp *e = n->data;
+
+                       if (e->used) { /* remove unecessary grouping columns */
+                               e->used = 0;
+                               list_append(l->exps, e);
+                               list_remove_node(rel->r, NULL, n);
+                       }
+                       n = next;
+               }
+       }
+       return rel;
+}
+
 sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
 
 static sql_exp*
@@ -2466,6 +2515,7 @@ rel_optimize_projections_(visitor *v, sq
        if (!rel || !is_groupby(rel->op))
                return rel;
 
+       rel = rel_groupby_cse(v, rel);
        rel = rel_push_aggr_down(v, rel);
        rel = rel_push_groupby_down(v, rel);
        rel = rel_groupby_order(v, rel);
diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c
--- a/sql/server/rel_rel.c
+++ b/sql/server/rel_rel.c
@@ -909,10 +909,16 @@ rel_groupby(mvc *sql, sql_rel *l, list *
                list *gexps = sa_list(sql->sa);
 
                for (en = groupbyexps->h; en; en = en->next) {
-                       sql_exp *e = en->data;
+                       sql_exp *e = en->data, *ne = exps_find_exp(gexps, e);
 
-                       if (!exps_any_match_same_or_no_alias(gexps, e))
-                               append(gexps, e);
+                       if (!ne) {
+                               list_append(gexps, e);
+                       } else {
+                               const char *ername = exp_relname(e), *nername = 
exp_relname(ne), *ename = exp_name(e), *nename = exp_name(ne);
+                               if ((ername && !nername) || (!ername && 
nername) || 
+                                       (ername && nername && 
strcmp(ername,nername) != 0) || strcmp(ename,nename) != 0)
+                                       list_append(gexps, e);
+                       }
                }
                groupbyexps = gexps;
        }
diff --git 
a/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test 
b/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
--- a/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
+++ b/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
@@ -27,8 +27,10 @@ GROUP BY cods, elrik, ether, jaelen, sor
 ----
 project (
 | group by (
-| | table("sys"."t2a") [ "t2a"."tib0" ]
-| ) [ tinyint(8) "0" as "sora" ] [ tinyint(8) "0" as "cods", tinyint(8) "0" as 
"elrik", tinyint(8) "0" as "ether", tinyint(8) "0" as "jaelen", "sora" NOT 
NULL, "sys"."min" no nil ("t2a"."tib0") as "%1"."%1" ]
+| | project (
+| | | table("sys"."t2a") [ "t2a"."tib0" ]
+| | ) [ "t2a"."tib0", tinyint(8) "0" as "elrik", tinyint(8) "0" as "ether", 
tinyint(8) "0" as "jaelen", tinyint(8) "0" as "sora" ]
+| ) [ tinyint(8) "0" as "cods" ] [ "cods" NOT NULL, "elrik" NOT NULL, "ether" 
NOT NULL, "jaelen" NOT NULL, "sora" NOT NULL, "sys"."min" no nil ("t2a"."tib0") 
as "%1"."%1" ]
 ) [ "cods" NOT NULL, "elrik" NOT NULL, "ether" NOT NULL, "jaelen" NOT NULL, 
"sora" NOT NULL, "%1"."%1" ]
 
 statement ok
diff --git a/sql/test/miscellaneous/Tests/groupby_error.test 
b/sql/test/miscellaneous/Tests/groupby_error.test
--- a/sql/test/miscellaneous/Tests/groupby_error.test
+++ b/sql/test/miscellaneous/Tests/groupby_error.test
@@ -405,8 +405,8 @@ project (
 | | | table("sys"."tab1") [ "tab1"."part" as "myalias0"."part", "tab1"."tet" 
as "myalias0"."tet" ],
 | | | table("sys"."tab2") [ "tab2"."myk" as "myalias1"."myk", "tab2"."ups" as 
"myalias1"."ups" ]
 | | ) [ ("myalias0"."part") = ("myalias1"."myk") ]
-| ) [ "myalias1"."ups", "myalias0"."part", "myalias0"."tet" as "track" ] [ 
"myalias0"."part", "track", "sys"."count"() NOT NULL as "%1"."%1" ]
-) [ "myalias0"."part", "track" as "tet", "%1"."%1" NOT NULL as "mycount", 
"track" ]
+| ) [ "myalias1"."ups", "myalias0"."part", "myalias0"."tet" as "track" ] [ 
"myalias0"."part", "track", "myalias0"."tet", "sys"."count"() NOT NULL as 
"%1"."%1" ]
+) [ "myalias0"."part", "myalias0"."tet", "%1"."%1" NOT NULL as "mycount", 
"track" ]
 
 query IIII rowsort
 select
diff --git a/sql/test/miscellaneous/Tests/groupby_expressions.test 
b/sql/test/miscellaneous/Tests/groupby_expressions.test
--- a/sql/test/miscellaneous/Tests/groupby_expressions.test
+++ b/sql/test/miscellaneous/Tests/groupby_expressions.test
@@ -26,7 +26,7 @@ project (
 | group by (
 | | project (
 | | | table("sys"."groupings") [ "groupings"."aa" ]
-| | ) [ bigint(33)["groupings"."aa"] as "%2"."%2", "sys"."sql_add"("%2"."%2", 
bigint(33) "1") as "%1"."%1" ]
+| | ) [ bigint(33)["groupings"."aa"] as "%3"."%3", "sys"."sql_add"("%3"."%3", 
bigint(33) "1") as "%1"."%1" ]
 | ) [ "%1"."%1" ] [ "%1"."%1" ]
 ) [ "%1"."%1" ]
 
diff --git a/sql/test/miscellaneous/Tests/simple_selects.test 
b/sql/test/miscellaneous/Tests/simple_selects.test
--- a/sql/test/miscellaneous/Tests/simple_selects.test
+++ b/sql/test/miscellaneous/Tests/simple_selects.test
@@ -896,6 +896,36 @@ 1
 statement ok
 drop function dosomething
 
+query II rowsort
+SELECT x AS x, x AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1
+----
+1
+1
+2
+2
+3
+3
+
+# remove 'x1' from grouping columns list
+query T nosort
+plan SELECT x AS x, x AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1
+----
+project (
+| group by (
+| |  [  [ tinyint(2) "1", tinyint(2) "2", tinyint(2) "3" ] as "x"."x", "x"."x" 
as "x1" ]
+| ) [ "x"."x" ] [ "x"."x", "x1" ]
+) [ "x"."x", "x1" ]
+
+query II rowsort
+SELECT x AS x, x + 1 AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1
+----
+1
+2
+2
+3
+3
+4
+
 statement ok
 create global temp table x(x int, y int)
 
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to