Changeset: 12ad397d52a5 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/12ad397d52a5
Modified Files:
sql/server/rel_exp.c
sql/server/rel_exp.h
sql/server/rel_optimize_proj.c
sql/server/rel_rel.c
sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
sql/test/miscellaneous/Tests/groupby_error.test
sql/test/miscellaneous/Tests/groupby_expressions.test
sql/test/miscellaneous/Tests/simple_selects.test
Branch: default
Log Message:
Move identical grouping columns detection as an optimizer. It fixes query with
self reference being used as a grouping column
diffs (239 lines):
diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c
--- a/sql/server/rel_exp.c
+++ b/sql/server/rel_exp.c
@@ -1109,20 +1109,15 @@ exp_cmp( sql_exp *e1, sql_exp *e2)
return (e1 == e2)?0:-1;
}
-#define alias_cmp(e1, e2) \
- do { \
- if (e1->alias.rname && e2->alias.rname &&
strcmp(e1->alias.rname, e2->alias.rname) == 0) \
- return strcmp(e1->alias.name, e2->alias.name); \
- if (!e1->alias.rname && !e2->alias.rname && e1->alias.label ==
e2->alias.label && e1->alias.name && e2->alias.name) \
- return strcmp(e1->alias.name, e2->alias.name); \
- } while (0);
-
int
exp_equal( sql_exp *e1, sql_exp *e2)
{
if (e1 == e2)
return 0;
- alias_cmp(e1, e2);
+ if (e1->alias.rname && e2->alias.rname && strcmp(e1->alias.rname,
e2->alias.rname) == 0)
+ return strcmp(e1->alias.name, e2->alias.name);
+ if (!e1->alias.rname && !e2->alias.rname && e1->alias.label ==
e2->alias.label && e1->alias.name && e2->alias.name)
+ return strcmp(e1->alias.name, e2->alias.name);
return -1;
}
@@ -1399,27 +1394,6 @@ exps_any_match(list *l, sql_exp *e)
}
static int
-exp_no_alias(sql_exp *e1, sql_exp *e2)
-{
- alias_cmp(e1, e2);
- /* at least one of the expressions don't have an alias, so there's a
match */
- return 0;
-}
-
-sql_exp *
-exps_any_match_same_or_no_alias(list *l, sql_exp *e)
-{
- if (!l)
- return NULL;
- for (node *n = l->h; n ; n = n->next) {
- sql_exp *ne = (sql_exp *) n->data;
- if ((exp_match(ne, e) || exp_refers(ne, e) || exp_match_exp(ne,
e)) && exp_no_alias(e, ne) == 0)
- return ne;
- }
- return NULL;
-}
-
-static int
exps_are_joins( list *l )
{
if (l)
diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h
--- a/sql/server/rel_exp.h
+++ b/sql/server/rel_exp.h
@@ -141,7 +141,6 @@ extern int exp_match( sql_exp *e1, sql_e
extern sql_exp* exps_find_exp( list *l, sql_exp *e);
extern int exp_match_exp( sql_exp *e1, sql_exp *e2);
extern sql_exp* exps_any_match(list *l, sql_exp *e);
-extern sql_exp *exps_any_match_same_or_no_alias(list *l, sql_exp *e);
/* match just the column (cmp equality) expressions */
extern int exp_match_col_exps( sql_exp *e, list *l);
extern int exps_match_col_exps( sql_exp *e1, sql_exp *e2);
diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c
--- a/sql/server/rel_optimize_proj.c
+++ b/sql/server/rel_optimize_proj.c
@@ -1333,6 +1333,55 @@ rel_project_cse(visitor *v, sql_rel *rel
return rel;
}
+/* remove identical grouping columns */
+static inline sql_rel *
+rel_groupby_cse(visitor *v, sql_rel *rel)
+{
+ if (is_groupby(rel->op) && !list_empty(rel->r)) {
+ sql_rel *l = rel->l;
+ int needed = 0;
+
+ for (node *n=((list*)rel->r)->h; n ; n = n->next) {
+ sql_exp *e = n->data;
+ e->used = 0; /* we need to use this flag, clean it
first */
+ }
+ for (node *n=((list*)rel->r)->h; n ; n = n->next) {
+ sql_exp *e1 = n->data;
+ /* TODO maybe cover more cases? Here I only look at the
left relation */
+ sql_exp *e3 = e1->type == e_column ? (e1->l ?
exps_bind_column2(l->exps, e1->l, e1->r, NULL) : exps_bind_column(l->exps,
e1->r, NULL, NULL, 0)) : NULL;
+
+ for (node *m=n->next; m; m = m->next) {
+ sql_exp *e2 = m->data;
+ sql_exp *e4 = e2->type == e_column ? (e2->l ?
exps_bind_column2(l->exps, e2->l, e2->r, NULL) : exps_bind_column(l->exps,
e2->r, NULL, NULL, 0)) : NULL;
+
+ if (exp_match_exp(e1, e2) || exp_refers(e1, e2)
|| (e3 && e4 && (exp_match_exp(e3, e4) || exp_refers(e3, e4)))) {
+ e2->used = 1; /* flag it as being
removed */
+ needed = 1;
+ }
+ }
+ }
+
+ if (!needed)
+ return rel;
+
+ if (!is_simple_project(l->op) || !list_empty(l->r) ||
rel_is_ref(l) || need_distinct(l) || is_single(l))
+ rel->l = l = rel_project(v->sql->sa, l,
rel_projections(v->sql, l, NULL, 1, 1));
+
+ for (node *n=((list*)rel->r)->h; n ; ) {
+ node *next = n->next;
+ sql_exp *e = n->data;
+
+ if (e->used) { /* remove unecessary grouping columns */
+ e->used = 0;
+ list_append(l->exps, e);
+ list_remove_node(rel->r, NULL, n);
+ }
+ n = next;
+ }
+ }
+ return rel;
+}
+
sql_exp *list_exps_uses_exp(list *exps, const char *rname, const char *name);
static sql_exp*
@@ -2466,6 +2515,7 @@ rel_optimize_projections_(visitor *v, sq
if (!rel || !is_groupby(rel->op))
return rel;
+ rel = rel_groupby_cse(v, rel);
rel = rel_push_aggr_down(v, rel);
rel = rel_push_groupby_down(v, rel);
rel = rel_groupby_order(v, rel);
diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c
--- a/sql/server/rel_rel.c
+++ b/sql/server/rel_rel.c
@@ -909,10 +909,16 @@ rel_groupby(mvc *sql, sql_rel *l, list *
list *gexps = sa_list(sql->sa);
for (en = groupbyexps->h; en; en = en->next) {
- sql_exp *e = en->data;
+ sql_exp *e = en->data, *ne = exps_find_exp(gexps, e);
- if (!exps_any_match_same_or_no_alias(gexps, e))
- append(gexps, e);
+ if (!ne) {
+ list_append(gexps, e);
+ } else {
+ const char *ername = exp_relname(e), *nername =
exp_relname(ne), *ename = exp_name(e), *nename = exp_name(ne);
+ if ((ername && !nername) || (!ername &&
nername) ||
+ (ername && nername &&
strcmp(ername,nername) != 0) || strcmp(ename,nename) != 0)
+ list_append(gexps, e);
+ }
}
groupbyexps = gexps;
}
diff --git
a/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
b/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
--- a/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
+++ b/sql/test/BugTracker-2015/Tests/crash_in_reduce_groupby.Bug-3818.test
@@ -27,8 +27,10 @@ GROUP BY cods, elrik, ether, jaelen, sor
----
project (
| group by (
-| | table("sys"."t2a") [ "t2a"."tib0" ]
-| ) [ tinyint(8) "0" as "sora" ] [ tinyint(8) "0" as "cods", tinyint(8) "0" as
"elrik", tinyint(8) "0" as "ether", tinyint(8) "0" as "jaelen", "sora" NOT
NULL, "sys"."min" no nil ("t2a"."tib0") as "%1"."%1" ]
+| | project (
+| | | table("sys"."t2a") [ "t2a"."tib0" ]
+| | ) [ "t2a"."tib0", tinyint(8) "0" as "elrik", tinyint(8) "0" as "ether",
tinyint(8) "0" as "jaelen", tinyint(8) "0" as "sora" ]
+| ) [ tinyint(8) "0" as "cods" ] [ "cods" NOT NULL, "elrik" NOT NULL, "ether"
NOT NULL, "jaelen" NOT NULL, "sora" NOT NULL, "sys"."min" no nil ("t2a"."tib0")
as "%1"."%1" ]
) [ "cods" NOT NULL, "elrik" NOT NULL, "ether" NOT NULL, "jaelen" NOT NULL,
"sora" NOT NULL, "%1"."%1" ]
statement ok
diff --git a/sql/test/miscellaneous/Tests/groupby_error.test
b/sql/test/miscellaneous/Tests/groupby_error.test
--- a/sql/test/miscellaneous/Tests/groupby_error.test
+++ b/sql/test/miscellaneous/Tests/groupby_error.test
@@ -405,8 +405,8 @@ project (
| | | table("sys"."tab1") [ "tab1"."part" as "myalias0"."part", "tab1"."tet"
as "myalias0"."tet" ],
| | | table("sys"."tab2") [ "tab2"."myk" as "myalias1"."myk", "tab2"."ups" as
"myalias1"."ups" ]
| | ) [ ("myalias0"."part") = ("myalias1"."myk") ]
-| ) [ "myalias1"."ups", "myalias0"."part", "myalias0"."tet" as "track" ] [
"myalias0"."part", "track", "sys"."count"() NOT NULL as "%1"."%1" ]
-) [ "myalias0"."part", "track" as "tet", "%1"."%1" NOT NULL as "mycount",
"track" ]
+| ) [ "myalias1"."ups", "myalias0"."part", "myalias0"."tet" as "track" ] [
"myalias0"."part", "track", "myalias0"."tet", "sys"."count"() NOT NULL as
"%1"."%1" ]
+) [ "myalias0"."part", "myalias0"."tet", "%1"."%1" NOT NULL as "mycount",
"track" ]
query IIII rowsort
select
diff --git a/sql/test/miscellaneous/Tests/groupby_expressions.test
b/sql/test/miscellaneous/Tests/groupby_expressions.test
--- a/sql/test/miscellaneous/Tests/groupby_expressions.test
+++ b/sql/test/miscellaneous/Tests/groupby_expressions.test
@@ -26,7 +26,7 @@ project (
| group by (
| | project (
| | | table("sys"."groupings") [ "groupings"."aa" ]
-| | ) [ bigint(33)["groupings"."aa"] as "%2"."%2", "sys"."sql_add"("%2"."%2",
bigint(33) "1") as "%1"."%1" ]
+| | ) [ bigint(33)["groupings"."aa"] as "%3"."%3", "sys"."sql_add"("%3"."%3",
bigint(33) "1") as "%1"."%1" ]
| ) [ "%1"."%1" ] [ "%1"."%1" ]
) [ "%1"."%1" ]
diff --git a/sql/test/miscellaneous/Tests/simple_selects.test
b/sql/test/miscellaneous/Tests/simple_selects.test
--- a/sql/test/miscellaneous/Tests/simple_selects.test
+++ b/sql/test/miscellaneous/Tests/simple_selects.test
@@ -896,6 +896,36 @@ 1
statement ok
drop function dosomething
+query II rowsort
+SELECT x AS x, x AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1
+----
+1
+1
+2
+2
+3
+3
+
+# remove 'x1' from grouping columns list
+query T nosort
+plan SELECT x AS x, x AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1
+----
+project (
+| group by (
+| | [ [ tinyint(2) "1", tinyint(2) "2", tinyint(2) "3" ] as "x"."x", "x"."x"
as "x1" ]
+| ) [ "x"."x" ] [ "x"."x", "x1" ]
+) [ "x"."x", "x1" ]
+
+query II rowsort
+SELECT x AS x, x + 1 AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1
+----
+1
+2
+2
+3
+3
+4
+
statement ok
create global temp table x(x int, y int)
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]