Changeset: c69aeb4cb0cb for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/c69aeb4cb0cb
Modified Files:
sql/server/rel_exp.c
sql/server/rel_exp.h
sql/server/rel_optimizer.c
sql/server/rel_optimizer.h
sql/server/rel_unnest.c
Branch: antipush
Log Message:
Updated exps_unique to take the new unique property into account. The
batappend-undefined.Bug-7130 test now crashes, but I think that is caused by the bug in the Jul2021 branch.
diffs (237 lines):
diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c
--- a/sql/server/rel_exp.c
+++ b/sql/server/rel_exp.c
@@ -641,6 +641,19 @@ have_nil(list *exps)
return has_nil;
}
+int
+have_semantics(list *exps)
+{
+ int has_semantics = 0;
+
+ if (exps)
+ for (node *n = exps->h; n && !has_semantics; n = n->next) {
+ sql_exp *e = n->data;
+ has_semantics |= is_compare(e->type) && is_semantics(e);
+ }
+ return has_semantics;
+}
+
sql_exp *
exp_column(sql_allocator *sa, const char *rname, const char *cname,
sql_subtype *t, unsigned int card, int has_nils, int unique, int intern)
{
diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h
--- a/sql/server/rel_exp.h
+++ b/sql/server/rel_exp.h
@@ -85,6 +85,7 @@ extern sql_exp * exp_values(sql_allocato
extern list * exp_get_values(sql_exp *e); /* get expression list from the
values expression */
extern list * exp_types(sql_allocator *sa, list *exps);
extern int have_nil(list *exps);
+extern int have_semantics(list *exps);
sql_export sql_exp * exp_column(sql_allocator *sa, const char *rname, const
char *name, sql_subtype *t, unsigned int card, int has_nils, int unique, int
intern);
extern sql_exp * exp_propagate(sql_allocator *sa, sql_exp *ne, sql_exp *oe);
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -2301,17 +2301,16 @@ exp_push_down_prj(mvc *sql, sql_exp *e,
}
static int
-rel_is_unique( sql_rel *rel, sql_ukey *k)
+rel_is_unique(sql_rel *rel)
{
switch(rel->op) {
- case op_left:
- case op_right:
- case op_full:
- case op_join:
- return 0;
case op_semi:
case op_anti:
- return rel_is_unique(rel->l, k);
+ case op_inter:
+ case op_except:
+ case op_topn:
+ case op_sample:
+ return rel_is_unique(rel->l);
case op_table:
case op_basetable:
return 1;
@@ -2320,43 +2319,48 @@ rel_is_unique( sql_rel *rel, sql_ukey *k
}
}
+/* WARNING exps_unique doesn't check for duplicate NULL values */
int
exps_unique(mvc *sql, sql_rel *rel, list *exps)
{
- node *n;
- char *matched = NULL;
- int nr = 0;
+ int nr = 0, need_check = 0;
sql_ukey *k = NULL;
if (list_empty(exps))
return 0;
- for(n = exps->h; n && !k; n = n->next) {
+ for(node *n = exps->h; n ; n = n->next) {
sql_exp *e = n->data;
prop *p;
- if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL)
- k = p->value;
- }
- if (!k || list_length(k->k.columns) > list_length(exps))
+ if (!is_unique(e)) { /* ignore unique columns */
+ need_check++;
+ if (!k && (p = find_prop(e->p, PROP_HASHCOL))) /* at
the moment, use only one k */
+ k = p->value;
+ }
+ }
+ if (!need_check) /* all have unique property return */
+ return 1;
+ if (!k || list_length(k->k.columns) != need_check)
return 0;
if (rel) {
- matched = SA_ZNEW_ARRAY(sql->sa, char,
list_length(k->k.columns));
- for(n = exps->h; n; n = n->next) {
+ char *matched = SA_ZNEW_ARRAY(sql->sa, char,
list_length(k->k.columns));
+ fcmp cmp = (fcmp)&kc_column_cmp;
+ for(node *n = exps->h; n; n = n->next) {
sql_exp *e = n->data;
- fcmp cmp = (fcmp)&kc_column_cmp;
- sql_column *c = exp_find_column(rel, e, -2);
+ sql_column *c;
node *m;
- if (c && (m=list_find(k->k.columns, c, cmp)) != NULL) {
+ if (is_unique(e))
+ continue;
+ if ((c = exp_find_column(rel, e, -2)) != NULL && (m =
list_find(k->k.columns, c, cmp)) != NULL) {
int pos = list_position(k->k.columns, m->data);
if (!matched[pos])
nr++;
matched[pos] = 1;
}
}
- if (nr == list_length(k->k.columns)) {
- return rel_is_unique(rel, k);
- }
+ if (nr == list_length(k->k.columns))
+ return rel_is_unique(rel);
}
return 0;
}
@@ -2400,12 +2404,15 @@ rel_distinct_aggregate_on_unique_values(
if (exp->type == e_aggr && need_distinct(exp)) {
bool all_unique = true;
-
- for (node *m = ((list*)exp->l)->h; m &&
all_unique; m = m->next) {
+ list *l = exp->l;
+
+ for (node *m = l->h; m && all_unique; m =
m->next) {
sql_exp *arg = (sql_exp*) m->data;
all_unique &= arg->type == e_column &&
is_unique(arg) && (!is_semantics(exp) || !has_nil(arg));
}
+ if (!all_unique && exps_card(l) > CARD_ATOM)
+ all_unique = exps_unique(v->sql, rel,
l) && (!is_semantics(exp) || !have_nil(l));
if (all_unique) {
set_nodistinct(exp);
v->changes++;
@@ -2555,7 +2562,8 @@ rel_distinct_project2groupby(visitor *v,
/* rewrite distinct project [ pk ] ( select ( table ) [ e op val ])
* into project [ pk ] ( select/semijoin ( table ) */
if (rel->op == op_project && rel->l && !rel->r /* no order by */ &&
need_distinct(rel) &&
- (l->op == op_select || l->op == op_semi) && exps_unique(v->sql,
rel, rel->exps)) {
+ (l->op == op_select || l->op == op_semi) && exps_unique(v->sql,
rel, rel->exps) &&
+ (!have_semantics(l->exps) || !have_nil(rel->exps))) {
set_nodistinct(rel);
v->changes++;
}
@@ -5469,8 +5477,12 @@ static inline sql_rel *
rel_push_project_down_union(visitor *v, sql_rel *rel)
{
/* first remove distinct if already unique */
- if (rel->op == op_project && need_distinct(rel) && rel->exps &&
exps_unique(v->sql, rel, rel->exps))
+ if (rel->op == op_project && need_distinct(rel) && rel->exps &&
exps_unique(v->sql, rel, rel->exps) && !have_nil(rel->exps)) {
set_nodistinct(rel);
+ if (exps_card(rel->exps) <= CARD_ATOM && rel->card > CARD_ATOM)
/* if the projection just contains constants, then no topN is needed */
+ rel->l = rel_topn(v->sql->sa, rel->l,
append(sa_list(v->sql->sa), exp_atom_lng(v->sql->sa, 1)));
+ v->changes++;
+ }
if (rel->op == op_project && rel->l && rel->exps && !rel->r) {
int need_distinct = need_distinct(rel);
@@ -5495,8 +5507,8 @@ rel_push_project_down_union(visitor *v,
ur = rel_project(v->sql->sa, ur,
rel_projections(v->sql, ur, NULL, 1, 1));
need_distinct = (need_distinct &&
- (!exps_unique(v->sql, ul, ul->exps) ||
- !exps_unique(v->sql, ur, ur->exps)));
+ (!exps_unique(v->sql, ul, ul->exps) ||
have_nil(ul->exps) ||
+ !exps_unique(v->sql, ur, ur->exps) ||
have_nil(ur->exps)));
rel_rename_exps(v->sql, u->exps, ul->exps);
rel_rename_exps(v->sql, u->exps, ur->exps);
@@ -5914,18 +5926,6 @@ rel_groupby_distinct(visitor *v, sql_rel
{
node *n;
- if (is_groupby(rel->op) && !rel_is_ref(rel) && rel->exps &&
list_empty(rel->r)) {
- for (n = rel->exps->h; n; n = n->next) {
- sql_exp *e = n->data;
-
- if (exp_aggr_is_count(e) && need_distinct(e)) {
- /* if count over unique values (ukey/pkey) */
- if (e->l && exps_unique(v->sql, rel, e->l))
- set_nodistinct(e);
- }
- }
- }
-
if (is_groupby(rel->op)) {
sql_rel *l = rel->l;
if (!l || is_groupby(l->op))
@@ -9360,7 +9360,6 @@ rel_remove_union_partitions(visitor *v,
static sql_rel *
rel_first_level_optimizations(visitor *v, sql_rel *rel)
{
- rel = rel_distinct_aggregate_on_unique_values(v, rel);
/* rel_simplify_math optimizer requires to clear the hash, so make sure
it runs last in this batch */
if (v->value_based_opt)
rel = rel_simplify_math(v, rel);
@@ -9488,6 +9487,7 @@ rel_optimize_projections(visitor *v, sql
rel = rel_push_groupby_down(v, rel);
rel = rel_groupby_order(v, rel);
rel = rel_reduce_groupby_exps(v, rel);
+ rel = rel_distinct_aggregate_on_unique_values(v, rel);
rel = rel_groupby_distinct(v, rel);
rel = rel_push_count_down(v, rel);
/* only when value_based_opt is on, ie not for dependency resolution */
diff --git a/sql/server/rel_optimizer.h b/sql/server/rel_optimizer.h
--- a/sql/server/rel_optimizer.h
+++ b/sql/server/rel_optimizer.h
@@ -17,6 +17,7 @@ extern sql_rel *rel_optimizer(mvc *sql,
extern int exp_joins_rels(sql_exp *e, list *rels);
extern sql_column *name_find_column(sql_rel *rel, const char *rname, const
char *name, int pnr, sql_rel **bt);
+/* WARNING exps_unique doesn't check for duplicate NULL values */
extern int exps_unique(mvc *sql, sql_rel *rel, list *exps);
extern sql_rel *rel_dce(mvc *sql, sql_rel *rel);
diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c
--- a/sql/server/rel_unnest.c
+++ b/sql/server/rel_unnest.c
@@ -86,7 +86,7 @@ static int
is_distinct_set(mvc *sql, sql_rel *rel, list *ad)
{
int distinct = 0;
- if (ad && exps_unique(sql, rel, ad ))
+ if (ad && exps_unique(sql, rel, ad) && !have_nil(ad))
return 1;
if (ad && is_groupby(rel->op) && exp_match_list(rel->r, ad))
return 1;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list