Changeset: 51548bcfb19c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/51548bcfb19c
Branch: default
Log Message:
Merge with Sep2022 branch.
diffs (truncated from 472 to 300 lines):
diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -15,17 +15,17 @@ static sql_exp *
comparison_find_column(sql_exp *input, sql_exp *e)
{
switch (input->type) {
- case e_convert: {
- list *types = (list *)input->r;
- sql_class from =
((sql_subtype*)types->h->data)->type->eclass, to =
((sql_subtype*)types->h->next->data)->type->eclass;
- if (from == to)
- return comparison_find_column(input->l, e) ?
input : NULL;
- return NULL;
- }
- case e_column:
- return exp_match(e, input) ? input : NULL;
- default:
- return NULL;
+ case e_convert: {
+ list *types = (list *)input->r;
+ sql_class from = ((sql_subtype*)types->h->data)->type->eclass,
to = ((sql_subtype*)types->h->next->data)->type->eclass;
+ if (from == to)
+ return comparison_find_column(input->l, e) ? input :
NULL;
+ return NULL;
+ }
+ case e_column:
+ return exp_match(e, input) ? input : NULL;
+ default:
+ return NULL;
}
}
@@ -62,7 +62,7 @@ rel_propagate_column_ref_statistics(mvc
if (comp->type == e_cmp) {
if (is_theta_exp(comp->flag) &&
((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e))
|| (fe && (fne = comparison_find_column(fe, e))))) {
atom *lval_min =
find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p,
PROP_MAX),
- *rval_min =
find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p,
PROP_MAX);
+ *rval_min =
find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p,
PROP_MAX);
/* not semantics found
or if explicitly filtering not null values from the column */
found_without_semantics
|= !is_semantics(comp) || (comp->flag == cmp_equal && lne && is_anti(comp) &&
exp_is_null(re));
@@ -115,16 +115,16 @@ rel_propagate_column_ref_statistics(mvc
} else if (lval_min &&
lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 &&
atom_cmp(rval_min, lval_max) <= 0) {
/* both min and
max must be set and the intervals must overlap */
switch
(comp->flag) {
- case cmp_equal:
{ /* for equality reduce */
+ case cmp_equal:
/* for equality reduce */
set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql,
lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql,
lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
- } break;
- case
cmp_notequal: { /* for inequality expand */
+ break;
+ case
cmp_notequal: /* for inequality expand */
set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql,
lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql,
lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
- } break;
+ break;
case cmp_gt:
- case cmp_gte: {
+ case cmp_gte:
if
(!is_anti(comp) && lne) { /* min is max from both min */
prop *p = find_prop(e->p, PROP_MIN);
set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min,
p->value.pval) : rval_min);
@@ -132,9 +132,9 @@ rel_propagate_column_ref_statistics(mvc
prop *p = find_prop(e->p, PROP_MAX);
set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max,
p->value.pval) : lval_max);
}
- } break;
+ break;
case cmp_lt:
- case cmp_lte: {
+ case cmp_lte:
if
(!is_anti(comp) && lne) { /* max is min from both max */
prop *p = find_prop(e->p, PROP_MAX);
set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max,
p->value.pval) : rval_max);
@@ -142,7 +142,7 @@ rel_propagate_column_ref_statistics(mvc
prop *p = find_prop(e->p, PROP_MIN);
set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min,
p->value.pval) : lval_min);
}
- } break;
+ break;
default: /*
Maybe later I can do cmp_in and cmp_notin */
break;
}
@@ -185,7 +185,7 @@ rel_propagate_column_ref_statistics(mvc
prop *p = e->p =
prop_create(sql->sa, PROP_NUNIQUES, e->p);
p->value.dval = 1;
} else if (((is_basetable(rel->op) ||
is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
-
(is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
+
(is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
(est =
find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
prop *p = e->p =
prop_create(sql->sa, PROP_NUNIQUES, e->p);
p->value.dval = est->value.dval;
@@ -197,7 +197,7 @@ rel_propagate_column_ref_statistics(mvc
}
case op_topn:
case op_sample:
- return rel_propagate_column_ref_statistics(sql,
rel->l, e);
+ return rel_propagate_column_ref_statistics(sql, rel->l,
e);
default:
break;
}
@@ -257,7 +257,7 @@ rel_setop_get_statistics(mvc *sql, sql_r
{
sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max =
find_prop_and_get(le->p, PROP_MAX),
- *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max =
find_prop_and_get(re->p, PROP_MAX);
+ *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max =
find_prop_and_get(re->p, PROP_MAX);
prop *est;
/* for the intersection, if both expresssions don't overlap, it can be
pruned */
@@ -315,7 +315,7 @@ rel_propagate_statistics(visitor *v, sql
(void) depth;
switch(e->type) {
- case e_column: {
+ case e_column:
switch (rel->op) { /* set relations don't call
rel_propagate_statistics */
case op_join:
case op_left:
@@ -326,14 +326,16 @@ rel_propagate_statistics(visitor *v, sql
sql_exp *found =
rel_propagate_column_ref_statistics(sql, rel->l, e);
if (!found)
(void) rel_propagate_column_ref_statistics(sql,
rel->r, e);
- } break;
+ break;
+ }
case op_select:
case op_project:
case op_groupby: {
sql_exp *found =
rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on
the same projection, ugh */
if (!found && is_simple_project(rel->op))
(void) rel_propagate_column_ref_statistics(sql,
rel, e);
- } break;
+ break;
+ }
case op_insert:
case op_update:
case op_delete:
@@ -342,7 +344,7 @@ rel_propagate_statistics(visitor *v, sql
default:
break;
}
- } break;
+ break;
case e_convert: {
sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
sql_exp *l = e->l;
@@ -370,7 +372,8 @@ rel_propagate_statistics(visitor *v, sql
}
if (!has_nil(l))
set_has_no_nil(e);
- } break;
+ break;
+ }
case e_aggr:
case e_func: {
BUN lv;
@@ -402,8 +405,9 @@ rel_propagate_statistics(visitor *v, sql
}
set_unique(e);
}
- } break;
- case e_atom: {
+ break;
+ }
+ case e_atom:
if (e->l) {
atom *a = (atom*) e->l;
if (!a->isnull) {
@@ -454,7 +458,7 @@ rel_propagate_statistics(visitor *v, sql
prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES,
e->p);
p->value.dval = 1;
}
- } break;
+ break;
case e_cmp:
/* TODO? propagating min/max/unique of booleans is not very
worth it */
if (e->flag == cmp_or || e->flag == cmp_filter) {
@@ -521,7 +525,7 @@ rel_prune_predicates(visitor *v, sql_rel
always_true |= !has_nil(le) && !has_nil(re) &&
!has_nil(fe) &&
lval_min && lval_max && rval_min &&
rval_max && fval_min && fval_max &&
(is_anti(e) ? ((lower == cmp_gte ?
atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) ||
(higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min,
fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
- ((lower == cmp_gte ? atom_cmp(lval_min,
rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ?
atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
+ ((lower == cmp_gte ?
atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) &&
(higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min,
lval_max) > 0)));
} else if (!fe) {
if (!is_semantics(e)) /* trival not null cmp
null case */
always_false |= !is_anti(e) &&
((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) &&
exp_is_not_null(re)));
@@ -670,7 +674,7 @@ rel_get_statistics_(visitor *v, sql_rel
return rel;
rel->used |= statistics_gathered;
- switch(rel->op){
+ switch (rel->op) {
case op_basetable: {
sql_table *t = (sql_table *) rel->l;
sqlstore *store = v->sql->session->tr->store;
@@ -682,7 +686,8 @@ rel_get_statistics_(visitor *v, sql_rel
/* Set table row count. TODO? look for remote tables. Don't
look at storage for declared tables, because it won't be cleaned */
if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active
rows only */
set_count_prop(v->sql->sa, rel,
(BUN)store->storage_api.count_col(v->sql->session->tr,
ol_first_node(t->columns)->data, 10));
- } break;
+ break;
+ }
case op_union:
case op_inter:
case op_except: {
@@ -777,7 +782,8 @@ rel_get_statistics_(visitor *v, sql_rel
set_nodistinct(rel); /* set relations may have distinct
flag set */
v->changes++;
}
- } break;
+ break;
+ }
case op_join:
case op_left:
case op_right:
@@ -859,12 +865,13 @@ rel_get_statistics_(visitor *v, sql_rel
} else if (lv != BUN_NONE && rv != BUN_NONE) {
set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX
/ lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
}
- } break;
- case op_anti: {
+ break;
+ }
+ case op_anti:
set_count_prop(v->sql->sa, rel, get_rel_count(l));
- } break;
+ break;
case op_semi:
- case op_select: {
+ case op_select:
/* TODO calculate cardinalities using selectivities */
if (list_length(rel->exps) == 1 &&
(exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
set_count_prop(v->sql->sa, rel, 0);
@@ -877,20 +884,21 @@ rel_get_statistics_(visitor *v, sql_rel
/* simple expressions first */
if (e->type == e_cmp && e->flag
== cmp_equal && exp_is_atom(er)) {
/* use selectivity */
- prop *p = NULL;
+ prop *p;
if ((p =
find_prop(el->p, PROP_NUNIQUES))) {
u = (BUN)
p->value.dval;
+ break;
}
}
}
/* u is an *estimate*, so don't set
count_prop to 0 unless cnt is 0 */
- set_count_prop(v->sql->sa, rel, cnt ==
0 ? 0 : u > cnt ? 1 : cnt/u);
+ set_count_prop(v->sql->sa, rel, cnt ==
0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
} else {
set_count_prop(v->sql->sa, rel,
get_rel_count(l));
}
}
- } break;
- case op_project: {
+ break;
+ case op_project:
if (l) {
if (need_distinct(rel)) {
set_count_prop(v->sql->sa, rel,
rel_calc_nuniques(v->sql, l, rel->exps));
@@ -908,18 +916,19 @@ rel_get_statistics_(visitor *v, sql_rel
}
set_count_prop(v->sql->sa, rel, card);
}
- } break;
- case op_groupby: {
+ break;
+ case op_groupby:
if (list_empty(rel->r)) {
set_count_prop(v->sql->sa, rel, 1);
} else {
set_count_prop(v->sql->sa, rel,
rel_calc_nuniques(v->sql, l, rel->r));
}
- } break;
+ break;
default:
break;
}
- } break;
+ break;
+ }
case op_topn: {
BUN lv = get_rel_count(rel->l);
@@ -935,7 +944,8 @@ rel_get_statistics_(visitor *v, sql_rel
}
set_count_prop(v->sql->sa, rel, lv);
}
- } break;
+ break;
+ }
case op_sample: {
BUN lv = get_rel_count(rel->l);
@@ -953,7 +963,8 @@ rel_get_statistics_(visitor *v, sql_rel
}
set_count_prop(v->sql->sa, rel, lv);
}
- } break;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]