Changeset: d9f526791158 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d9f526791158 Modified Files: sql/server/rel_statistics.c Branch: properties Log Message:
To propagate min/max on range comparisons, the intervals must intersect,
otherwise the comparison can be pruned (doing that next)
diffs (218 lines):
diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -28,9 +28,7 @@ comparison_find_column(sql_exp *input, s
static sql_exp *
rel_propagate_column_ref_statistics(mvc *sql, sql_rel *rel, sql_exp *e)
{
- atom *lval, *rval, *fval;
sql_exp *found = NULL;
- prop *p;
assert(e->type == e_column);
if (rel) {
@@ -57,103 +55,85 @@ rel_propagate_column_ref_statistics(mvc
sql_exp *comp = n->data, *le = comp->l,
*lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
if (comp->type == e_cmp) {
- if ((lne =
comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe
&& (fne = comparison_find_column(fe, e)))) {
+ int flag = comp->flag &
~CMP_BETWEEN;
+
+ if (is_theta_exp(flag) && ((lne
= comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) ||
(fe && (fne = comparison_find_column(fe, e))))) {
+ atom *lval_min =
find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p,
PROP_MAX), *rval_min = find_prop_and_get(re->p, PROP_MIN),
+ *rval_max =
find_prop_and_get(re->p, PROP_MAX), *fval_min = fe ? find_prop_and_get(re->p,
PROP_MIN) : NULL, *fval_max = fe ? find_prop_and_get(re->p, PROP_MAX) : NULL;
+
found = found ? found :
lne ? lne : rne ? rne : fne;
if (e->semantics)
found_with_semantics = true;
- if
(is_outerjoin(rel->op)) /* on puter joins, min and max cannot be propagated */
+ if
(is_outerjoin(rel->op)) /* on outer joins, min and max cannot be propagated */
continue;
- if (fe) { /* range case
*/
- if (!e->anti &&
lne) {
- if
(comp->flag & CMP_SYMMETRIC) { /* min is max from le and (min from re and fe
min) */
-
if ((rval = find_prop_and_get(re->p, PROP_MIN)) && (fval =
find_prop_and_get(fe->p, PROP_MIN))) {
-
atom *nmin = statistics_atom_min(sql, rval, fval);
-
p = find_prop(e->p, PROP_MIN);
-
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, nmin,
p->value) : nmin);
-
} /* max is min from le and (max from re and fe max) */
-
if ((rval = find_prop_and_get(re->p, PROP_MAX)) && (fval =
find_prop_and_get(fe->p, PROP_MAX))) {
-
atom *nmax = statistics_atom_max(sql, rval, fval);
-
p = find_prop(e->p, PROP_MAX);
-
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax,
p->value) : nmax);
-
}
- } else
{ /* min is max from le and re min */
-
if ((rval = find_prop_and_get(re->p, PROP_MIN))) {
-
p = find_prop(e->p, PROP_MIN);
-
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval,
p->value) : rval);
-
} /* max is min from le and fe max */
-
if ((fval = find_prop_and_get(fe->p, PROP_MAX))) {
-
p = find_prop(e->p, PROP_MAX);
-
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, fval,
p->value) : fval);
-
}
+ if (fe && lval_min &&
lval_max) { /* range case, the middle expression must intersect the other two */
+ int int1 =
rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min,
lval_max) <= 0,
+ int2 =
fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min,
lval_max) <= 0;
+
+ if (!e->anti &&
lne && int1 && int2) {
+ if
(comp->flag & CMP_SYMMETRIC) {
+
prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
+
atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax =
statistics_atom_max(sql, rval_max, fval_max);
+
/* min is max from le and (min from re and fe min) */
+
set_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, nmin, p1->value) :
nmin);
+
/* max is min from le and (max from re and fe max) */
+
set_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, nmax, p2->value) :
nmax);
+ } else {
+
prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
+
/* min is max from le and re min */
+
set_property(sql, e, PROP_MIN, p1 ? statistics_atom_max(sql, rval_min,
p1->value) : rval_min);
+
/* max is min from le and fe max */
+
set_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max,
p2->value) : fval_max);
}
} else if
(!e->anti && rne) {
- if
(comp->flag & CMP_SYMMETRIC) { /* min is max from le and (min from re and fe
min) */
-
if ((lval = find_prop_and_get(le->p, PROP_MIN)) && (fval =
find_prop_and_get(fe->p, PROP_MIN))) {
-
p = find_prop(e->p, PROP_MIN);
-
atom *nmin = p ? statistics_atom_min(sql, p->value, fval) : fval;
-
set_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval));
-
}
- } else
{ /* min is max from le and re min */
-
if ((lval = find_prop_and_get(le->p, PROP_MIN))) {
-
p = find_prop(e->p, PROP_MIN);
-
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval,
p->value) : lval);
-
}
+ if
(comp->flag & CMP_SYMMETRIC && int1 && int2) { /* min is max from le and (min
from re and fe min) */
+
prop *p = find_prop(e->p, PROP_MIN);
+
atom *nmin = p ? statistics_atom_min(sql, p->value, fval_min) : fval_min;
+
set_property(sql, e, PROP_MIN, statistics_atom_max(sql, nmin, lval_min));
+ } else
if (int1) { /* min is max from le and re min */
+
prop *p = find_prop(e->p, PROP_MIN);
+
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value)
: lval_min);
}
} else if
(!e->anti) {
assert(fne);
- if
(comp->flag & CMP_SYMMETRIC) { /* max is min from le and (max from re and fe
max) */
-
if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval =
find_prop_and_get(re->p, PROP_MAX))) {
-
p = find_prop(e->p, PROP_MAX);
-
atom *nmax = p ? statistics_atom_max(sql, p->value, rval) : rval;
-
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval)
: nmax);
-
}
- } else
{ /* max is min from le and fe max */
-
if ((lval = find_prop_and_get(le->p, PROP_MAX))) {
-
p = find_prop(e->p, PROP_MAX);
-
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval,
p->value) : lval);
-
}
+ if
(comp->flag & CMP_SYMMETRIC && int1 && int2) { /* max is min from le and (max
from re and fe max) */
+
prop *p = find_prop(e->p, PROP_MAX);
+
atom *nmax = p ? statistics_atom_max(sql, p->value, rval_max) : rval_max;
+
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, nmax, lval_max) :
nmax);
+ } else
if (int2) { /* max is min from le and fe max */
+
prop *p = find_prop(e->p, PROP_MAX);
+
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value)
: lval_max);
}
}
- } else {
+ } else if (lval_min &&
lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 &&
atom_cmp(rval_min, lval_max) <= 0) {
+ /* both min and
max must be set and the intervals must overlap */
switch
(comp->flag) {
- case cmp_equal:
{
- if
((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval =
find_prop_and_get(re->p, PROP_MAX)))
-
set_property(sql, e, PROP_MAX, e->anti ? statistics_atom_max(sql, lval, rval) :
statistics_atom_min(sql, lval, rval)); /* for equality reduce */
- if
((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval =
find_prop_and_get(re->p, PROP_MIN)))
-
set_property(sql, e, PROP_MIN, e->anti ? statistics_atom_min(sql, lval, rval) :
statistics_atom_max(sql, lval, rval));
+ case cmp_equal:
{ /* for equality reduce */
+
set_property(sql, e, PROP_MAX, e->anti ? statistics_atom_max(sql, lval_max,
rval_max) : statistics_atom_min(sql, lval_max, rval_max));
+
set_property(sql, e, PROP_MIN, e->anti ? statistics_atom_min(sql, lval_min,
rval_min) : statistics_atom_max(sql, lval_min, rval_min));
} break;
- case
cmp_notequal: {
- if
((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval =
find_prop_and_get(re->p, PROP_MAX)))
-
set_property(sql, e, PROP_MAX, e->anti ? statistics_atom_min(sql, lval, rval) :
statistics_atom_max(sql, lval, rval)); /* for inequality expand */
- if
((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval =
find_prop_and_get(re->p, PROP_MIN)))
-
set_property(sql, e, PROP_MIN, e->anti ? statistics_atom_max(sql, lval, rval) :
statistics_atom_min(sql, lval, rval));
+ case
cmp_notequal: { /* for inequality expand */
+
set_property(sql, e, PROP_MAX, e->anti ? statistics_atom_min(sql, lval_max,
rval_max) : statistics_atom_max(sql, lval_max, rval_max));
+
set_property(sql, e, PROP_MIN, e->anti ? statistics_atom_max(sql, lval_min,
rval_min) : statistics_atom_min(sql, lval_min, rval_min));
} break;
case cmp_gt:
case cmp_gte: {
if
(!e->anti && lne) { /* min is max from both min */
-
if ((rval = find_prop_and_get(re->p, PROP_MIN))) {
-
p = find_prop(e->p, PROP_MIN);
-
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval,
p->value) : rval);
-
}
+
prop *p = find_prop(e->p, PROP_MIN);
+
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value)
: rval_min);
} else
if (!e->anti) { /* max is min from both max */
-
if ((lval = find_prop_and_get(le->p, PROP_MAX))) {
-
p = find_prop(e->p, PROP_MAX);
-
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval,
p->value) : lval);
-
}
+
prop *p = find_prop(e->p, PROP_MAX);
+
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value)
: lval_max);
}
} break;
case cmp_lt:
case cmp_lte: {
if
(!e->anti && lne) { /* max is min from both max */
-
if ((rval = find_prop_and_get(re->p, PROP_MAX))) {
-
p = find_prop(e->p, PROP_MAX);
-
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval,
p->value) : rval);
-
}
+
prop *p = find_prop(e->p, PROP_MAX);
+
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value)
: rval_max);
} else
if (!e->anti) { /* min is max from both min */
-
if ((lval = find_prop_and_get(le->p, PROP_MIN))) {
-
p = find_prop(e->p, PROP_MIN);
-
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval,
p->value) : lval);
-
}
+
prop *p = find_prop(e->p, PROP_MIN);
+
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value)
: lval_min);
}
} break;
default: /*
Maybe later I can do cmp_in and cmp_notin */
@@ -179,17 +159,19 @@ rel_propagate_column_ref_statistics(mvc
case op_except:
case op_inter:
case op_project:
- case op_groupby:
+ case op_groupby: {
+ atom *fval;
if ((found = rel_find_exp(rel, e)) && rel->op !=
op_table) {
- if ((lval = find_prop_and_get(found->p,
PROP_MAX)))
- set_property(sql, e, PROP_MAX, lval);
- if ((lval = find_prop_and_get(found->p,
PROP_MIN)))
- set_property(sql, e, PROP_MIN, lval);
+ if ((fval = find_prop_and_get(found->p,
PROP_MAX)))
+ set_property(sql, e, PROP_MAX, fval);
+ if ((fval = find_prop_and_get(found->p,
PROP_MIN)))
+ set_property(sql, e, PROP_MIN, fval);
if (!has_nil(found))
set_has_no_nil(e);
return e;
}
return NULL;
+ }
case op_topn:
case op_sample:
return rel_propagate_column_ref_statistics(sql,
rel->l, e);
@@ -448,16 +430,16 @@ rel_get_statistics(visitor *v, sql_rel *
case op_select:
case op_project:
case op_groupby:
- case op_insert:
- case op_update:
- case op_delete:
+ case op_ddl:
rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0,
&rel_propagate_statistics, false);
if ((is_simple_project(rel->op) || is_groupby(rel->op)) &&
!list_empty(rel->r))
rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0,
&rel_propagate_statistics, false);
break;
/*These relations are less important for now
- case op_ddl:
case op_table:
+ case op_insert:
+ case op_update:
+ case op_delete:
case op_truncate:
case op_topn:
case op_sample:*/
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list
