Changeset: e2993cd74db6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e2993cd74db6
Modified Files:
        sql/server/rel_statistics.c
Branch: properties
Log Message:

Starting to prune always-false predicates, plus small bug fixes


diffs (207 lines):

diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -40,7 +40,7 @@ rel_propagate_column_ref_statistics(mvc 
                case op_select:
                case op_anti:
                case op_semi: {
-                       bool found_with_semantics = false, found_left = false, 
found_right = false;
+                       bool found_without_semantics = false, found_left = 
false, found_right = false;
 
                        /* propagate from the bottom first */
                        if (rel_propagate_column_ref_statistics(sql, rel->l, e))
@@ -62,15 +62,15 @@ rel_propagate_column_ref_statistics(mvc 
                                                                 *rval_max = 
find_prop_and_get(re->p, PROP_MAX), *fval_min = fe ? find_prop_and_get(re->p, 
PROP_MIN) : NULL, *fval_max = fe ? find_prop_and_get(re->p, PROP_MAX) : NULL;
 
                                                        found = found ? found : 
lne ? lne : rne ? rne : fne;
-                                                       if (e->semantics)
-                                                               
found_with_semantics = true;
+                                                       found_without_semantics 
|= !comp->semantics;
                                                        if 
(is_outerjoin(rel->op)) /* on outer joins, min and max cannot be propagated */
                                                                continue;
+                                                       /* if (end2 >= start1 
&& start2 <= end1) then the 2 intervals are intersected */
                                                        if (fe && lval_min && 
lval_max) { /* range case, the middle expression must intersect the other two */
                                                                int int1 = 
rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && atom_cmp(rval_min, 
lval_max) <= 0,
                                                                        int2 = 
fval_min && fval_max && atom_cmp(fval_max, lval_min) >= 0 && atom_cmp(fval_min, 
lval_max) <= 0;
 
-                                                               if (!e->anti && 
lne && int1 && int2) {
+                                                               if (!comp->anti 
&& lne && int1 && int2) {
                                                                        if 
(comp->flag & CMP_SYMMETRIC) {
                                                                                
prop *p1 = find_prop(e->p, PROP_MIN), *p2 = find_prop(e->p, PROP_MAX);
                                                                                
atom *nmin = statistics_atom_min(sql, rval_min, fval_min), *nmax = 
statistics_atom_max(sql, rval_max, fval_max);
@@ -85,7 +85,7 @@ rel_propagate_column_ref_statistics(mvc 
                                                                                
/* max is min from le and fe max */
                                                                                
set_property(sql, e, PROP_MAX, p2 ? statistics_atom_min(sql, fval_max, 
p2->value) : fval_max);
                                                                        }
-                                                               } else if 
(!e->anti && rne) {
+                                                               } else if 
(!comp->anti && rne) {
                                                                        if 
(comp->flag & CMP_SYMMETRIC && int1 && int2) { /* min is max from le and (min 
from re and fe min) */
                                                                                
prop *p = find_prop(e->p, PROP_MIN);
                                                                                
atom *nmin = p ? statistics_atom_min(sql, p->value, fval_min) : fval_min;
@@ -94,7 +94,7 @@ rel_propagate_column_ref_statistics(mvc 
                                                                                
prop *p = find_prop(e->p, PROP_MIN);
                                                                                
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value) 
: lval_min);
                                                                        }
-                                                               } else if 
(!e->anti) {
+                                                               } else if 
(!comp->anti) {
                                                                        
assert(fne);
                                                                        if 
(comp->flag & CMP_SYMMETRIC && int1 && int2) { /* max is min from le and (max 
from re and fe max) */
                                                                                
prop *p = find_prop(e->p, PROP_MAX);
@@ -109,29 +109,29 @@ rel_propagate_column_ref_statistics(mvc 
                                                                /* both min and 
max must be set and the intervals must overlap */
                                                                switch 
(comp->flag) {
                                                                case cmp_equal: 
{ /* for equality reduce */
-                                                                       
set_property(sql, e, PROP_MAX, e->anti ? statistics_atom_max(sql, lval_max, 
rval_max) : statistics_atom_min(sql, lval_max, rval_max));
-                                                                       
set_property(sql, e, PROP_MIN, e->anti ? statistics_atom_min(sql, lval_min, 
rval_min) : statistics_atom_max(sql, lval_min, rval_min));
+                                                                       
set_property(sql, e, PROP_MAX, comp->anti ? statistics_atom_max(sql, lval_max, 
rval_max) : statistics_atom_min(sql, lval_max, rval_max));
+                                                                       
set_property(sql, e, PROP_MIN, comp->anti ? statistics_atom_min(sql, lval_min, 
rval_min) : statistics_atom_max(sql, lval_min, rval_min));
                                                                } break;
                                                                case 
cmp_notequal: { /* for inequality expand */
-                                                                       
set_property(sql, e, PROP_MAX, e->anti ? statistics_atom_min(sql, lval_max, 
rval_max) : statistics_atom_max(sql, lval_max, rval_max));
-                                                                       
set_property(sql, e, PROP_MIN, e->anti ? statistics_atom_max(sql, lval_min, 
rval_min) : statistics_atom_min(sql, lval_min, rval_min));
+                                                                       
set_property(sql, e, PROP_MAX, comp->anti ? statistics_atom_min(sql, lval_max, 
rval_max) : statistics_atom_max(sql, lval_max, rval_max));
+                                                                       
set_property(sql, e, PROP_MIN, comp->anti ? statistics_atom_max(sql, lval_min, 
rval_min) : statistics_atom_min(sql, lval_min, rval_min));
                                                                } break;
                                                                case cmp_gt:
                                                                case cmp_gte: {
-                                                                       if 
(!e->anti && lne) { /* min is max from both min */
+                                                                       if 
(!comp->anti && lne) { /* min is max from both min */
                                                                                
prop *p = find_prop(e->p, PROP_MIN);
                                                                                
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, p->value) 
: rval_min);
-                                                                       } else 
if (!e->anti) { /* max is min from both max */
+                                                                       } else 
if (!comp->anti) { /* max is min from both max */
                                                                                
prop *p = find_prop(e->p, PROP_MAX);
                                                                                
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, p->value) 
: lval_max);
                                                                        }
                                                                } break;
                                                                case cmp_lt:
                                                                case cmp_lte: {
-                                                                       if 
(!e->anti && lne) { /* max is min from both max */
+                                                                       if 
(!comp->anti && lne) { /* max is min from both max */
                                                                                
prop *p = find_prop(e->p, PROP_MAX);
                                                                                
set_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, p->value) 
: rval_max);
-                                                                       } else 
if (!e->anti) { /* min is max from both min */
+                                                                       } else 
if (!comp->anti) { /* min is max from both min */
                                                                                
prop *p = find_prop(e->p, PROP_MIN);
                                                                                
set_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, p->value) 
: lval_min);
                                                                        }
@@ -146,9 +146,9 @@ rel_propagate_column_ref_statistics(mvc 
                        }
                        if (found) {
                                /* if semantics flag was found, null values 
will pass */
-                               if (is_full(rel->op) || (is_left(rel->op) && 
found_right) || (is_right(rel->op) && found_left) || (has_nil(e) && 
found_with_semantics))
+                               if (is_full(rel->op) || (is_left(rel->op) && 
found_right) || (is_right(rel->op) && found_left) || !found_without_semantics)
                                        set_has_nil(e);
-                               else if (!has_nil(e) || !is_outerjoin(rel->op)) 
/* at an outer join, null values pass */
+                               else if (found_without_semantics || 
!is_outerjoin(rel->op)) /* at an outer join, null values pass */
                                        set_has_no_nil(e);
                        }
                        return e;
@@ -231,13 +231,12 @@ rel_basetable_get_statistics(visitor *v,
 }
 
 static void
-rel_setop_get_statistics(mvc *sql, sql_rel *rel, sql_exp *e, int i)
+rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, 
sql_exp *e, int i)
 {
-       sql_exp *le = list_fetch(((sql_rel*)(rel->l))->exps, i);
-       sql_exp *re = list_fetch(((sql_rel*)(rel->r))->exps, i);
+       sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
        atom *lval, *rval;
 
-       assert(le && e);
+       assert(le && re && e);
        if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = 
find_prop_and_get(re->p, PROP_MAX))) {
                if (is_union(rel->op))
                        set_property(sql, e, PROP_MAX, statistics_atom_max(sql, 
lval, rval)); /* for union the new max will be the max of the two */
@@ -416,8 +415,16 @@ rel_get_statistics(visitor *v, sql_rel *
        case op_inter:
        case op_except: {
                int i = 0;
+               sql_rel *l = rel->l, *r = rel->r;
+               list *lexps = l->exps, *rexps = r->exps;
+
+               if (!is_project(l->op))
+                       lexps = rel_projections(v->sql, l, NULL, 0, 1);
+               if (!is_project(r->op))
+                       rexps = rel_projections(v->sql, r, NULL, 0, 1);
+
                for (node *n = rel->exps->h ; n ; n = n->next) {
-                       rel_setop_get_statistics(v->sql, rel, n->data, i);
+                       rel_setop_get_statistics(v->sql, rel, lexps, rexps, 
n->data, i);
                        i++;
                }
        } break;
@@ -498,6 +505,57 @@ rel_simplify_count(visitor *v, sql_rel *
        return rel;
 }
 
+/*
+ * Tell whether the (anti-)equality 'e' between the single expressions e->l
+ * and e->r can never evaluate to true, judging from the PROP_MIN/PROP_MAX
+ * properties and the known NULL-ness of both operands.
+ */
+static bool
+prune_equality_always_false(sql_exp *e)
+{
+       sql_exp *le = e->l, *re = e->r;
+       atom *lval_min = find_prop_and_get(le->p, PROP_MIN);
+       atom *lval_max = find_prop_and_get(le->p, PROP_MAX);
+       atom *rval_min = find_prop_and_get(re->p, PROP_MIN);
+       atom *rval_max = find_prop_and_get(re->p, PROP_MAX);
+       bool always_false = false;
+
+       /* the range reasoning needs all four bounds, none of them NULL */
+       if (lval_min && lval_max && rval_min && rval_max &&
+           !lval_min->isnull && !lval_max->isnull &&
+           !rval_min->isnull && !rval_max->isnull) {
+               if (!e->anti) {
+                       /* l = r: never true when the two ranges are disjoint.
+                        * With NULL-aware semantics NULL matches NULL, so at
+                        * least one side must be free of NULLs. */
+                       if (!is_semantics(e) || !(has_nil(le)) || !(has_nil(re)))
+                               always_false = atom_cmp(rval_max, lval_min) < 0 ||
+                                              atom_cmp(rval_min, lval_max) > 0;
+               } else if (!is_semantics(e) || (!(has_nil(le)) && !(has_nil(re)))) {
+                       /* l <> r: never true only when both sides are pinned
+                        * to one and the same value. With NULL-aware semantics
+                        * a NULL on either side would be distinct from that
+                        * value, so both sides must be free of NULLs. */
+                       always_false = atom_cmp(lval_min, lval_max) == 0 &&
+                                      atom_cmp(rval_min, rval_max) == 0 &&
+                                      atom_cmp(lval_min, rval_min) == 0;
+               }
+       }
+
+       if (is_semantics(e)) {
+               if (e->anti) /* NULL is never distinct from NULL */
+                       always_false |= exp_is_null(le) && exp_is_null(re);
+               else /* a NULL never matches a value known to be non-NULL */
+                       always_false |= (exp_is_not_null(le) && exp_is_null(re)) ||
+                                       (exp_is_null(le) && exp_is_not_null(re));
+       }
+       return always_false;
+}
+
+/*
+ * Visitor callback: on select and join operators with a non-empty expression
+ * list, replace every comparison that can be proven to never hold with a
+ * boolean false atom. Each replacement increments v->changes. The relation
+ * is updated in place and returned.
+ */
+static sql_rel *
+rel_prune_predicates(visitor *v, sql_rel *rel)
+{
+       mvc *sql = v->sql;
+
+       if (!(is_joinop(rel->op) || is_select(rel->op)) || list_empty(rel->exps))
+               return rel;
+
+       for (node *n = rel->exps->h; n ; n = n->next) {
+               sql_exp *e = n->data;
+               bool always_false = false;
+
+               /* NOTE(review): confirm e->l and e->r are single expressions
+                * (not lists) for every comparison kind that sets e->f */
+               if (e->type == e_cmp && (is_theta_exp(e->flag) || e->f)) {
+                       if (e->f) {
+                               /* range predicate: not pruned yet. When it is,
+                                * the bounds of the third operand must be read
+                                * from e->f, not from e->r. */
+                       } else {
+                               switch (e->flag) {
+                               case cmp_equal:
+                                       always_false = prune_equality_always_false(e);
+                                       break;
+                               case cmp_notequal:
+                               case cmp_gt:
+                               case cmp_gte:
+                               case cmp_lt:
+                               case cmp_lte:
+                               default: /* Maybe later I can do cmp_in and cmp_notin */
+                                       break;
+                               }
+                       }
+               }
+
+               if (always_false) {
+                       n->data = exp_atom_bool(sql->sa, 0);
+                       v->changes++;
+               }
+       }
+       return rel;
+}
+
+
 sql_rel *
 rel_statistics(mvc *sql, sql_rel *rel)
 {
@@ -508,5 +566,14 @@ rel_statistics(mvc *sql, sql_rel *rel)
        rel = rel_visitor_bottomup(&v, rel, &rel_get_statistics);
        if (gp.cnt[op_groupby])
                rel = rel_visitor_bottomup(&v, rel, &rel_simplify_count);
+       if (gp.cnt[op_select] || gp.cnt[op_left] || gp.cnt[op_right] || 
gp.cnt[op_full] || gp.cnt[op_anti] || gp.cnt[op_join] || gp.cnt[op_semi]) {
+               int cur_changes = v.changes;
+               v.changes = 0; /* reset changes counter */
+               rel = rel_visitor_bottomup(&v, rel, &rel_prune_predicates);
+               if (v.changes > 0) /* there were changes by 
rel_prune_predicates, run rewrite_simplify */
+                       rel = rel_visitor_bottomup(&v, rel, &rewrite_simplify);
+               v.changes += cur_changes;
+       }
+
        return rel;
 }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to