Changeset: 51548bcfb19c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/51548bcfb19c
Branch: default
Log Message:

Merge with Sep2022 branch.


diffs (truncated from 472 to 300 lines):

diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -15,17 +15,17 @@ static sql_exp *
 comparison_find_column(sql_exp *input, sql_exp *e)
 {
        switch (input->type) {
-               case e_convert: {
-                       list *types = (list *)input->r;
-                       sql_class from = 
((sql_subtype*)types->h->data)->type->eclass, to = 
((sql_subtype*)types->h->next->data)->type->eclass;
-                       if (from == to)
-                               return comparison_find_column(input->l, e) ? 
input : NULL;
-                       return NULL;
-               }
-               case e_column:
-                       return exp_match(e, input) ? input : NULL;
-               default:
-                       return NULL;
+       case e_convert: {
+               list *types = (list *)input->r;
+               sql_class from = ((sql_subtype*)types->h->data)->type->eclass, 
to = ((sql_subtype*)types->h->next->data)->type->eclass;
+               if (from == to)
+                       return comparison_find_column(input->l, e) ? input : 
NULL;
+               return NULL;
+       }
+       case e_column:
+               return exp_match(e, input) ? input : NULL;
+       default:
+               return NULL;
        }
 }
 
@@ -62,7 +62,7 @@ rel_propagate_column_ref_statistics(mvc 
                                        if (comp->type == e_cmp) {
                                                if (is_theta_exp(comp->flag) && 
((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) 
|| (fe && (fne = comparison_find_column(fe, e))))) {
                                                        atom *lval_min = 
find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, 
PROP_MAX),
-                                                                *rval_min = 
find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, 
PROP_MAX);
+                                                               *rval_min = 
find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, 
PROP_MAX);
 
                                                        /* not semantics found 
or if explicitly filtering not null values from the column */
                                                        found_without_semantics 
|= !is_semantics(comp) || (comp->flag == cmp_equal && lne && is_anti(comp) && 
exp_is_null(re));
@@ -115,16 +115,16 @@ rel_propagate_column_ref_statistics(mvc 
                                                        } else if (lval_min && 
lval_max && rval_min && rval_max && atom_cmp(rval_max, lval_min) >= 0 && 
atom_cmp(rval_min, lval_max) <= 0) {
                                                                /* both min and 
max must be set and the intervals must overlap */
                                                                switch 
(comp->flag) {
-                                                               case cmp_equal: 
{ /* for equality reduce */
+                                                               case cmp_equal: 
/* for equality reduce */
                                                                        
set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_max(sql, 
lval_max, rval_max) : statistics_atom_min(sql, lval_max, rval_max));
                                                                        
set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_min(sql, 
lval_min, rval_min) : statistics_atom_max(sql, lval_min, rval_min));
-                                                               } break;
-                                                               case 
cmp_notequal: { /* for inequality expand */
+                                                                       break;
+                                                               case 
cmp_notequal: /* for inequality expand */
                                                                        
set_minmax_property(sql, e, PROP_MAX, is_anti(comp) ? statistics_atom_min(sql, 
lval_max, rval_max) : statistics_atom_max(sql, lval_max, rval_max));
                                                                        
set_minmax_property(sql, e, PROP_MIN, is_anti(comp) ? statistics_atom_max(sql, 
lval_min, rval_min) : statistics_atom_min(sql, lval_min, rval_min));
-                                                               } break;
+                                                                       break;
                                                                case cmp_gt:
-                                                               case cmp_gte: {
+                                                               case cmp_gte:
                                                                        if 
(!is_anti(comp) && lne) { /* min is max from both min */
                                                                                
prop *p = find_prop(e->p, PROP_MIN);
                                                                                
set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, rval_min, 
p->value.pval) : rval_min);
@@ -132,9 +132,9 @@ rel_propagate_column_ref_statistics(mvc 
                                                                                
prop *p = find_prop(e->p, PROP_MAX);
                                                                                
set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, lval_max, 
p->value.pval) : lval_max);
                                                                        }
-                                                               } break;
+                                                                       break;
                                                                case cmp_lt:
-                                                               case cmp_lte: {
+                                                               case cmp_lte:
                                                                        if 
(!is_anti(comp) && lne) { /* max is min from both max */
                                                                                
prop *p = find_prop(e->p, PROP_MAX);
                                                                                
set_minmax_property(sql, e, PROP_MAX, p ? statistics_atom_min(sql, rval_max, 
p->value.pval) : rval_max);
@@ -142,7 +142,7 @@ rel_propagate_column_ref_statistics(mvc 
                                                                                
prop *p = find_prop(e->p, PROP_MIN);
                                                                                
set_minmax_property(sql, e, PROP_MIN, p ? statistics_atom_max(sql, lval_min, 
p->value.pval) : lval_min);
                                                                        }
-                                                               } break;
+                                                                       break;
                                                                default: /* 
Maybe later I can do cmp_in and cmp_notin */
                                                                        break;
                                                                }
@@ -185,7 +185,7 @@ rel_propagate_column_ref_statistics(mvc 
                                                prop *p = e->p = 
prop_create(sql->sa, PROP_NUNIQUES, e->p);
                                                p->value.dval = 1;
                                        } else if (((is_basetable(rel->op) || 
is_except(rel->op) || is_inter(rel->op) || is_simple_project(rel->op) ||
-                                                               
(is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
+                                                                
(is_groupby(rel->op) && exps_find_exp(rel->r, e))) &&
                                                                (est = 
find_prop(found->p, PROP_NUNIQUES)) && !find_prop(e->p, PROP_NUNIQUES))) {
                                                prop *p = e->p = 
prop_create(sql->sa, PROP_NUNIQUES, e->p);
                                                p->value.dval = est->value.dval;
@@ -197,7 +197,7 @@ rel_propagate_column_ref_statistics(mvc 
                }
                case op_topn:
                case op_sample:
-                        return rel_propagate_column_ref_statistics(sql, 
rel->l, e);
+                       return rel_propagate_column_ref_statistics(sql, rel->l, 
e);
                default:
                        break;
                }
@@ -257,7 +257,7 @@ rel_setop_get_statistics(mvc *sql, sql_r
 {
        sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
        atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = 
find_prop_and_get(le->p, PROP_MAX),
-                *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = 
find_prop_and_get(re->p, PROP_MAX);
+               *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = 
find_prop_and_get(re->p, PROP_MAX);
        prop *est;
 
        /* for the intersection, if both expresssions don't overlap, it can be 
pruned */
@@ -315,7 +315,7 @@ rel_propagate_statistics(visitor *v, sql
 
        (void) depth;
        switch(e->type) {
-       case e_column: {
+       case e_column:
                switch (rel->op) { /* set relations don't call 
rel_propagate_statistics */
                case op_join:
                case op_left:
@@ -326,14 +326,16 @@ rel_propagate_statistics(visitor *v, sql
                        sql_exp *found = 
rel_propagate_column_ref_statistics(sql, rel->l, e);
                        if (!found)
                                (void) rel_propagate_column_ref_statistics(sql, 
rel->r, e);
-               } break;
+                       break;
+               }
                case op_select:
                case op_project:
                case op_groupby: {
                        sql_exp *found = 
rel_propagate_column_ref_statistics(sql, rel->l, e); /* labels may be found on 
the same projection, ugh */
                        if (!found && is_simple_project(rel->op))
                                (void) rel_propagate_column_ref_statistics(sql, 
rel, e);
-               } break;
+                       break;
+               }
                case op_insert:
                case op_update:
                case op_delete:
@@ -342,7 +344,7 @@ rel_propagate_statistics(visitor *v, sql
                default:
                        break;
                }
-       } break;
+               break;
        case e_convert: {
                sql_subtype *to = exp_totype(e), *from = exp_fromtype(e);
                sql_exp *l = e->l;
@@ -370,7 +372,8 @@ rel_propagate_statistics(visitor *v, sql
                }
                if (!has_nil(l))
                        set_has_no_nil(e);
-       } break;
+               break;
+       }
        case e_aggr:
        case e_func: {
                BUN lv;
@@ -402,8 +405,9 @@ rel_propagate_statistics(visitor *v, sql
                        }
                        set_unique(e);
                }
-       } break;
-       case e_atom: {
+               break;
+       }
+       case e_atom:
                if (e->l) {
                        atom *a = (atom*) e->l;
                        if (!a->isnull) {
@@ -454,7 +458,7 @@ rel_propagate_statistics(visitor *v, sql
                        prop *p = e->p = prop_create(sql->sa, PROP_NUNIQUES, 
e->p);
                        p->value.dval = 1;
                }
-       } break;
+               break;
        case e_cmp:
                /* TODO? propagating min/max/unique of booleans is not very 
worth it */
                if (e->flag == cmp_or || e->flag == cmp_filter) {
@@ -521,7 +525,7 @@ rel_prune_predicates(visitor *v, sql_rel
                                always_true |= !has_nil(le) && !has_nil(re) && 
!has_nil(fe) &&
                                        lval_min && lval_max && rval_min && 
rval_max && fval_min && fval_max &&
                                        (is_anti(e) ? ((lower == cmp_gte ? 
atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || 
(higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, 
fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
-                                       ((lower == cmp_gte ? atom_cmp(lval_min, 
rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? 
atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
+                                        ((lower == cmp_gte ? 
atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && 
(higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, 
lval_max) > 0)));
                        } else if (!fe) {
                                if (!is_semantics(e)) /* trival not null cmp 
null case */
                                        always_false |= !is_anti(e) && 
((exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && 
exp_is_not_null(re)));
@@ -670,7 +674,7 @@ rel_get_statistics_(visitor *v, sql_rel 
                return rel;
        rel->used |= statistics_gathered;
 
-       switch(rel->op){
+       switch (rel->op) {
        case op_basetable: {
                sql_table *t = (sql_table *) rel->l;
                sqlstore *store = v->sql->session->tr->store;
@@ -682,7 +686,8 @@ rel_get_statistics_(visitor *v, sql_rel 
                /* Set table row count. TODO? look for remote tables. Don't 
look at storage for declared tables, because it won't be cleaned */
                if (isTable(t) && t->s && !isDeclaredTable(t)) /* count active 
rows only */
                        set_count_prop(v->sql->sa, rel, 
(BUN)store->storage_api.count_col(v->sql->session->tr, 
ol_first_node(t->columns)->data, 10));
-       } break;
+               break;
+       }
        case op_union:
        case op_inter:
        case op_except: {
@@ -777,7 +782,8 @@ rel_get_statistics_(visitor *v, sql_rel 
                        set_nodistinct(rel); /* set relations may have distinct 
flag set */
                        v->changes++;
                }
-       } break;
+               break;
+       }
        case op_join:
        case op_left:
        case op_right:
@@ -859,12 +865,13 @@ rel_get_statistics_(visitor *v, sql_rel 
                        } else if (lv != BUN_NONE && rv != BUN_NONE) {
                                set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX 
/ lv)) ? BUN_MAX : (lv * rv)); /* overflow check */
                        }
-               } break;
-               case op_anti: {
+                       break;
+               }
+               case op_anti:
                        set_count_prop(v->sql->sa, rel, get_rel_count(l));
-               } break;
+                       break;
                case op_semi:
-               case op_select: {
+               case op_select:
                        /* TODO calculate cardinalities using selectivities */
                        if (list_length(rel->exps) == 1 && 
(exp_is_false(rel->exps->h->data) || exp_is_null(rel->exps->h->data))) {
                                set_count_prop(v->sql->sa, rel, 0);
@@ -877,20 +884,21 @@ rel_get_statistics_(visitor *v, sql_rel 
                                                /* simple expressions first */
                                                if (e->type == e_cmp && e->flag 
== cmp_equal && exp_is_atom(er)) {
                                                        /* use selectivity */
-                                                       prop *p = NULL;
+                                                       prop *p;
                                                        if ((p = 
find_prop(el->p, PROP_NUNIQUES))) {
                                                                u = (BUN) 
p->value.dval;
+                                                               break;
                                                        }
                                                }
                                        }
                                        /* u is an *estimate*, so don't set 
count_prop to 0 unless cnt is 0 */
-                                       set_count_prop(v->sql->sa, rel, cnt == 
0 ? 0 : u > cnt ? 1 : cnt/u);
+                                       set_count_prop(v->sql->sa, rel, cnt == 
0 ? 0 : u == 0 || u > cnt ? 1 : cnt/u);
                                } else {
                                        set_count_prop(v->sql->sa, rel, 
get_rel_count(l));
                                }
                        }
-               } break;
-               case op_project: {
+                       break;
+               case op_project:
                        if (l) {
                                if (need_distinct(rel)) {
                                        set_count_prop(v->sql->sa, rel, 
rel_calc_nuniques(v->sql, l, rel->exps));
@@ -908,18 +916,19 @@ rel_get_statistics_(visitor *v, sql_rel 
                                }
                                set_count_prop(v->sql->sa, rel, card);
                        }
-               } break;
-               case op_groupby: {
+                       break;
+               case op_groupby:
                        if (list_empty(rel->r)) {
                                set_count_prop(v->sql->sa, rel, 1);
                        } else {
                                set_count_prop(v->sql->sa, rel, 
rel_calc_nuniques(v->sql, l, rel->r));
                        }
-               } break;
+                       break;
                default:
                        break;
                }
-       } break;
+               break;
+       }
        case op_topn: {
                BUN lv = get_rel_count(rel->l);
 
@@ -935,7 +944,8 @@ rel_get_statistics_(visitor *v, sql_rel 
                        }
                        set_count_prop(v->sql->sa, rel, lv);
                }
-       } break;
+               break;
+       }
        case op_sample: {
                BUN lv = get_rel_count(rel->l);
 
@@ -953,7 +963,8 @@ rel_get_statistics_(visitor *v, sql_rel 
                        }
                        set_count_prop(v->sql->sa, rel, lv);
                }
-       } break;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to