Changeset: 0f6db86a9e4b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/0f6db86a9e4b
Modified Files:
        sql/server/rel_statistics.c
        sql/server/sql_mvc.c
        sql/server/sql_mvc.h
        sql/storage/bat/bat_storage.c
        sql/storage/sql_storage.h
        sql/storage/store.c
Branch: properties
Log Message:

Propagate unique flag


diffs (203 lines):

diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -41,7 +41,7 @@ rel_propagate_column_ref_statistics(mvc 
                case op_select:
                case op_anti:
                case op_semi: {
-                       bool found_without_semantics = false, found_left = 
false, found_right = false;
+                       bool found_without_semantics = false, found_left = 
false, found_right = false, still_unique = true;
 
                        if ((is_innerjoin(rel->op) || is_select(rel->op)) && 
list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
                                return NULL; /* nothing will pass, skip */
@@ -54,17 +54,18 @@ rel_propagate_column_ref_statistics(mvc 
 
                        if (!found_left && !found_right)
                                return NULL;
+                       still_unique = !list_empty(rel->exps); /* cartesian 
products */
                        if (!list_empty(rel->exps) && rel->op != op_anti) { /* 
if there's an or, the MIN and MAX get difficult to propagate */
                                for (node *n = rel->exps->h ; n ; n = n->next) {
                                        sql_exp *comp = n->data, *le = comp->l, 
*lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL;
 
                                        if (comp->type == e_cmp) {
-
                                                if (is_theta_exp(comp->flag) && 
((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) 
|| (fe && (fne = comparison_find_column(fe, e))))) {
                                                        atom *lval_min = 
find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, 
PROP_MAX),
                                                                 *rval_min = 
find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, 
PROP_MAX);
 
                                                        found_without_semantics 
|= !comp->semantics;
+                                                       still_unique &= 
comp->flag == cmp_equal && is_unique(le) && is_unique(re); /* unique if only 
equi-joins on unique columns are there */
                                                        if (is_full(rel->op) || 
(is_left(rel->op) && found_left) || (is_right(rel->op) && found_right)) /* on 
outer joins, min and max cannot be propagated on some cases */
                                                                continue;
                                                        /* if (end2 >= start1 
&& start2 <= end1) then the 2 intervals are intersected */
@@ -145,7 +146,11 @@ rel_propagate_column_ref_statistics(mvc 
                                                                        break;
                                                                }
                                                        }
+                                               } else {
+                                                       still_unique = false;
                                                }
+                                       } else {
+                                               still_unique = false;
                                        }
                                }
                        }
@@ -153,6 +158,8 @@ rel_propagate_column_ref_statistics(mvc 
                                set_has_nil(e);
                        if (!is_outerjoin(rel->op) && found_without_semantics) 
/* at an outer join, null values pass */
                                set_has_no_nil(e);
+                       if (is_unique(e) && is_join(rel->op) && !still_unique)
+                               set_not_unique(e);
                        return e;
                }
                case op_table:
@@ -171,6 +178,9 @@ rel_propagate_column_ref_statistics(mvc 
                                        set_property(sql, e, PROP_MIN, fval);
                                if (!has_nil(found))
                                        set_has_no_nil(e);
+                               if (is_unique(found) || (need_distinct(rel) && 
list_length(rel->exps) == 1) ||
+                                       (is_groupby(rel->op) && 
list_length(rel->r) == 1 && exps_find_exp(rel->r, e)))
+                                       set_unique(e);
                                return e;
                        }
                        return NULL;
@@ -204,12 +214,14 @@ rel_basetable_get_statistics(visitor *v,
 
        (void)depth;
        if ((c = name_find_column(rel, exp_relname(e), exp_name(e), -2, NULL))) 
{
-               bool nonil = false;
+               bool nonil = false, unique = false;
                ValRecord min, max;
-               int ok = mvc_col_stats(sql, c, &nonil, &min, &max);
+               int ok = mvc_col_stats(sql, c, &nonil, &unique, &min, &max);
 
                if (has_nil(e) && nonil)
                        set_has_no_nil(e);
+               if (!is_unique(e) && unique)
+                       set_unique(e);
                if ((ok & 1) == 1) {
                        if (!VALisnil(&min)) {
                                prop *p = e->p = prop_create(sql->sa, PROP_MIN, 
e->p);
@@ -260,13 +272,19 @@ rel_setop_get_statistics(mvc *sql, sql_r
        if (is_union(rel->op)) {
                if (!has_nil(le) && !has_nil(re))
                        set_has_no_nil(e);
+               if (need_distinct(rel) && list_length(rel->exps) == 1)
+                       set_unique(e);
        } else if (is_inter(rel->op)) {
                if (!has_nil(le) || !has_nil(re))
                        set_has_no_nil(e);
+               if (is_unique(le) || (need_distinct(rel) && 
list_length(rel->exps) == 1))
+                       set_unique(e);
        } else {
                assert(is_except(rel->op));
                if (!has_nil(le))
                        set_has_no_nil(e);
+               if (is_unique(le) || (need_distinct(rel) && 
list_length(rel->exps) == 1))
+                       set_unique(e);
        }
        return false;
 }
@@ -389,7 +407,7 @@ rel_propagate_statistics(visitor *v, sql
                }
        } break;
        case e_cmp:
-               /* propagating min and max of booleans is not very worth it */
+               /* TODO? propagating min/max/unique of booleans is not very 
worth it */
                if (e->flag == cmp_or || e->flag == cmp_filter) {
                        if (!have_nil(e->l) && !have_nil(e->r))
                                set_has_no_nil(e);
diff --git a/sql/server/sql_mvc.c b/sql/server/sql_mvc.c
--- a/sql/server/sql_mvc.c
+++ b/sql/server/sql_mvc.c
@@ -1541,10 +1541,10 @@ mvc_is_duplicate_eliminated(mvc *m, sql_
 }
 
 int
-mvc_col_stats(mvc *m, sql_column *col, bool *nonil, ValPtr min, ValPtr max)
+mvc_col_stats(mvc *m, sql_column *col, bool *nonil, bool *unique, ValPtr min, 
ValPtr max)
 {
        TRC_DEBUG(SQL_TRANS, "Retrieving column stats for: %s\n", 
col->base.name);
-       return sql_trans_col_stats(m->session->tr, col, nonil, min, max);
+       return sql_trans_col_stats(m->session->tr, col, nonil, unique, min, 
max);
 }
 
 int
diff --git a/sql/server/sql_mvc.h b/sql/server/sql_mvc.h
--- a/sql/server/sql_mvc.h
+++ b/sql/server/sql_mvc.h
@@ -218,7 +218,7 @@ extern int mvc_access(mvc *m, sql_table 
 extern int mvc_is_sorted(mvc *c, sql_column *col);
 extern int mvc_is_unique(mvc *m, sql_column *col);
 extern int mvc_is_duplicate_eliminated(mvc *c, sql_column *col);
-extern int mvc_col_stats(mvc *m, sql_column *col, bool *nonil, ValPtr min, 
ValPtr max);
+extern int mvc_col_stats(mvc *m, sql_column *col, bool *nonil, bool *unique, 
ValPtr min, ValPtr max);
 
 extern int mvc_create_ukey(sql_key **kres, mvc *m, sql_table *t, const char 
*name, key_type kt);
 extern int mvc_create_fkey(sql_fkey **kres, mvc *m, sql_table *t, const char 
*name, key_type kt, sql_key *rkey, int on_delete, int on_update);
diff --git a/sql/storage/bat/bat_storage.c b/sql/storage/bat/bat_storage.c
--- a/sql/storage/bat/bat_storage.c
+++ b/sql/storage/bat/bat_storage.c
@@ -2705,7 +2705,7 @@ double_elim_col(sql_trans *tr, sql_colum
 }
 
 static int
-col_stats(sql_trans *tr, sql_column *c, bool *nonil, ValPtr min, ValPtr max)
+col_stats(sql_trans *tr, sql_column *c, bool *nonil, bool *unique, ValPtr min, 
ValPtr max)
 {
        int ok = 0;
        BAT *b = NULL;
@@ -2713,6 +2713,7 @@ col_stats(sql_trans *tr, sql_column *c, 
 
        assert(tr->active);
        *nonil = false;
+       *unique = false;
        if (!c || !isTable(c->t) || !c->t->s)
                return ok;
 
@@ -2736,6 +2737,7 @@ col_stats(sql_trans *tr, sql_column *c, 
                                bat_iterator_end(&bi);
                                bat_destroy(b);
                        }
+                       *unique = d->cs.ucnt == 0 && (d->cs.st == ST_DICT ? 
(((b = quick_descriptor(d->cs.bid)) != NULL) && b->tkey) : b->tkey);
                        if (*nonil && d->cs.ucnt > 0)
                                *nonil &= ((b = quick_descriptor(d->cs.uvbid)) 
!= NULL) && b->tnonil && !b->tnil;
                }
diff --git a/sql/storage/sql_storage.h b/sql/storage/sql_storage.h
--- a/sql/storage/sql_storage.h
+++ b/sql/storage/sql_storage.h
@@ -154,7 +154,7 @@ typedef size_t (*count_idx_fptr) (sql_tr
 typedef size_t (*dcount_col_fptr) (sql_trans *tr, sql_column *c);
 typedef int (*min_max_col_fptr) (sql_trans *tr, sql_column *c);
 typedef int (*prop_col_fptr) (sql_trans *tr, sql_column *c);
-typedef int (*proprec_col_fptr) (sql_trans *tr, sql_column *c, bool *nonil, 
ValPtr min, ValPtr max);
+typedef int (*proprec_col_fptr) (sql_trans *tr, sql_column *c, bool *nonil, 
bool *unique, ValPtr min, ValPtr max);
 
 /*
 -- create the necessary storage resources for columns, indices and tables
@@ -396,7 +396,7 @@ extern int sql_trans_alter_storage(sql_t
 extern int sql_trans_is_sorted(sql_trans *tr, sql_column *col);
 extern int sql_trans_is_unique(sql_trans *tr, sql_column *col);
 extern int sql_trans_is_duplicate_eliminated(sql_trans *tr, sql_column *col);
-extern int sql_trans_col_stats(sql_trans *tr, sql_column *col, bool *nonil, 
ValPtr min, ValPtr max);
+extern int sql_trans_col_stats(sql_trans *tr, sql_column *col, bool *nonil, 
bool *unique, ValPtr min, ValPtr max);
 extern size_t sql_trans_dist_count(sql_trans *tr, sql_column *col);
 extern int sql_trans_ranges(sql_trans *tr, sql_column *col, void **min, void 
**max);
 
diff --git a/sql/storage/store.c b/sql/storage/store.c
--- a/sql/storage/store.c
+++ b/sql/storage/store.c
@@ -6173,11 +6173,11 @@ sql_trans_is_duplicate_eliminated( sql_t
 }
 
 int
-sql_trans_col_stats( sql_trans *tr, sql_column *col, bool *nonil, ValPtr min, 
ValPtr max )
+sql_trans_col_stats( sql_trans *tr, sql_column *col, bool *nonil, bool 
*unique, ValPtr min, ValPtr max )
 {
        sqlstore *store = tr->store;
        if (col && isTable(col->t) && store->storage_api.col_stats)
-               return store->storage_api.col_stats(tr, col, nonil, min, max);
+               return store->storage_api.col_stats(tr, col, nonil, unique, 
min, max);
        return 0;
 }
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to