Changeset: a67840bbc2aa for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a67840bbc2aa
Modified Files:
        sql/common/sql_backend.c
        sql/include/sql_catalog.h
        sql/storage/bat/bat_storage.c
        sql/storage/bat/bat_table.c
        sql/storage/sql_storage.h
        sql/storage/store.c
Branch: default
Log Message:

keep distinct count and min/max in the column structures


diffs (168 lines):

diff --git a/sql/common/sql_backend.c b/sql/common/sql_backend.c
--- a/sql/common/sql_backend.c
+++ b/sql/common/sql_backend.c
@@ -105,6 +105,7 @@ backend_schema_user_dependencies(ptr tra
                return(be_funcs.fschuserdep(trans, schema_id));
        return NULL;
 }
+
 int    
 backend_resolve_function(ptr M, sql_func *f)
 {
diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h
--- a/sql/include/sql_catalog.h
+++ b/sql/include/sql_catalog.h
@@ -435,6 +435,8 @@ typedef struct sql_column {
        char *storage_type;
        int sorted;             /* for DECLARED (dupped tables) we keep order info */
        size_t dcount;
+       char *min;
+       char *max;
 
        struct sql_table *t;
        void *data;
diff --git a/sql/storage/bat/bat_storage.c b/sql/storage/bat/bat_storage.c
--- a/sql/storage/bat/bat_storage.c
+++ b/sql/storage/bat/bat_storage.c
@@ -788,6 +788,39 @@ count_col(sql_trans *tr, sql_column *c, 
 }
 
 static size_t
+dcount_col(sql_trans *tr, sql_column *c)
+{
+       sql_delta *b;
+
+       if (!c->data) {
+               sql_column *oc = tr_find_column(tr->parent, c);
+               c->data = timestamp_delta(oc->data, tr->stime);
+       }
+        b = c->data;
+       if (!b)
+               return 1;
+       if (b->cnt > 1024) {
+               size_t dcnt = 0;
+               dbl f = 1.0;
+               BAT *v = delta_bind_bat(b, RDONLY, 0), *o = v, *u;
+
+               if ((dcnt = BATcount(v)) > 1024*1024) {
+                       v = BATsample(v, 1024);
+                       f = dcnt/1024.0;
+               }
+               u = BATsubunique(v, NULL);
+               bat_destroy(o);
+               if (v!=o)
+                       bat_destroy(v);
+               dcnt = BATcount(u)*f;
+               bat_destroy(u);
+               return dcnt;
+       } else {
+               return 64;
+       }
+}
+
+static size_t
 count_idx(sql_trans *tr, sql_idx *i, int all)
 {
        sql_delta *b;
@@ -2234,6 +2267,7 @@ bat_storage_init( store_functions *sf)
        sf->count_del = (count_del_fptr)&count_del;
        sf->count_col = (count_col_fptr)&count_col;
        sf->count_idx = (count_idx_fptr)&count_idx;
+       sf->dcount_col = (dcount_col_fptr)&dcount_col;
        sf->sorted_col = (prop_col_fptr)&sorted_col;
        sf->double_elim_col = (prop_col_fptr)&double_elim_col;
 
diff --git a/sql/storage/bat/bat_table.c b/sql/storage/bat/bat_table.c
--- a/sql/storage/bat/bat_table.c
+++ b/sql/storage/bat/bat_table.c
@@ -367,6 +367,21 @@ rids_destroy(rids *r)
        _DELETE(r);
 }
 
+/*
+ * Replace the contents of l by the result of joining it with r.
+ *
+ * The full column BATs of lc and rc are fetched and handed to
+ * BATsubjoin() together with l->data and r->data; only the first join
+ * result is requested (the second output pointer is NULL).  The previous
+ * l->data is released and replaced by that result.  r is only read here,
+ * it is not released.
+ *
+ * NOTE(review): the return value of BATsubjoin() is not checked; if the
+ * call leaves its output untouched on failure, l->data ends up NULL --
+ * confirm callers cope with that.
+ *
+ * Returns l.
+ */
+static rids *
+rids_join(sql_trans *tr, rids *l, sql_column *lc, rids *r, sql_column *rc)
+{
+       BAT *left = full_column(tr, lc);
+       BAT *right = full_column(tr, rc);
+       BAT *joined = NULL;
+
+       BATsubjoin(&joined, NULL, left, right, l->data, r->data, FALSE, BATcount(left));
+
+       /* swap the old rid set for the join result */
+       bat_destroy(l->data);
+       l->data = joined;
+
+       /* the column BATs were only needed for the join itself */
+       bat_destroy(left);
+       bat_destroy(right);
+       return l;
+}
+
 int 
 bat_table_init( table_functions *tf )
 {
@@ -379,6 +394,7 @@ bat_table_init( table_functions *tf )
        
        tf->rids_select = rids_select;
        tf->rids_orderby = rids_orderby;
+       tf->rids_join = rids_join;
        tf->rids_next = rids_next;
        tf->rids_destroy = rids_destroy;
        return LOG_OK;
diff --git a/sql/storage/sql_storage.h b/sql/storage/sql_storage.h
--- a/sql/storage/sql_storage.h
+++ b/sql/storage/sql_storage.h
@@ -49,6 +49,7 @@ typedef int (*table_delete_fptr)(sql_tra
 typedef struct rids {
        BUN cur;
        void *data;
+       BUN l,h;        /* subselect using slices */
 } rids;
 
 /* returns table rids, for the given select ranges */
@@ -104,6 +105,7 @@ typedef void (*delete_tab_fptr) (sql_tra
 typedef size_t (*count_del_fptr) (sql_trans *tr, sql_table *t);
 typedef size_t (*count_col_fptr) (sql_trans *tr, sql_column *c, int all /* all or new only */);
 typedef size_t (*count_idx_fptr) (sql_trans *tr, sql_idx *i, int all /* all or new only */);
+typedef size_t (*dcount_col_fptr) (sql_trans *tr, sql_column *c);
 typedef int (*prop_col_fptr) (sql_trans *tr, sql_column *c);
 
 /*
@@ -180,6 +182,7 @@ typedef struct store_functions {
        count_del_fptr count_del;
        count_col_fptr count_col;
        count_idx_fptr count_idx;
+       dcount_col_fptr dcount_col;
        prop_col_fptr sorted_col;
        prop_col_fptr double_elim_col; /* varsize col with double elimination */
 
diff --git a/sql/storage/store.c b/sql/storage/store.c
--- a/sql/storage/store.c
+++ b/sql/storage/store.c
@@ -4464,6 +4464,8 @@ sql_trans_dist_count( sql_trans *tr, sql
 
                                col->dcount = *(size_t*)v; 
                                _DELETE(v);
+                       } else { /* sample and put in statistics */
+                               col->dcount = store_funcs.dcount_col(tr, col);
                        }
                }
                return col->dcount;
@@ -4478,6 +4480,12 @@ sql_trans_ranges( sql_trans *tr, sql_col
                /* get from statistics */
                sql_schema *sys = find_sql_schema(tr, "sys");
                sql_table *stats = find_sql_table(sys, "statistics");
+
+               if (col->min && col->max) {
+                       *min = col->min;
+                       *max = col->max;
+                       return 1;
+               }
                if (stats) {
                        sql_column *stats_column_id = find_sql_column(stats, "column_id");
                        oid rid = table_funcs.column_find_row(tr, stats_column_id, &col->base.id, NULL);
@@ -4487,6 +4495,8 @@ sql_trans_ranges( sql_trans *tr, sql_col
 
                                *min = table_funcs.column_find_value(tr, stats_min, rid);
                                *max = table_funcs.column_find_value(tr, stats_max, rid);
+                               col->min = *min;
+                               col->max = *max;
                                return 1;
                        }
                }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to