Changeset: 55bc8ca94bfe for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/55bc8ca94bfe
Modified Files:
        gdk/gdk_histogram.c
Branch: histograms
Log Message:

Building generic histograms. Many corrections


diffs (110 lines):

diff --git a/gdk/gdk_histogram.c b/gdk/gdk_histogram.c
--- a/gdk/gdk_histogram.c
+++ b/gdk/gdk_histogram.c
@@ -34,10 +34,10 @@ static_assert(SAMPLE_SIZE < INT_MAX, "Th
 #endif
 
 #define histogram_entry(TPE)   \
-typedef struct HistogramEntry_##TPE {  \
+typedef struct HistogramBucket_##TPE { \
        TPE min, max;   /* The minimum and maximum value in the range */        
\
        int count;      /* number of values */  \
-} HistogramEntry_##TPE;
+} HistogramBucket_##TPE;
 
 histogram_entry(bte)
 histogram_entry(sht)
@@ -131,23 +131,22 @@ min_and_max_values(BATiter *bi, ValPtr m
        do { \
                TPE i = *(TPE*)VALget(min), ii = i, j = *(TPE*)VALget(max); \
                const TPE *restrict v = Tloc(sam, 0); \
-               BUN k = 0, l = BATcount(sam);   \
+               BUN c = BATcount(sam), m;       \
+               HistogramBucket_##TPE *restrict hist; \
        \
                h->nbuckets = (int) (j - i + 1); \
-               if (!(h->histogram = GDKmalloc(sizeof(HistogramEntry_##TPE) * 
h->nbuckets))) \
+               if (!(h->histogram = GDKmalloc(sizeof(HistogramBucket_##TPE) * 
h->nbuckets))) \
                        return NULL; \
        \
-               while (true) { /* Initialize bucket ranges */ \
-                       HistogramEntry_##TPE *restrict e = 
&(((HistogramEntry_##TPE *)h->histogram)[k++]); \
-                       e->min = e->max = ii; /* TODO? maybe I don't need to 
materialize this for 'perfect' histograms */ \
-                       e->count = 0; \
-                       if (ii == j) \
-                               break; \
-                       ii++; \
+               hist = (HistogramBucket_##TPE *) h->histogram; \
+               m = (BUN) h->nbuckets; \
+               for (BUN k = 0 ; k < m ; k++) {  /* Initialize bucket ranges */ 
\
+                       HistogramBucket_##TPE *restrict b = &(hist[k]); \
+                       b->min = b->max = ii++; /* TODO? maybe I don't need to 
materialize this for 'perfect' histograms */ \
+                       b->count = 0; \
                } \
        \
-               HistogramEntry_##TPE *restrict hist = (HistogramEntry_##TPE 
*)h->histogram; \
-               for (BUN k = 0 ; k < l ; k++) { /* Now create the histogram */\
+               for (BUN k = 0 ; k < c ; k++) { /* Now create the histogram */ \
                        TPE next = v[k]; \
                        if (is_##TPE##_nil(next)) { \
                                nulls++; \
@@ -192,33 +191,46 @@ create_perfect_histogram(BAT *sam, Histo
        do { \
                TPE i = *(TPE*)VALget(min), ii = i, j = *(TPE*)VALget(max), 
gap; \
                const TPE *restrict v = Tloc(sam, 0); \
-               BUN l = BATcount(sam);  \
+               BUN c = BATcount(sam);  \
+               HistogramBucket_##TPE *restrict hist; \
        \
                h->nbuckets = NBUCKETS; \
-               if (!(h->histogram = GDKmalloc(sizeof(HistogramEntry_##TPE) * 
h->nbuckets))) \
+               if (!(h->histogram = GDKmalloc(sizeof(HistogramBucket_##TPE) * 
h->nbuckets))) \
                        return NULL; \
-               gap = (TPE) floor((double) (j - i) / (double) h->nbuckets); \
+               gap = (TPE) floor((double) (j - i) / (double) h->nbuckets); /* 
TODO this may overflow for large integer types */ \
        \
+               hist = (HistogramBucket_##TPE *) h->histogram; \
                for (BUN k = 0 ; k < NBUCKETS ; k++) { \
-                       HistogramEntry_##TPE *restrict e = 
&(((HistogramEntry_##TPE *)h->histogram)[k]); \
-                       e->min = ii;    \
-                       ii += gap; \
+                       HistogramBucket_##TPE *restrict b = &(hist[k]); \
+                       b->min = ii;    \
                        if (k == NBUCKETS - 1) { \
-                               e->max = ii; \
+                               b->max = j; /* The last bucket will contain the 
max value inclusive */ \
                        } else { \
-                               e->max = j; /* The last bucket will contain the 
max value inclusive */ \
+                               ii += gap; \
+                               b->max = ii; \
                        } \
-                       e->count = 0; \
+                       b->count = 0; \
                } \
        \
-               HistogramEntry_##TPE *restrict hist = (HistogramEntry_##TPE 
*)h->histogram; \
-               for (BUN k = 0 ; k < l ; k++) { /* Now create the histogram */\
+               for (BUN k = 0 ; k < c ; k++) { /* Now create the histogram */ \
                        TPE next = v[k]; \
                        if (is_##TPE##_nil(next)) { \
                                nulls++; \
                        } else { \
-                               (void) hist; \
-                               /* ... */ \
+                               int l = 0, r = NBUCKETS - 1; \
+                               while (l <= r) { /* Do binary search to find 
the bucket */ \
+                                       int m = l + (r - l) / 2; \
+                                       HistogramBucket_##TPE *restrict b = 
&(hist[m]); \
+                                       /* Check if on the bucket. Don't forget 
last bucket case where max is inclusive */ \
+                                       if (next >= b->min && (next < b->max || 
(next == j && m == (NBUCKETS - 1)))) { \
+                                               b->count++; \
+                                       } else if (next < b->min) { /* value is 
smaller, ignore right half */ \
+                                               r = m - 1; \
+                                       } else { /* value is greater, ignore 
left half */ \
+                                               l = m + 1; \
+                                       } \
+                               } \
+                               assert(0); /* the value must be found */ \
                        } \
                } \
                h->nulls = nulls; \
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to