Changeset: 82ea119d8d57 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=82ea119d8d57
Modified Files:
        gdk/gdk_aggr.c
Branch: default
Log Message:

Use binary search to iterate over groups in grouped quantiles.


diffs (79 lines):

diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -2369,6 +2369,13 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
        return BATgroupquantile(b,g,e,s,tp,0.5,skip_nils,abort_on_error);
 }
 
+#if SIZEOF_OID == SIZEOF_INT
+#define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) 
binsearch_int(indir, offset, (const int *) vals, lo, hi, (int) (v), ordering, 
last)
+#endif
+#if SIZEOF_OID == SIZEOF_LNG
+#define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) 
binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, 
last)
+#endif
+
 BAT *
 BATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile,
                 int skip_nils, int abort_on_error)
@@ -2469,36 +2476,35 @@ BATgroupquantile(BAT *b, BAT *g, BAT *e,
                bi = bat_iterator(b);
 
                grps = (const oid *) Tloc(g, 0);
-               prev = grps[0];
                 /* for each group (r and p are the beginning and end
                  * of the current group, respectively) */
-               for (r = 0, p = 1, q = BATcount(g); p <= q; p++) {
-                       assert(r < p);
-                       if (p == q || grps[p] != prev) {
-                               BUN qindex;
-                               if (skip_nils && !b->tnonil) {
-                                       r = binsearch(NULL, 0, tp, Tloc(b, 0),
-                                                     b->tvheap ? 
b->tvheap->base : NULL,
-                                                     b->twidth, r, p, nil,
-                                                     1, 1);
-                               }
-                               if (r == p) {
-                                       v = nil;
-                                       nils++;
-                               } else {
-                                       /* round *down* to nearest integer */
-                                       qindex = r + p - (BUN) (p + 0.5 - (p - 
r - 1) * quantile);
-                                       /* be a little paranoid about the index 
*/
-                                       assert(qindex >= r && qindex <  p);
-                                       v = BUNtail(bi, qindex);
+               for (r = 0, q = BATcount(g); r < q; r = p) {
+                       BUN qindex;
+                       prev = grps[r];
+                       /* search for end of current group (grps is
+                        * sorted so we can use binary search) */
+                       p = binsearch_oid(NULL, 0, grps, r, q - 1, prev, 1, 1);
+                       if (skip_nils && !b->tnonil) {
+                               /* within group, locate start of non-nils */
+                               r = binsearch(NULL, 0, tp, Tloc(b, 0),
+                                             b->tvheap ? b->tvheap->base : 
NULL,
+                                             b->twidth, r, p, nil,
+                                             1, 1);
+                       }
+                       if (r == p) {
+                               /* no non-nils found */
+                               v = nil;
+                               nils++;
+                       } else {
+                               /* round *down* to nearest integer */
+                               qindex = r + p - (BUN) (p + 0.5 - (p - r - 1) * 
quantile);
+                               /* be a little paranoid about the index */
+                               assert(qindex >= r && qindex <  p);
+                               v = BUNtail(bi, qindex);
+                               if (!skip_nils && !b->tnonil)
                                        nils += (*atomcmp)(v, nil) == 0;
-                               }
-                               bunfastapp_nocheck(bn, BUNlast(bn), v, 
Tsize(bn));
-
-                               r = p;
-                               if (p < q)
-                                       prev = grps[p];
                        }
+                       bunfastapp_nocheck(bn, BUNlast(bn), v, Tsize(bn));
                }
                nils += ngrp - BATcount(bn);
                while (BATcount(bn) < ngrp) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to