Changeset: 38b754469d60 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=38b754469d60
Modified Files:
        gdk/gdk.h
        gdk/gdk_aggr.c
        gdk/gdk_calc.h
        monetdb5/mal/mal_instruction.c
        monetdb5/modules/kernel/aggr.c
        monetdb5/modules/kernel/aggr.mal
        monetdb5/modules/kernel/aggr.mal.sh
        sql/scripts/39_analytics.sql
        sql/server/rel_dump.c
        tools/merovingian/client/monetdb.c
Branch: default
Log Message:

quantiles second round, should be working on MAL level now.


diffs (truncated from 415 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2275,7 +2275,7 @@ gdk_export str GDKstrdup(const char *s);
                void *_res = GDKmmap(_path, _mode, _len);               \
                ALLOCDEBUG                                              \
                        fprintf(stderr,                                 \
-                               "#GDKmmap(%s,0x%x," SZFMT ") -> " SZFMT \
+                               "#GDKmmap(%s,0x%x," SZFMT ") -> " PTRFMT \
                                " %s[%s:%d]\n",                         \
                                _path ? _path : "NULL", _mode, _len,    \
                                PTRFMTCAST _res,                        \
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -2183,10 +2183,16 @@ BATmax(BAT *b, void *aggr)
 
 
 /* ---------------------------------------------------------------------- */
-/* median */
+/* quantiles/median */
 
 BAT *
-BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error)
+
+BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error) {
+       return BATgroupquantile(b,g,e,s,tp,0.5,skip_nils,abort_on_error);
+}
+
+BAT *
+ BATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile,int 
skip_nils, int abort_on_error)
 {
        int freeb = 0, freeg = 0;
        oid min, max;
@@ -2201,20 +2207,23 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
        const void *nil;
        int (*atomcmp)(const void *, const void *);
        const char *err;
-
        (void) abort_on_error;
 
        if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end,
                                    &cnt, &cand, &candend)) != NULL) {
-               GDKerror("BATgroupmedian: %s\n", err);
+               GDKerror("BATgroupquantile: %s\n", err);
                return NULL;
        }
        assert(tp == b->ttype);
        if (!ATOMlinear(b->ttype)) {
-               GDKerror("BATgroupmedian: cannot determine median on "
+               GDKerror("BATgroupquantile: cannot determine quantile on "
                         "non-linear type %s\n", ATOMname(b->ttype));
                return NULL;
        }
+       if (quantile < 0 || quantile > 1) {
+               GDKerror("BATgroupquantile: cannot determine quantile for p=%f 
(p has to be in [0,1])\n",quantile);
+               return NULL;
+       }
 
        if (BATcount(b) == 0 || ngrp == 0) {
                /* trivial: no medians, so return bat aligned with e with
@@ -2305,7 +2314,8 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
                                                           nil, 0, Tsize(bn));
                                        nils++;
                                } else {
-                                       v = BUNtail(bi, BUNfirst(b) + (r + p - 
1) / 2);
+                                       /* select the quantile within the group's rows [r, p): offset from the group start r, not from row 0 */
+                                       v = BUNtail(bi, (oid) (BUNfirst(b) + r + (p - r - 1) * quantile));
                                        bunfastins_nocheck(bn, BUNlast(bn), 0,
                                                           v, 0, Tsize(bn));
                                        nils += (*atomcmp)(v, nil) == 0;
@@ -2321,7 +2331,8 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
                }
                BATseqbase(bn, min);
        } else {
-               v = BUNtail(bi, BUNfirst(b) + (BATcount(b) - 1) / 2);
+               // actual selection of quantile value
+               v = BUNtail(bi, (oid) (BUNfirst(b) + (BATcount(b) - 1)  * 
quantile));
                BUNappend(bn, v, FALSE);
                BATseqbase(bn, 0);
                nils += (*atomcmp)(v, nil) == 0;
diff --git a/gdk/gdk_calc.h b/gdk/gdk_calc.h
--- a/gdk/gdk_calc.h
+++ b/gdk/gdk_calc.h
@@ -126,6 +126,8 @@ gdk_export BAT *BATgroupsize(BAT *b, BAT
 gdk_export BAT *BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 gdk_export BAT *BATgroupmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 gdk_export BAT *BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
+gdk_export BAT *BATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp, 
double quantile, int skip_nils, int abort_on_error);
+
 /* helper function for grouped aggregates */
 gdk_export const char *BATgroupaggrinit(
        const BAT *b, const BAT *g, const BAT *e, const BAT *s,
diff --git a/monetdb5/mal/mal_instruction.c b/monetdb5/mal/mal_instruction.c
--- a/monetdb5/mal/mal_instruction.c
+++ b/monetdb5/mal/mal_instruction.c
@@ -98,9 +98,11 @@ newMalBlk(int maxvars, int maxstmts)
        VarPtr *v;
 
        /* each MAL instruction implies at least on variable */
-       if ( maxvars < maxstmts)
+       // TODO: this check/assignment makes little sense
+       /*
+       if (maxvars < maxstmts)
                maxvars = maxvars;
-
+       */
        v = (VarPtr *) GDKzalloc(sizeof(VarPtr) * maxvars);
        if (v == NULL) {
                GDKerror("newMalBlk:" MAL_MALLOC_FAIL);
diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c
--- a/monetdb5/modules/kernel/aggr.c
+++ b/monetdb5/modules/kernel/aggr.c
@@ -38,14 +38,17 @@ static str
 AGGRgrouped(bat *retval1, bat *retval2, BAT *b, BAT *g, BAT *e, int tp,
                        BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, 
int),
                        gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, 
BAT *, BAT *, int, int, int),
+                       BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, 
double, int, int),
+                       double quantile,
                        int skip_nils,
                        const char *malfunc)
 {
        BAT *bn, *cnts = NULL, *t, *map;
 
-       /* one or the other of grpfunc1 and grpfunc2 is non-NULL */
-       assert(grpfunc1 == NULL || grpfunc2 == NULL);
-       assert(grpfunc1 || grpfunc2);
+       /* exactly one of grpfunc1, grpfunc2 and quantilefunc is non-NULL; the others are NULL */
+       assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
+                       (grpfunc1 == NULL && grpfunc2 != NULL && quantilefunc 
== NULL) ||
+                       (grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc 
!= NULL) );
        /* if retval2 is non-NULL, we must have grpfunc2 */
        assert(retval2 == NULL || grpfunc2 != NULL);
 
@@ -58,7 +61,7 @@ AGGRgrouped(bat *retval1, bat *retval2, 
                        BBPreleaseref(e->batCacheid);
                throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
        }
-       if (tp == TYPE_any && grpfunc1 == BATgroupmedian)
+       if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc == 
BATgroupquantile))
                tp = b->ttype;
        if (!BAThdense(b) || !BAThdense(g)) {
                /* if b or g don't have a dense head, replace the head with a
@@ -112,7 +115,9 @@ AGGRgrouped(bat *retval1, bat *retval2, 
        }
        if (grpfunc1)
                bn = (*grpfunc1)(b, g, e, NULL, tp, skip_nils, 1);
-       else if ((*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, tp, 
skip_nils, 1) == GDK_FAIL)
+       if (quantilefunc)
+               bn = (*quantilefunc)(b, g, e, NULL, tp, quantile, skip_nils, 1);
+       if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, 
tp, skip_nils, 1) == GDK_FAIL)
                bn = NULL;
        if (bn != NULL && (grpfunc1 == BATgroupmin || grpfunc1 == BATgroupmax)) 
{
                t = BATproject(bn, b);
@@ -177,7 +182,7 @@ AGGRgrouped3(bat *retval1, bat *retval2,
        b = BATdescriptor(*bid);        /* [head,value] */
        g = BATdescriptor(*gid);        /* [head,gid] */
        e = BATdescriptor(*eid);        /* [gid,any] */
-       return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1, grpfunc2, 
skip_nils, malfunc);
+       return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1, grpfunc2, 
NULL, 0 , skip_nils, malfunc);
 }
 
 static str
@@ -197,7 +202,7 @@ AGGRgrouped2(bat *retval1, bat *retval2,
        BBPreleaseref(b->batCacheid);
        b = e;
        e = BATdescriptor(*eid);        /* [gid,any] */
-       return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1, grpfunc2, 
skip_nils, malfunc);
+       return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1, 
grpfunc2,NULL, 0, skip_nils, malfunc);
 }
 
 aggr_export str AGGRsum3_bte(bat *retval, bat *bid, bat *gid, bat *eid);
@@ -602,18 +607,36 @@ AGGRmedian3(bat *retval, bat *bid, bat *
                                                BATgroupmedian, NULL, 0, 
"aggr.median");
 }
 
+
+// XXX: when are these functions called?
+aggr_export str AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, 
double *quantile);
+str
+AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile)
+{
+       // this is inlined from AGGRgrouped3 to avoid changing all the other 
functions for now
+       BAT *b, *g, *e;
+       b = BATdescriptor(*bid);        /* [head,value] */
+       g = BATdescriptor(*gid);        /* [head,gid] */
+       e = BATdescriptor(*eid);        /* [gid,any] */
+       return AGGRgrouped(retval, NULL, b, g, e, TYPE_any, NULL, NULL, 
BATgroupquantile, *quantile, 0,  "aggr.quantile");
+}
+
 static str
-AGGRsubgrouped(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, bat 
*sid,
+AGGRsubgroupedExt(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, 
bat *sid,
                           int skip_nils, int abort_on_error, int tp,
                           BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, 
int, int),
                           gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, 
BAT *, BAT *, int, int, int),
+                          BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, 
int, double, int, int),
+                          double quantile,
                           const char *malfunc)
 {
        BAT *b, *g, *e, *s, *bn, *cnts = NULL;
 
-       /* one or the other of grpfunc1 and grpfunc2 is non-NULL */
-       assert(grpfunc1 == NULL || grpfunc2 == NULL);
-       assert(grpfunc1 || grpfunc2);
+       /* exactly one of grpfunc1, grpfunc2 and quantilefunc is non-NULL; the others are NULL */
+       assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
+                       (grpfunc1 == NULL && grpfunc2 != NULL && quantilefunc == NULL) ||
+                       (grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc != NULL));
+
        /* if retval2 is non-NULL, we must have grpfunc2 */
        assert(retval2 == NULL || grpfunc2 != NULL);
 
@@ -629,7 +652,7 @@ AGGRsubgrouped(bat *retval1, bat *retval
                        BBPreleaseref(e->batCacheid);
                throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
        }
-       if (tp == TYPE_any && grpfunc1 == BATgroupmedian)
+       if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc == 
BATgroupquantile))
                tp = b->ttype;
 
        if (sid) {
@@ -654,7 +677,9 @@ AGGRsubgrouped(bat *retval1, bat *retval
        }
        if (grpfunc1)
                bn = (*grpfunc1)(b, g, e, s, tp, skip_nils, abort_on_error);
-       else if ((*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, tp, 
skip_nils, abort_on_error) == GDK_FAIL)
+       if (quantilefunc)
+                       bn = (*quantilefunc)(b, g, e, s, tp, quantile, 
skip_nils, abort_on_error);
+       if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, 
tp, skip_nils, abort_on_error) == GDK_FAIL)
                bn = NULL;
 
        BBPreleaseref(b->batCacheid);
@@ -692,6 +717,15 @@ AGGRsubgrouped(bat *retval1, bat *retval
        return MAL_SUCCEED;
 }
 
+static str
+AGGRsubgrouped(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, bat 
*sid,
+                          int skip_nils, int abort_on_error, int tp,
+                          BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, 
int, int),
+                          gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, 
BAT *, BAT *, int, int, int),
+                          const char *malfunc) {
+       return 
AGGRsubgroupedExt(retval1,retval2,bid,gid,eid,sid,skip_nils,abort_on_error,tp,grpfunc1,grpfunc2,
 NULL,0,malfunc);
+}
+
 aggr_export str AGGRsubsum_bte(bat *retval, bat *bid, bat *gid, bat *eid, bit 
*skip_nils, bit *abort_on_error);
 str
 AGGRsubsum_bte(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils, bit 
*abort_on_error)
@@ -1168,3 +1202,28 @@ AGGRsubmediancand(bat *retval, bat *bid,
        return AGGRsubgrouped(retval, NULL, bid, gid, eid, sid, *skip_nils,
                                                  0, TYPE_any, BATgroupmedian, 
NULL, "aggr.submedian");
 }
+
+/* quantile functions, could make median functions obsolete completely */
+aggr_export str AGGRquantile(bat *retval, bat *bid, double *quantile, bit 
*skip_nils);
+str
+AGGRquantile(bat *retval, bat *bid, double *quantile, bit *skip_nils)
+{
+       return AGGRsubgroupedExt(retval, NULL, bid, NULL, NULL, NULL, 
*skip_nils,
+                                                 0, TYPE_any, NULL, 
NULL,BATgroupquantile, *quantile ,"aggr.subquantile");
+}
+
+aggr_export str AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid,  
double *quantile, bit *skip_nils);
+str
+AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile, 
bit *skip_nils)
+{
+       return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, NULL, *skip_nils,
+                                                 0, TYPE_any, NULL, NULL, 
BATgroupquantile, *quantile , "aggr.subquantile");
+}
+
+aggr_export str AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid, 
bat *sid, double *quantile, bit *skip_nils);
+str
+AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
double *quantile,  bit *skip_nils)
+{
+       return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, sid, *skip_nils,
+                                                 0, TYPE_any, NULL, 
NULL,BATgroupquantile,*quantile, "aggr.subquantile");
+}
diff --git a/monetdb5/modules/kernel/aggr.mal b/monetdb5/modules/kernel/aggr.mal
--- a/monetdb5/modules/kernel/aggr.mal
+++ b/monetdb5/modules/kernel/aggr.mal
@@ -1433,6 +1433,7 @@ command subcount(b:bat[:oid,:any_1],g:ba
 address AGGRsubcountcand
 comment "Grouped count aggregate with candidates list";
 
+
 command median(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2]) 
:bat[:oid,:any_1]
 address AGGRmedian3
 comment "Grouped median aggregate";
@@ -1453,3 +1454,27 @@ comment "Grouped median aggregate";
 command 
submedian(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],s:bat[:oid,:oid],skip_nils:bit)
 :bat[:oid,:any_1]
 address AGGRsubmediancand
 comment "Grouped median aggregate with candidate list";
+
+
+command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl) 
:bat[:oid,:any_1]
+address AGGRquantile3
+comment "Grouped quantile aggregate";
+
+function quantile(b:bat[:oid,:any_1],q:dbl) :any_1;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to