Changeset: 38b754469d60 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=38b754469d60
Modified Files:
gdk/gdk.h
gdk/gdk_aggr.c
gdk/gdk_calc.h
monetdb5/mal/mal_instruction.c
monetdb5/modules/kernel/aggr.c
monetdb5/modules/kernel/aggr.mal
monetdb5/modules/kernel/aggr.mal.sh
sql/scripts/39_analytics.sql
sql/server/rel_dump.c
tools/merovingian/client/monetdb.c
Branch: default
Log Message:
quantiles second round, should be working on MAL level now.
diffs (truncated from 415 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2275,7 +2275,7 @@ gdk_export str GDKstrdup(const char *s);
void *_res = GDKmmap(_path, _mode, _len); \
ALLOCDEBUG \
fprintf(stderr, \
- "#GDKmmap(%s,0x%x," SZFMT ") -> " SZFMT \
+ "#GDKmmap(%s,0x%x," SZFMT ") -> " PTRFMT \
" %s[%s:%d]\n", \
_path ? _path : "NULL", _mode, _len, \
PTRFMTCAST _res, \
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -2183,10 +2183,16 @@ BATmax(BAT *b, void *aggr)
/* ---------------------------------------------------------------------- */
-/* median */
+/* quantiles/median */
BAT *
-BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int
abort_on_error)
+
+BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int
abort_on_error) {
+ return BATgroupquantile(b,g,e,s,tp,0.5,skip_nils,abort_on_error);
+}
+
+BAT *
+ BATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp, double quantile,int
skip_nils, int abort_on_error)
{
int freeb = 0, freeg = 0;
oid min, max;
@@ -2201,20 +2207,23 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
const void *nil;
int (*atomcmp)(const void *, const void *);
const char *err;
-
(void) abort_on_error;
if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end,
&cnt, &cand, &candend)) != NULL) {
- GDKerror("BATgroupmedian: %s\n", err);
+ GDKerror("BATgroupquantile: %s\n", err);
return NULL;
}
assert(tp == b->ttype);
if (!ATOMlinear(b->ttype)) {
- GDKerror("BATgroupmedian: cannot determine median on "
+ GDKerror("BATgroupquantile: cannot determine quantile on "
"non-linear type %s\n", ATOMname(b->ttype));
return NULL;
}
+ if (quantile < 0 || quantile > 1) {
+ GDKerror("BATgroupquantile: cannot determine quantile for p=%f
(p has to be in [0,1])\n",quantile);
+ return NULL;
+ }
if (BATcount(b) == 0 || ngrp == 0) {
/* trivial: no medians, so return bat aligned with e with
@@ -2305,7 +2314,8 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
nil, 0, Tsize(bn));
nils++;
} else {
- v = BUNtail(bi, BUNfirst(b) + (r + p -
1) / 2);
+ /* select the quantile inside the current group: the offset is
+  * taken relative to r, so q=0 picks r and q=1 picks p-1, and
+  * q=0.5 is the same midpoint as the former (r + p - 1) / 2
+  * (assumes the group occupies positions [r, p) -- confirm) */
+ v = BUNtail(bi, (oid) (BUNfirst(b) + r + (p - r - 1) * quantile));
bunfastins_nocheck(bn, BUNlast(bn), 0,
v, 0, Tsize(bn));
nils += (*atomcmp)(v, nil) == 0;
@@ -2321,7 +2331,8 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
}
BATseqbase(bn, min);
} else {
- v = BUNtail(bi, BUNfirst(b) + (BATcount(b) - 1) / 2);
+ // actual selection of quantile value
+ v = BUNtail(bi, (oid) (BUNfirst(b) + (BATcount(b) - 1) *
quantile));
BUNappend(bn, v, FALSE);
BATseqbase(bn, 0);
nils += (*atomcmp)(v, nil) == 0;
diff --git a/gdk/gdk_calc.h b/gdk/gdk_calc.h
--- a/gdk/gdk_calc.h
+++ b/gdk/gdk_calc.h
@@ -126,6 +126,8 @@ gdk_export BAT *BATgroupsize(BAT *b, BAT
gdk_export BAT *BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
gdk_export BAT *BATgroupmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
gdk_export BAT *BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
+gdk_export BAT *BATgroupquantile(BAT *b, BAT *g, BAT *e, BAT *s, int tp,
double quantile, int skip_nils, int abort_on_error);
+
/* helper function for grouped aggregates */
gdk_export const char *BATgroupaggrinit(
const BAT *b, const BAT *g, const BAT *e, const BAT *s,
diff --git a/monetdb5/mal/mal_instruction.c b/monetdb5/mal/mal_instruction.c
--- a/monetdb5/mal/mal_instruction.c
+++ b/monetdb5/mal/mal_instruction.c
@@ -98,9 +98,11 @@ newMalBlk(int maxvars, int maxstmts)
VarPtr *v;
/* each MAL instruction implies at least on variable */
- if ( maxvars < maxstmts)
+ // TODO: this check/assignment makes little sense
+ /*
+ if (maxvars < maxstmts)
maxvars = maxvars;
-
+ */
v = (VarPtr *) GDKzalloc(sizeof(VarPtr) * maxvars);
if (v == NULL) {
GDKerror("newMalBlk:" MAL_MALLOC_FAIL);
diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c
--- a/monetdb5/modules/kernel/aggr.c
+++ b/monetdb5/modules/kernel/aggr.c
@@ -38,14 +38,17 @@ static str
AGGRgrouped(bat *retval1, bat *retval2, BAT *b, BAT *g, BAT *e, int tp,
BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int,
int),
gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *,
BAT *, BAT *, int, int, int),
+ BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int,
double, int, int),
+ double quantile,
int skip_nils,
const char *malfunc)
{
BAT *bn, *cnts = NULL, *t, *map;
- /* one or the other of grpfunc1 and grpfunc2 is non-NULL */
- assert(grpfunc1 == NULL || grpfunc2 == NULL);
- assert(grpfunc1 || grpfunc2);
+ /* exactly one of grpfunc1, grpfunc2 and quantilefunc is non-NULL */
+ assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
+ (grpfunc1 == NULL && grpfunc2 != NULL && quantilefunc
== NULL) ||
+ (grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc
!= NULL) );
/* if retval2 is non-NULL, we must have grpfunc2 */
assert(retval2 == NULL || grpfunc2 != NULL);
@@ -58,7 +61,7 @@ AGGRgrouped(bat *retval1, bat *retval2,
BBPreleaseref(e->batCacheid);
throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
}
- if (tp == TYPE_any && grpfunc1 == BATgroupmedian)
+ if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc ==
BATgroupquantile))
tp = b->ttype;
if (!BAThdense(b) || !BAThdense(g)) {
/* if b or g don't have a dense head, replace the head with a
@@ -112,7 +115,9 @@ AGGRgrouped(bat *retval1, bat *retval2,
}
if (grpfunc1)
bn = (*grpfunc1)(b, g, e, NULL, tp, skip_nils, 1);
- else if ((*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, tp,
skip_nils, 1) == GDK_FAIL)
+ if (quantilefunc)
+ bn = (*quantilefunc)(b, g, e, NULL, tp, quantile, skip_nils, 1);
+ if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL,
tp, skip_nils, 1) == GDK_FAIL)
bn = NULL;
if (bn != NULL && (grpfunc1 == BATgroupmin || grpfunc1 == BATgroupmax))
{
t = BATproject(bn, b);
@@ -177,7 +182,7 @@ AGGRgrouped3(bat *retval1, bat *retval2,
b = BATdescriptor(*bid); /* [head,value] */
g = BATdescriptor(*gid); /* [head,gid] */
e = BATdescriptor(*eid); /* [gid,any] */
- return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1, grpfunc2,
skip_nils, malfunc);
+ return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1, grpfunc2,
NULL, 0 , skip_nils, malfunc);
}
static str
@@ -197,7 +202,7 @@ AGGRgrouped2(bat *retval1, bat *retval2,
BBPreleaseref(b->batCacheid);
b = e;
e = BATdescriptor(*eid); /* [gid,any] */
- return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1, grpfunc2,
skip_nils, malfunc);
+ return AGGRgrouped(retval1, retval2, b, g, e, tp, grpfunc1,
grpfunc2,NULL, 0, skip_nils, malfunc);
}
aggr_export str AGGRsum3_bte(bat *retval, bat *bid, bat *gid, bat *eid);
@@ -602,18 +607,36 @@ AGGRmedian3(bat *retval, bat *bid, bat *
BATgroupmedian, NULL, 0,
"aggr.median");
}
+
+// The *3 entry points are bound to MAL commands in aggr.mal (e.g. quantile(b,g,e,q) -> AGGRquantile3).
+aggr_export str AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid,
double *quantile);
+str
+AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile)
+{
+ // this is inlined from AGGRgrouped3 to avoid changing all the other
functions for now
+ BAT *b, *g, *e;
+ b = BATdescriptor(*bid); /* [head,value] */
+ g = BATdescriptor(*gid); /* [head,gid] */
+ e = BATdescriptor(*eid); /* [gid,any] */
+ return AGGRgrouped(retval, NULL, b, g, e, TYPE_any, NULL, NULL,
BATgroupquantile, *quantile, 0, "aggr.quantile");
+}
+
static str
-AGGRsubgrouped(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, bat
*sid,
+AGGRsubgroupedExt(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid,
bat *sid,
int skip_nils, int abort_on_error, int tp,
BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int,
int, int),
gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *,
BAT *, BAT *, int, int, int),
+ BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *,
int, double, int, int),
+ double quantile,
const char *malfunc)
{
BAT *b, *g, *e, *s, *bn, *cnts = NULL;
- /* one or the other of grpfunc1 and grpfunc2 is non-NULL */
- assert(grpfunc1 == NULL || grpfunc2 == NULL);
- assert(grpfunc1 || grpfunc2);
+ /* exactly one of grpfunc1, grpfunc2 and quantilefunc must be
+  * non-NULL: the dispatch further down takes bn from whichever
+  * one is set */
+ assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
+ (grpfunc1 == NULL && grpfunc2 != NULL && quantilefunc == NULL) ||
+ (grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc != NULL));
+
/* if retval2 is non-NULL, we must have grpfunc2 */
assert(retval2 == NULL || grpfunc2 != NULL);
@@ -629,7 +652,7 @@ AGGRsubgrouped(bat *retval1, bat *retval
BBPreleaseref(e->batCacheid);
throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
}
- if (tp == TYPE_any && grpfunc1 == BATgroupmedian)
+ if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc ==
BATgroupquantile))
tp = b->ttype;
if (sid) {
@@ -654,7 +677,9 @@ AGGRsubgrouped(bat *retval1, bat *retval
}
if (grpfunc1)
bn = (*grpfunc1)(b, g, e, s, tp, skip_nils, abort_on_error);
- else if ((*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, tp,
skip_nils, abort_on_error) == GDK_FAIL)
+ if (quantilefunc)
+ bn = (*quantilefunc)(b, g, e, s, tp, quantile,
skip_nils, abort_on_error);
+ if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s,
tp, skip_nils, abort_on_error) == GDK_FAIL)
bn = NULL;
BBPreleaseref(b->batCacheid);
@@ -692,6 +717,15 @@ AGGRsubgrouped(bat *retval1, bat *retval
return MAL_SUCCEED;
}
+static str
+AGGRsubgrouped(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, bat
*sid,
+ int skip_nils, int abort_on_error, int tp,
+ BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int,
int, int),
+ gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *,
BAT *, BAT *, int, int, int),
+ const char *malfunc) {
+ return
AGGRsubgroupedExt(retval1,retval2,bid,gid,eid,sid,skip_nils,abort_on_error,tp,grpfunc1,grpfunc2,
NULL,0,malfunc);
+}
+
aggr_export str AGGRsubsum_bte(bat *retval, bat *bid, bat *gid, bat *eid, bit
*skip_nils, bit *abort_on_error);
str
AGGRsubsum_bte(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils, bit
*abort_on_error)
@@ -1168,3 +1202,28 @@ AGGRsubmediancand(bat *retval, bat *bid,
return AGGRsubgrouped(retval, NULL, bid, gid, eid, sid, *skip_nils,
0, TYPE_any, BATgroupmedian,
NULL, "aggr.submedian");
}
+
+/* quantile functions, could make median functions obsolete completely */
+aggr_export str AGGRquantile(bat *retval, bat *bid, double *quantile, bit
*skip_nils);
+str
+AGGRquantile(bat *retval, bat *bid, double *quantile, bit *skip_nils)
+{
+ return AGGRsubgroupedExt(retval, NULL, bid, NULL, NULL, NULL,
*skip_nils,
+ 0, TYPE_any, NULL,
NULL,BATgroupquantile, *quantile ,"aggr.subquantile");
+}
+
+aggr_export str AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid,
double *quantile, bit *skip_nils);
+str
+AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile,
bit *skip_nils)
+{
+ return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, NULL, *skip_nils,
+ 0, TYPE_any, NULL, NULL,
BATgroupquantile, *quantile , "aggr.subquantile");
+}
+
+aggr_export str AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid,
bat *sid, double *quantile, bit *skip_nils);
+str
+AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid,
double *quantile, bit *skip_nils)
+{
+ return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, sid, *skip_nils,
+ 0, TYPE_any, NULL,
NULL,BATgroupquantile,*quantile, "aggr.subquantile");
+}
diff --git a/monetdb5/modules/kernel/aggr.mal b/monetdb5/modules/kernel/aggr.mal
--- a/monetdb5/modules/kernel/aggr.mal
+++ b/monetdb5/modules/kernel/aggr.mal
@@ -1433,6 +1433,7 @@ command subcount(b:bat[:oid,:any_1],g:ba
address AGGRsubcountcand
comment "Grouped count aggregate with candidates list";
+
command median(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2])
:bat[:oid,:any_1]
address AGGRmedian3
comment "Grouped median aggregate";
@@ -1453,3 +1454,27 @@ comment "Grouped median aggregate";
command
submedian(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],s:bat[:oid,:oid],skip_nils:bit)
:bat[:oid,:any_1]
address AGGRsubmediancand
comment "Grouped median aggregate with candidate list";
+
+
+command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl)
:bat[:oid,:any_1]
+address AGGRquantile3
+comment "Grouped quantile aggregate";
+
+function quantile(b:bat[:oid,:any_1],q:dbl) :any_1;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list