Changeset: 619584163e99 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=619584163e99
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk_aggr.c
gdk/gdk_calc.h
monetdb5/modules/kernel/aggr.c
monetdb5/modules/kernel/aggr.mal
monetdb5/modules/kernel/aggr.mal.sh
Branch: default
Log Message:
Implemented a two-output version of aggr.avg.
The two outputs are the average and the count over which the average
was calculated (i.e. the number of non-nil values).
diffs (truncated from 1786 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -121,7 +121,7 @@ int BATgetaccess(BAT *b);
PROPrec *BATgetprop(BAT *b, int idx);
gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *g,
BAT *e, BAT *h);
const char *BATgroupaggrinit(const BAT *b, const BAT *g, const BAT *e, const
BAT *s, oid *minp, oid *maxp, BUN *ngrpp, BUN *startp, BUN *endp, BUN *cntp,
const oid **candp, const oid **candendp);
-BAT *BATgroupavg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int
abort_on_error);
+gdk_return BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT *e, BAT *s,
int tp, int skip_nils, int abort_on_error);
BAT *BATgroupcount(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int
abort_on_error);
BAT *BATgroupmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int
abort_on_error);
BAT *BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int
abort_on_error);
@@ -607,8 +607,10 @@ char *mapi_unquote(char *msg);
MapiMsg mapi_virtual_result(MapiHdl hdl, int columns, const char
**columnnames, const char **columntypes, const int *columnlengths, int
tuplecount, const char ***tuples);
# monetdb5
-str AGGRavg2_dbl(bat *retval, bat *bid, bat *eid);
-str AGGRavg3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
+str AGGRavg12_dbl(bat *retval, bat *bid, bat *eid);
+str AGGRavg13_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
+str AGGRavg22_dbl(bat *retval1, bat *retval2, bat *bid, bat *eid);
+str AGGRavg23_dbl(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid);
str AGGRcount2(bat *retval, bat *bid, bat *eid, bit *ignorenils);
str AGGRcount2nils(bat *retval, bat *bid, bat *eid);
str AGGRcount2nonils(bat *retval, bat *bid, bat *eid);
@@ -640,8 +642,10 @@ str AGGRstdev2_dbl(bat *retval, bat *bid
str AGGRstdev3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
str AGGRstdevp2_dbl(bat *retval, bat *bid, bat *eid);
str AGGRstdevp3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
-str AGGRsubavg_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils,
bit *abort_on_error);
-str AGGRsubavgcand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid,
bit *skip_nils, bit *abort_on_error);
+str AGGRsubavg1_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils,
bit *abort_on_error);
+str AGGRsubavg1cand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid,
bit *skip_nils, bit *abort_on_error);
+str AGGRsubavg2_dbl(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid,
bit *skip_nils, bit *abort_on_error);
+str AGGRsubavg2cand_dbl(bat *retval1, bat *retval2, bat *bid, bat *gid, bat
*eid, bat *sid, bit *skip_nils, bit *abort_on_error);
str AGGRsubcount(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils);
str AGGRsubcountcand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, bit
*skip_nils);
str AGGRsubmax(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils);
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -1053,7 +1053,7 @@ BATprod(void *res, int tp, BAT *b, BAT *
z2 = (BUN) (xn - an); \
/* loop invariant: */ \
/* (x - a) - z1 * n == z2 */ \
- while (z2 >= (n)) { \
+ while (z2 >= (BUN) (n)) { \
z2 -= (n); \
z1++; \
} \
@@ -1063,7 +1063,7 @@ BATprod(void *res, int tp, BAT *b, BAT *
/* (x - a) - z1 * n == -z2 */ \
for (;;) { \
z1--; \
- if (z2 < (n)) { \
+ if (z2 < (BUN) (n)) { \
/* proper remainder */ \
z2 = (n) - z2; \
break; \
@@ -1073,8 +1073,8 @@ BATprod(void *res, int tp, BAT *b, BAT *
} \
(a) += z1; \
(r) += z2; \
- if ((r) >= (n)) { \
- (r) -= (n); \
+ if ((r) >= (BUN) (n)) { \
+ (r) -= (BUN) (n); \
(a)++; \
} \
} while (0)
@@ -1119,8 +1119,8 @@ BATprod(void *res, int tp, BAT *b, BAT *
gid = (oid) i; \
if (vals[i] == TYPE##_nil) { \
if (!skip_nils) \
- cnts[gid] = BUN_NONE; \
- } else if (cnts[gid] != BUN_NONE) { \
+ cnts[gid] = wrd_nil; \
+ } else if (cnts[gid] != wrd_nil) { \
AVERAGE_ITER(TYPE, vals[i], \
avgs[gid], \
rems[gid], \
@@ -1129,8 +1129,9 @@ BATprod(void *res, int tp, BAT *b, BAT *
} \
} \
for (i = 0; i < ngrp; i++) { \
- if (cnts[i] == 0 || cnts[i] == BUN_NONE) { \
+ if (cnts[i] == 0 || cnts[i] == wrd_nil) { \
dbls[i] = dbl_nil; \
+ cnts[i] = 0; \
nils++; \
} else { \
dbls[i] = avgs[i] + (dbl) rems[i] / cnts[i]; \
@@ -1164,8 +1165,8 @@ BATprod(void *res, int tp, BAT *b, BAT *
gid = (oid) i; \
if (vals[i] == TYPE##_nil) { \
if (!skip_nils) \
- cnts[gid] = BUN_NONE; \
- } else if (cnts[gid] != BUN_NONE) { \
+ cnts[gid] = wrd_nil; \
+ } else if (cnts[gid] != wrd_nil) { \
AVERAGE_ITER_FLOAT(TYPE, vals[i], \
dbls[gid], \
cnts[gid]); \
@@ -1173,23 +1174,25 @@ BATprod(void *res, int tp, BAT *b, BAT *
} \
} \
for (i = 0; i < ngrp; i++) { \
- if (cnts[i] == 0 || cnts[i] == BUN_NONE) { \
+ if (cnts[i] == 0 || cnts[i] == wrd_nil) { \
dbls[i] = dbl_nil; \
+ cnts[i] = 0; \
nils++; \
} \
} \
} while (0)
/* calculate group averages with optional candidates list */
-BAT *
-BATgroupavg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int
abort_on_error)
+gdk_return
+BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT *e, BAT *s, int tp,
int skip_nils, int abort_on_error)
{
const oid *gids;
oid gid;
oid min, max;
BUN i, ngrp;
BUN nils = 0;
- BUN *rems = NULL, *cnts = NULL;
+ BUN *rems = NULL;
+ wrd *cnts = NULL;
dbl *dbls;
BAT *bn = NULL;
BUN start, end, cnt;
@@ -1203,19 +1206,28 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT
if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end,
&cnt, &cand, &candend)) != NULL) {
GDKerror("BATgroupavg: %s\n", err);
- return NULL;
+ return GDK_FAIL;
}
if (g == NULL) {
GDKerror("BATgroupavg: b and g must be aligned\n");
- return NULL;
+ return GDK_FAIL;
}
if (BATcount(b) == 0 || ngrp == 0) {
- /* trivial: no products, so return bat aligned with g
+ /* trivial: no averages, so return bat aligned with g
* with nil in the tail */
bn = BATconstant(TYPE_dbl, &dbl_nil, ngrp);
BATseqbase(bn, ngrp == 0 ? 0 : min);
- return bn;
+ if (cntsp) {
+ wrd zero = 0;
+ if ((*cntsp = BATconstant(TYPE_wrd, &zero, ngrp)) ==
NULL) {
+ BBPreclaim(bn);
+ return GDK_FAIL;
+ }
+ BATseqbase(*cntsp, ngrp == 0 ? 0 : min);
+ }
+ *bnp = bn;
+ return GDK_SUCCEED;
}
if ((e == NULL ||
@@ -1223,7 +1235,18 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT
(BATtdense(g) || (g->tkey && g->T->nonil))) {
/* trivial: singleton groups, so all results are equal
* to the inputs (but possibly a different type) */
- return BATconvert(b, s, TYPE_dbl, abort_on_error);
+ if ((bn = BATconvert(b, s, TYPE_dbl, abort_on_error)) == NULL)
+ return GDK_FAIL;
+ if (cntsp) {
+ wrd one = 1;
+ if ((*cntsp = BATconstant(TYPE_wrd, &one, ngrp)) ==
NULL) {
+ BBPreclaim(bn);
+ return GDK_FAIL;
+ }
+ BATseqbase(*cntsp, ngrp == 0 ? 0 : min);
+ }
+ *bnp = bn;
+ return GDK_SUCCEED;
}
/* allocate temporary space to do per group calculations */
@@ -1239,9 +1262,16 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT
default:
break;
}
- cnts = GDKzalloc(ngrp * sizeof(BUN));
- if (cnts == NULL)
- goto alloc_fail;
+ if (cntsp) {
+ if ((*cntsp = BATnew(TYPE_void, TYPE_wrd, ngrp)) == NULL)
+ goto alloc_fail;
+ cnts = (wrd *) Tloc(*cntsp, BUNfirst(*cntsp));
+ memset(cnts, 0, ngrp * sizeof(wrd));
+ } else {
+ cnts = GDKzalloc(ngrp * sizeof(wrd));
+ if (cnts == NULL)
+ goto alloc_fail;
+ }
bn = BATnew(TYPE_void, TYPE_dbl, ngrp);
if (bn == NULL)
@@ -1274,14 +1304,18 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT
break;
default:
GDKfree(rems);
- GDKfree(cnts);
+ if (cntsp)
+ BBPreclaim(*cntsp);
+ else
+ GDKfree(cnts);
BBPunfix(bn->batCacheid);
GDKerror("BATgroupavg: type (%s) not supported.\n",
ATOMname(b->ttype));
- return NULL;
+ return GDK_FAIL;
}
GDKfree(rems);
- GDKfree(cnts);
+ if (cntsp == NULL)
+ GDKfree(cnts);
BATsetcount(bn, ngrp);
BATseqbase(bn, min);
bn->tkey = BATcount(bn) <= 1;
@@ -1289,15 +1323,21 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT
bn->trevsorted = BATcount(bn) <= 1;
bn->T->nil = nils != 0;
bn->T->nonil = nils == 0;
- return bn;
+ *bnp = bn;
+ return GDK_SUCCEED;
alloc_fail:
if (bn)
BBPunfix(bn->batCacheid);
GDKfree(rems);
- GDKfree(cnts);
+ if (cntsp) {
+ if (*cntsp)
+ BBPreclaim(*cntsp);
+ } else if (cnts) {
+ GDKfree(cnts);
+ }
GDKerror("BATgroupavg: cannot allocate enough memory.\n");
- return NULL;
+ return GDK_FAIL;
}
#define AVERAGE_TYPE(TYPE) \
diff --git a/gdk/gdk_calc.h b/gdk/gdk_calc.h
--- a/gdk/gdk_calc.h
+++ b/gdk/gdk_calc.h
@@ -120,7 +120,7 @@ gdk_export int BATcalcavg(BAT *b, BAT *s
gdk_export BAT *BATgroupsum(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
gdk_export BAT *BATgroupprod(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
-gdk_export BAT *BATgroupavg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
+gdk_export gdk_return BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT
*e, BAT *s, int tp, int skip_nils, int abort_on_error);
gdk_export BAT *BATgroupcount(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
gdk_export BAT *BATgroupsize(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
gdk_export BAT *BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int
skip_nils, int abort_on_error);
diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c
--- a/monetdb5/modules/kernel/aggr.c
+++ b/monetdb5/modules/kernel/aggr.c
@@ -35,12 +35,19 @@
* grouped aggregates
*/
static str
-AGGRgrouped(bat *retval, BAT *b, BAT *g, BAT *e, int tp,
- BAT *(*grpfunc)(BAT *, BAT *, BAT *, BAT *, int, int,
int),
+AGGRgrouped(bat *retval1, bat *retval2, BAT *b, BAT *g, BAT *e, int tp,
+ BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int,
int),
+ gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *,
BAT *, BAT *, int, int, int),
int skip_nils,
const char *malfunc)
{
- BAT *bn, *t, *map;
+ BAT *bn, *cnts = NULL, *t, *map;
+
+ /* one or the other of grpfunc1 and grpfunc2 is non-NULL */
+ assert(grpfunc1 == NULL || grpfunc2 == NULL);
+ assert(grpfunc1 || grpfunc2);
+ /* if retval2 is non-NULL, we must have grpfunc2 */
+ assert(retval2 == NULL || grpfunc2 != NULL);
if (b == NULL || g == NULL || e == NULL) {
if (b)
@@ -51,7 +58,7 @@ AGGRgrouped(bat *retval, BAT *b, BAT *g,
BBPreleaseref(e->batCacheid);
throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
}
- if (tp == TYPE_any && grpfunc == BATgroupmedian)
+ if (tp == TYPE_any && grpfunc1 == BATgroupmedian)
tp = b->ttype;
if (!BAThdense(b) || !BAThdense(g)) {
/* if b or g don't have a dense head, replace the head with a
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list