Changeset: 619584163e99 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=619584163e99
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk_aggr.c
        gdk/gdk_calc.h
        monetdb5/modules/kernel/aggr.c
        monetdb5/modules/kernel/aggr.mal
        monetdb5/modules/kernel/aggr.mal.sh
Branch: default
Log Message:

Implemented a two-output version of aggr.avg.
The two outputs are the average and the count of values over which the
average was calculated (i.e. the number of non-nil values).


diffs (truncated from 1786 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -121,7 +121,7 @@ int BATgetaccess(BAT *b);
 PROPrec *BATgetprop(BAT *b, int idx);
 gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *g, 
BAT *e, BAT *h);
 const char *BATgroupaggrinit(const BAT *b, const BAT *g, const BAT *e, const 
BAT *s, oid *minp, oid *maxp, BUN *ngrpp, BUN *startp, BUN *endp, BUN *cntp, 
const oid **candp, const oid **candendp);
-BAT *BATgroupavg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error);
+gdk_return BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT *e, BAT *s, 
int tp, int skip_nils, int abort_on_error);
 BAT *BATgroupcount(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error);
 BAT *BATgroupmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error);
 BAT *BATgroupmedian(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error);
@@ -607,8 +607,10 @@ char *mapi_unquote(char *msg);
 MapiMsg mapi_virtual_result(MapiHdl hdl, int columns, const char 
**columnnames, const char **columntypes, const int *columnlengths, int 
tuplecount, const char ***tuples);
 
 # monetdb5
-str AGGRavg2_dbl(bat *retval, bat *bid, bat *eid);
-str AGGRavg3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
+str AGGRavg12_dbl(bat *retval, bat *bid, bat *eid);
+str AGGRavg13_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
+str AGGRavg22_dbl(bat *retval1, bat *retval2, bat *bid, bat *eid);
+str AGGRavg23_dbl(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid);
 str AGGRcount2(bat *retval, bat *bid, bat *eid, bit *ignorenils);
 str AGGRcount2nils(bat *retval, bat *bid, bat *eid);
 str AGGRcount2nonils(bat *retval, bat *bid, bat *eid);
@@ -640,8 +642,10 @@ str AGGRstdev2_dbl(bat *retval, bat *bid
 str AGGRstdev3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
 str AGGRstdevp2_dbl(bat *retval, bat *bid, bat *eid);
 str AGGRstdevp3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
-str AGGRsubavg_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils, 
bit *abort_on_error);
-str AGGRsubavgcand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
bit *skip_nils, bit *abort_on_error);
+str AGGRsubavg1_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils, 
bit *abort_on_error);
+str AGGRsubavg1cand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
bit *skip_nils, bit *abort_on_error);
+str AGGRsubavg2_dbl(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, 
bit *skip_nils, bit *abort_on_error);
+str AGGRsubavg2cand_dbl(bat *retval1, bat *retval2, bat *bid, bat *gid, bat 
*eid, bat *sid, bit *skip_nils, bit *abort_on_error);
 str AGGRsubcount(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils);
 str AGGRsubcountcand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, bit 
*skip_nils);
 str AGGRsubmax(bat *retval, bat *bid, bat *gid, bat *eid, bit *skip_nils);
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -1053,7 +1053,7 @@ BATprod(void *res, int tp, BAT *b, BAT *
                        z2 = (BUN) (xn - an);                           \
                        /* loop invariant: */                           \
                        /* (x - a) - z1 * n == z2 */                    \
-                       while (z2 >= (n)) {                             \
+                       while (z2 >= (BUN) (n)) {                       \
                                z2 -= (n);                              \
                                z1++;                                   \
                        }                                               \
@@ -1063,7 +1063,7 @@ BATprod(void *res, int tp, BAT *b, BAT *
                        /* (x - a) - z1 * n == -z2 */                   \
                        for (;;) {                                      \
                                z1--;                                   \
-                               if (z2 < (n)) {                         \
+                               if (z2 < (BUN) (n)) {                   \
                                        /* proper remainder */          \
                                        z2 = (n) - z2;                  \
                                        break;                          \
@@ -1073,8 +1073,8 @@ BATprod(void *res, int tp, BAT *b, BAT *
                }                                                       \
                (a) += z1;                                              \
                (r) += z2;                                              \
-               if ((r) >= (n)) {                                       \
-                       (r) -= (n);                                     \
+               if ((r) >= (BUN) (n)) {                                 \
+                       (r) -= (BUN) (n);                               \
                        (a)++;                                          \
                }                                                       \
        } while (0)
@@ -1119,8 +1119,8 @@ BATprod(void *res, int tp, BAT *b, BAT *
                                        gid = (oid) i;                  \
                                if (vals[i] == TYPE##_nil) {            \
                                        if (!skip_nils)                 \
-                                               cnts[gid] = BUN_NONE;   \
-                               } else if (cnts[gid] != BUN_NONE) {     \
+                                               cnts[gid] = wrd_nil;    \
+                               } else if (cnts[gid] != wrd_nil) {      \
                                        AVERAGE_ITER(TYPE, vals[i],     \
                                                     avgs[gid],         \
                                                     rems[gid],         \
@@ -1129,8 +1129,9 @@ BATprod(void *res, int tp, BAT *b, BAT *
                        }                                               \
                }                                                       \
                for (i = 0; i < ngrp; i++) {                            \
-                       if (cnts[i] == 0 || cnts[i] == BUN_NONE) {      \
+                       if (cnts[i] == 0 || cnts[i] == wrd_nil) {       \
                                dbls[i] = dbl_nil;                      \
+                               cnts[i] = 0;                            \
                                nils++;                                 \
                        } else {                                        \
                                dbls[i] = avgs[i] + (dbl) rems[i] / cnts[i]; \
@@ -1164,8 +1165,8 @@ BATprod(void *res, int tp, BAT *b, BAT *
                                        gid = (oid) i;                  \
                                if (vals[i] == TYPE##_nil) {            \
                                        if (!skip_nils)                 \
-                                               cnts[gid] = BUN_NONE;   \
-                               } else if (cnts[gid] != BUN_NONE) {     \
+                                               cnts[gid] = wrd_nil;    \
+                               } else if (cnts[gid] != wrd_nil) {      \
                                        AVERAGE_ITER_FLOAT(TYPE, vals[i], \
                                                           dbls[gid],   \
                                                           cnts[gid]);  \
@@ -1173,23 +1174,25 @@ BATprod(void *res, int tp, BAT *b, BAT *
                        }                                               \
                }                                                       \
                for (i = 0; i < ngrp; i++) {                            \
-                       if (cnts[i] == 0 || cnts[i] == BUN_NONE) {      \
+                       if (cnts[i] == 0 || cnts[i] == wrd_nil) {       \
                                dbls[i] = dbl_nil;                      \
+                               cnts[i] = 0;                            \
                                nils++;                                 \
                        }                                               \
                }                                                       \
        } while (0)
 
 /* calculate group averages with optional candidates list */
-BAT *
-BATgroupavg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error)
+gdk_return
+BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT *e, BAT *s, int tp, 
int skip_nils, int abort_on_error)
 {
        const oid *gids;
        oid gid;
        oid min, max;
        BUN i, ngrp;
        BUN nils = 0;
-       BUN *rems = NULL, *cnts = NULL;
+       BUN *rems = NULL;
+       wrd *cnts = NULL;
        dbl *dbls;
        BAT *bn = NULL;
        BUN start, end, cnt;
@@ -1203,19 +1206,28 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT 
        if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end,
                                    &cnt, &cand, &candend)) != NULL) {
                GDKerror("BATgroupavg: %s\n", err);
-               return NULL;
+               return GDK_FAIL;
        }
        if (g == NULL) {
                GDKerror("BATgroupavg: b and g must be aligned\n");
-               return NULL;
+               return GDK_FAIL;
        }
 
        if (BATcount(b) == 0 || ngrp == 0) {
-               /* trivial: no products, so return bat aligned with g
+               /* trivial: no averages, so return bat aligned with g
                 * with nil in the tail */
                bn = BATconstant(TYPE_dbl, &dbl_nil, ngrp);
                BATseqbase(bn, ngrp == 0 ? 0 : min);
-               return bn;
+               if (cntsp) {
+                       wrd zero = 0;
+                       if ((*cntsp = BATconstant(TYPE_wrd, &zero, ngrp)) == 
NULL) {
+                               BBPreclaim(bn);
+                               return GDK_FAIL;
+                       }
+                       BATseqbase(*cntsp, ngrp == 0 ? 0 : min);
+               }
+               *bnp = bn;
+               return GDK_SUCCEED;
        }
 
        if ((e == NULL ||
@@ -1223,7 +1235,18 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT 
            (BATtdense(g) || (g->tkey && g->T->nonil))) {
                /* trivial: singleton groups, so all results are equal
                 * to the inputs (but possibly a different type) */
-               return BATconvert(b, s, TYPE_dbl, abort_on_error);
+               if ((bn = BATconvert(b, s, TYPE_dbl, abort_on_error)) == NULL)
+                       return GDK_FAIL;
+               if (cntsp) {
+                       wrd one = 1;
+                       if ((*cntsp = BATconstant(TYPE_wrd, &one, ngrp)) == 
NULL) {
+                               BBPreclaim(bn);
+                               return GDK_FAIL;
+                       }
+                       BATseqbase(*cntsp, ngrp == 0 ? 0 : min);
+               }
+               *bnp = bn;
+               return GDK_SUCCEED;
        }
 
        /* allocate temporary space to do per group calculations */
@@ -1239,9 +1262,16 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT 
        default:
                break;
        }
-       cnts = GDKzalloc(ngrp * sizeof(BUN));
-       if (cnts == NULL)
-               goto alloc_fail;
+       if (cntsp) {
+               if ((*cntsp = BATnew(TYPE_void, TYPE_wrd, ngrp)) == NULL)
+                       goto alloc_fail;
+               cnts = (wrd *) Tloc(*cntsp, BUNfirst(*cntsp));
+               memset(cnts, 0, ngrp * sizeof(wrd));
+       } else {
+               cnts = GDKzalloc(ngrp * sizeof(wrd));
+               if (cnts == NULL)
+                       goto alloc_fail;
+       }
 
        bn = BATnew(TYPE_void, TYPE_dbl, ngrp);
        if (bn == NULL)
@@ -1274,14 +1304,18 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT 
                break;
        default:
                GDKfree(rems);
-               GDKfree(cnts);
+               if (cntsp)
+                       BBPreclaim(*cntsp);
+               else
+                       GDKfree(cnts);
                BBPunfix(bn->batCacheid);
                GDKerror("BATgroupavg: type (%s) not supported.\n",
                         ATOMname(b->ttype));
-               return NULL;
+               return GDK_FAIL;
        }
        GDKfree(rems);
-       GDKfree(cnts);
+       if (cntsp == NULL)
+               GDKfree(cnts);
        BATsetcount(bn, ngrp);
        BATseqbase(bn, min);
        bn->tkey = BATcount(bn) <= 1;
@@ -1289,15 +1323,21 @@ BATgroupavg(BAT *b, BAT *g, BAT *e, BAT 
        bn->trevsorted = BATcount(bn) <= 1;
        bn->T->nil = nils != 0;
        bn->T->nonil = nils == 0;
-       return bn;
+       *bnp = bn;
+       return GDK_SUCCEED;
 
   alloc_fail:
        if (bn)
                BBPunfix(bn->batCacheid);
        GDKfree(rems);
-       GDKfree(cnts);
+       if (cntsp) {
+               if (*cntsp)
+                       BBPreclaim(*cntsp);
+       } else if (cnts) {
+               GDKfree(cnts);
+       }
        GDKerror("BATgroupavg: cannot allocate enough memory.\n");
-       return NULL;
+       return GDK_FAIL;
 }
 
 #define AVERAGE_TYPE(TYPE)                                             \
diff --git a/gdk/gdk_calc.h b/gdk/gdk_calc.h
--- a/gdk/gdk_calc.h
+++ b/gdk/gdk_calc.h
@@ -120,7 +120,7 @@ gdk_export int BATcalcavg(BAT *b, BAT *s
 
 gdk_export BAT *BATgroupsum(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 gdk_export BAT *BATgroupprod(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
-gdk_export BAT *BATgroupavg(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
+gdk_export gdk_return BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT 
*e, BAT *s, int tp, int skip_nils, int abort_on_error);
 gdk_export BAT *BATgroupcount(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 gdk_export BAT *BATgroupsize(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 gdk_export BAT *BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c
--- a/monetdb5/modules/kernel/aggr.c
+++ b/monetdb5/modules/kernel/aggr.c
@@ -35,12 +35,19 @@
  * grouped aggregates
  */
 static str
-AGGRgrouped(bat *retval, BAT *b, BAT *g, BAT *e, int tp,
-                       BAT *(*grpfunc)(BAT *, BAT *, BAT *, BAT *, int, int, 
int),
+AGGRgrouped(bat *retval1, bat *retval2, BAT *b, BAT *g, BAT *e, int tp,
+                       BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, 
int),
+                       gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, 
BAT *, BAT *, int, int, int),
                        int skip_nils,
                        const char *malfunc)
 {
-       BAT *bn, *t, *map;
+       BAT *bn, *cnts = NULL, *t, *map;
+
+       /* one or the other of grpfunc1 and grpfunc2 is non-NULL */
+       assert(grpfunc1 == NULL || grpfunc2 == NULL);
+       assert(grpfunc1 || grpfunc2);
+       /* if retval2 is non-NULL, we must have grpfunc2 */
+       assert(retval2 == NULL || grpfunc2 != NULL);
 
        if (b == NULL || g == NULL || e == NULL) {
                if (b)
@@ -51,7 +58,7 @@ AGGRgrouped(bat *retval, BAT *b, BAT *g,
                        BBPreleaseref(e->batCacheid);
                throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
        }
-       if (tp == TYPE_any && grpfunc == BATgroupmedian)
+       if (tp == TYPE_any && grpfunc1 == BATgroupmedian)
                tp = b->ttype;
        if (!BAThdense(b) || !BAThdense(g)) {
                /* if b or g don't have a dense head, replace the head with a
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to