Changeset: e33b754a1680 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e33b754a1680
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk_aggr.c
gdk/gdk_calc.h
monetdb5/modules/kernel/algebra.mx
Branch: default
Log Message:
Implemented BATmin/BATmax using BATgroupmin/BATgroupmax.
diffs (truncated from 670 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -151,8 +151,10 @@ BAT *BATmark(BAT *b, oid base);
BAT *BATmark_grp(BAT *b, BAT *g, oid *base);
BAT *BATmaterialize(BAT *b);
BAT *BATmaterializeh(BAT *b);
+void *BATmax(BAT *b, void *aggr);
size_t BATmemsize(BAT *b, int dirty);
BAT *BATmergecand(BAT *a, BAT *b);
+void *BATmin(BAT *b, void *aggr);
int BATmmap(BAT *b, int hb, int tb, int hh, int th, int force);
BAT *BATmode(BAT *b, int onoff);
gdk_return BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid, int order, int printorderby);
@@ -914,8 +916,6 @@ str BATXMLstr2xml(int *x, int *s);
str BATXMLxml2str(int *ret, int *bid);
str BATXMLxmltext(int *ret, int *bid);
str BATXMLxquery(int *x, int *s, str *expr);
-ptr BATmax(BAT *b, ptr aggr);
-ptr BATmin(BAT *b, ptr aggr);
str BKCaccbuild(int *ret, int *bid, str *acc, ptr *param);
str BKCaccbuild_std(int *ret, int *bid, int *acc);
str BKCappend_force_wrap(int *r, int *bid, int *uid, bit *force);
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -1732,7 +1732,7 @@ BATgroupsize(BAT *b, BAT *g, BAT *e, BAT
#define AGGR_CMP(TYPE, OP) \
do { \
const TYPE *vals = (const TYPE *) Tloc(b, BUNfirst(b)); \
- if (g && BATtdense(g)) { \
+ if (ngrp == cnt) { \
/* single element groups */ \
if (cand) { \
while (cand < candend) { \
@@ -1786,24 +1786,15 @@ BATgroupsize(BAT *b, BAT *g, BAT *e, BAT
} \
} while (0)
-/* calculate group minimums with optional candidates list
- *
- * note that this functions returns *positions* of where the minimum
- * values occur */
-BAT *
-BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int abort_on_error)
+static BAT *
+BATgroupminmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int abort_on_error, BUN (*minmax)(oid *, BAT *, const oid *, BUN, oid, oid, BUN, BUN, const oid *, const oid *, BUN, int, int), const char *name)
{
const oid *gids;
- oid gid;
oid min, max;
- BUN i, ngrp;
+ BUN ngrp;
oid *oids;
BAT *bn = NULL;
BUN nils;
- int t;
- const void *nil;
- int (*atomcmp)(const void *, const void *);
- BATiter bi;
BUN start, end, cnt;
const oid *cand = NULL, *candend = NULL;
const char *err;
@@ -1813,14 +1804,14 @@ BATgroupmin(BAT *b, BAT *g, BAT *e, BAT
(void) abort_on_error; /* functions) argument */
if (!ATOMlinear(b->ttype)) {
- GDKerror("BATgroupmin: cannot determine minimum on "
- "non-linear type %s\n", ATOMname(b->ttype));
+ GDKerror("%s: cannot determine minimum on "
+ "non-linear type %s\n", name, ATOMname(b->ttype));
return NULL;
}
if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end, &cnt, &cand, &candend)) != NULL) {
- GDKerror("BATgroupmin: %s\n", err);
+ GDKerror("%s: %s\n", name, err);
return NULL;
}
@@ -1836,15 +1827,85 @@ BATgroupmin(BAT *b, BAT *g, BAT *e, BAT
if (bn == NULL)
return NULL;
oids = (oid *) Tloc(bn, BUNfirst(bn));
- nils = ngrp;
- for (i = 0; i < ngrp; i++)
- oids[i] = oid_nil;
if (g == NULL || BATtdense(g))
gids = NULL;
else
gids = (const oid *) Tloc(g, BUNfirst(g) + start);
+ nils = (*minmax)(oids, b, gids, ngrp, min, max, start, end,
+ cand, candend, cnt, skip_nils, g && BATtdense(g));
+
+ BATsetcount(bn, ngrp);
+
+ BATseqbase(bn, min);
+ bn->tkey = BATcount(bn) <= 1;
+ bn->tsorted = BATcount(bn) <= 1;
+ bn->trevsorted = BATcount(bn) <= 1;
+ bn->T->nil = nils != 0;
+ bn->T->nonil = nils == 0;
+ return bn;
+}
+
+static void *
+BATminmax(BAT *b, void *aggr,
+ BUN (*minmax)(oid *, BAT *, const oid *, BUN, oid, oid, BUN, BUN,
+ const oid *, const oid *, BUN, int, int))
+{
+ oid pos;
+ void *res;
+ int s;
+ int needdecref = 0;
+ BATiter bi;
+
+ if (!BAThdense(b)) {
+ if ((b = BATmirror(BATmark(BATmirror(b), 0))) == NULL)
+ return NULL;
+ needdecref = 1;
+ }
+ (void) (*minmax)(&pos, b, NULL, 1, 0, 0, 0, BATcount(b), NULL, NULL,
+ BATcount(b), 1, 0);
+ if (pos == oid_nil) {
+ res = ATOMnilptr(b->ttype);
+ } else {
+ bi = bat_iterator(b);
+ res = BUNtail(bi, pos + BUNfirst(b) - b->hseqbase);
+ }
+ if (aggr == NULL) {
+ s = ATOMlen(b->ttype, res);
+ aggr = GDKmalloc(s);
+ } else {
+ s = ATOMsize(ATOMtype(b->ttype));
+ }
+ if (aggr != NULL) /* else: malloc error */
+ memcpy(aggr, res, s);
+ if (needdecref)
+ BBPunfix(b->batCacheid);
+ return aggr;
+}
+
+/* calculate group minimums with optional candidates list
+ *
+ * note that this function returns *positions* of where the minimum
+ * values occur */
+static BUN
+do_groupmin(oid *oids, BAT *b, const oid *gids, BUN ngrp, oid min, oid max,
+ BUN start, BUN end, const oid *cand, const oid *candend, BUN cnt,
+ int skip_nils, int gdense)
+{
+ oid gid;
+ BUN i, nils;
+ int t;
+ const void *nil;
+ int (*atomcmp)(const void *, const void *);
+ BATiter bi;
+
+ nils = ngrp;
+ for (i = 0; i < ngrp; i++)
+ oids[i] = oid_nil;
+ if (cnt == 0)
+ return nils;
+
t = b->T->type;
nil = ATOMnilptr(t);
atomcmp = BATatoms[t].atomCmp;
@@ -1874,10 +1935,155 @@ BATgroupmin(BAT *b, BAT *g, BAT *e, BAT
case TYPE_dbl:
AGGR_CMP(dbl, LT);
break;
+ case TYPE_void:
+ if (!gdense && gids == NULL) {
+ oids[0] = start + b->hseqbase;
+ nils--;
+ break;
+ }
+ /* fall through */
default:
bi = bat_iterator(b);
- if (g && BATtdense(g)) {
+ if (gdense) {
+ /* single element groups */
+ if (cand) {
+ while (cand < candend) {
+ i = *cand++ - b->hseqbase;
+ if (i >= end)
+ break;
+ if (!skip_nils ||
+ (*atomcmp)(BUNtail(bi, i + BUNfirst(b)), nil) != 0) {
+ oids[i] = i + b->hseqbase;
+ nils--;
+ }
+ }
+ } else {
+ for (i = start; i < end; i++) {
+ if (!skip_nils ||
+ (*atomcmp)(BUNtail(bi, i + BUNfirst(b)), nil) != 0) {
+ oids[i] = i + b->hseqbase;
+ nils--;
+ }
+ }
+ }
+ } else {
+ gid = 0; /* in case gids == NULL */
+ for (;;) {
+ if (cand) {
+ if (cand == candend)
+ break;
+ i = *cand++ - b->hseqbase;
+ if (i >= end)
+ break;
+ } else {
+ i = start++;
+ if (i == end)
+ break;
+ }
+ if (gids == NULL ||
+ (gids[i] >= min && gids[i] <= max)) {
+ const void *v = BUNtail(bi, i + BUNfirst(b));
+ if (gids)
+ gid = gids[i] - min;
+ if (!skip_nils ||
+ (*atomcmp)(v, nil) != 0) {
+ if (oids[gid] == oid_nil) {
+ oids[gid] = i + b->hseqbase;
+ nils--;
+ } else if (t != TYPE_void) {
+ const void *g = BUNtail(bi, (BUN) (oids[gid] - b->hseqbase) + BUNfirst(b));
+ if ((*atomcmp)(g, nil) != 0 &&
+ ((*atomcmp)(v, nil) == 0 ||
+ LT((*atomcmp)(v, g), 0)))
+ oids[gid] = i + b->hseqbase;
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ return nils;
+}
+
+BAT *
+BATgroupmin(BAT *b, BAT *g, BAT *e, BAT *s, int tp,
+ int skip_nils, int abort_on_error)
+{
+ return BATgroupminmax(b, g, e, s, tp, skip_nils, abort_on_error,
+ do_groupmin, "BATgroupmin");
+}
+
+void *
+BATmin(BAT *b, void *aggr)
+{
+ return BATminmax(b, aggr, do_groupmin);
+}
+
+/* calculate group maximums with optional candidates list
+ *
+ * note that this function returns *positions* of where the maximum
+ * values occur */
+static BUN
+do_groupmax(oid *oids, BAT *b, const oid *gids, BUN ngrp, oid min, oid max,
+ BUN start, BUN end, const oid *cand, const oid *candend, BUN cnt,
+ int skip_nils, int gdense)
+{
+ oid gid;
+ BUN i, nils;
+ int t;
+ const void *nil;
+ int (*atomcmp)(const void *, const void *);
+ BATiter bi;
+
+ nils = ngrp;
+ for (i = 0; i < ngrp; i++)
+ oids[i] = oid_nil;
+ if (cnt == 0)
+ return nils;
+
+ t = b->T->type;
+ nil = ATOMnilptr(t);
+ atomcmp = BATatoms[t].atomCmp;
+ if (t != ATOMstorage(t) &&
+ ATOMnilptr(ATOMstorage(t)) == nil &&
+ BATatoms[ATOMstorage(t)].atomCmp == atomcmp)
+ t = ATOMstorage(t);
+ switch (t) {
+ case TYPE_bte:
+ AGGR_CMP(bte, GT);
+ break;
+ case TYPE_sht:
+ AGGR_CMP(sht, GT);
+ break;
+ case TYPE_int:
+ AGGR_CMP(int, GT);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list