Changeset: 27035a30dfa6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=27035a30dfa6
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_aggr.c
        gdk/gdk_align.c
        gdk/gdk_analytic_func.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_calc.c
        gdk/gdk_calc.h
        gdk/gdk_calc_compare.h
        gdk/gdk_calc_private.h
        gdk/gdk_cand.c
        gdk/gdk_cand.h
        gdk/gdk_string.c
        monetdb5/modules/atoms/batxml.c
        monetdb5/modules/atoms/json.c
Branch: candidate-exceptions
Log Message:

Starting with negative candidate lists.
Negative candidates (aka exceptions) are stored in the vheap of a
TYPE_void non-dense transient BAT.
I have started bottom-up, so nowhere are these exception BATs created.
I have implemented a new way of iterating through a candidate list
using a candidate iter (struct canditer).  This is now used in some of
the functions in which candidates are used, but more are to follow.
This current version compiles and produces the same errors in the
testset as the version of the default branch on which this is based.


diffs (truncated from 14812 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -122,7 +122,7 @@ gdk_return BATfirstn(BAT **topn, BAT **g
 restrict_t BATgetaccess(BAT *b);
 PROPrec *BATgetprop(BAT *b, enum prop_t idx);
 gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s, 
BAT *g, BAT *e, BAT *h) __attribute__((__warn_unused_result__));
-const char *BATgroupaggrinit(BAT *b, BAT *g, BAT *e, BAT *s, oid *minp, oid 
*maxp, BUN *ngrpp, BUN *startp, BUN *endp, const oid **candp, const oid 
**candendp);
+const char *BATgroupaggrinit(BAT *b, BAT *g, BAT *e, BAT *s, oid *minp, oid 
*maxp, BUN *ngrpp, struct canditer *ci, BUN *ncand);
 gdk_return BATgroupavg(BAT **bnp, BAT **cntsp, BAT *b, BAT *g, BAT *e, BAT *s, 
int tp, bool skip_nils, bool abort_on_error, int scale);
 BAT *BATgroupcount(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, 
bool abort_on_error);
 BAT *BATgroupmax(BAT *b, BAT *g, BAT *e, BAT *s, int tp, bool skip_nils, bool 
abort_on_error);
@@ -395,6 +395,10 @@ ssize_t bitToStr(str *dst, size_t *len, 
 ssize_t bteFromStr(const char *src, size_t *len, bte **dst, bool external);
 ssize_t bteToStr(str *dst, size_t *len, const bte *src, bool external);
 const bte bte_nil;
+oid canditer_idx(struct canditer *ci, BUN p);
+BUN canditer_init(struct canditer *ci, BAT *b, BAT *s);
+oid canditer_last(struct canditer *ci);
+void canditer_reset(struct canditer *ci);
 int closedir(DIR *dir);
 ssize_t dblFromStr(const char *src, size_t *len, dbl **dst, bool external);
 ssize_t dblToStr(str *dst, size_t *len, const dbl *src, bool external);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -550,6 +550,7 @@ typedef uint64_t BUN8type;
 #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
 #endif
 
+#include "gdk_atoms.h"
 
 /*
  * @- Checking and Error definitions:
@@ -1219,14 +1220,61 @@ typedef var_t stridx_t;
 #define BUNtvar(bi,p)  (assert((bi).b->ttype && (bi).b->tvarsized), (void *) 
(Tbase((bi).b)+BUNtvaroff(bi,p)))
 #define BUNtail(bi,p)  
((bi).b->ttype?(bi).b->tvarsized?BUNtvar(bi,p):BUNtloc(bi,p):BUNtpos(bi,p))
 
+#define BUNlast(b)     (assert((b)->batCount <= BUN_MAX), (b)->batCount)
+
+#define BATcount(b)    ((b)->batCount)
+
 /* return the oid value at BUN position p from the (v)oid bat b
  * works with any TYPE_void or TYPE_oid bat */
-#define BUNtoid(b,p)   (assert(ATOMtype((b)->ttype) == TYPE_oid),      \
-                        (is_oid_nil((b)->tseqbase)                     \
-                         ? ((b)->ttype == TYPE_void                    \
-                            ? (void) (p), oid_nil                      \
-                            : ((const oid *) (b)->theap.base)[p])      \
-                         : (oid) ((b)->tseqbase + (BUN) (p))))
+static inline oid
+BUNtoid(BAT *b, BUN p)
+{
+       assert(ATOMtype(b->ttype) == TYPE_oid);
+       /* BATcount is the number of valid entries, so with
+        * exceptions, the last value can well be larger than
+        * b->tseqbase + BATcount(b) */
+       assert(p < BATcount(b));
+       assert(b->ttype == TYPE_void || b->tvheap == NULL);
+       if (is_oid_nil(b->tseqbase)) {
+               if (b->ttype == TYPE_void)
+                       return b->tseqbase;
+               return ((const oid *) (b)->theap.base)[p];
+       }
+       oid o = b->tseqbase + p;
+       if (b->ttype == TYPE_oid || b->tvheap == NULL) {
+               return o;
+       }
+       /* only exceptions allowed on transient BATs */
+       assert(b->batRole == TRANSIENT);
+       /* make sure exception area is a reasonable size */
+       assert(b->tvheap->free % SIZEOF_OID == 0);
+       BUN nexc = (BUN) (b->tvheap->free / SIZEOF_OID);
+       if (nexc == 0) {
+               /* no exceptions (why the vheap?) */
+               return o;
+       }
+       const oid *exc = (oid *) b->tvheap->base;
+       if (o < exc[0]) {
+               /* before first exception */
+               return o;
+       }
+       BUN hi = nexc - 1;
+       if (o + hi >= exc[hi]) {
+               /* after last exception */
+               return o + nexc;
+       }
+       BUN lo = 0;
+       /* perform binary search on exception list
+        * loop invariant: o + lo <= exc[lo] && o + hi > exc[hi] */
+       while (lo + 1 < hi) {
+               BUN mid = (lo + hi) / 2;
+               if (o + mid <= exc[mid])
+                       lo = mid;
+               else
+                       hi = mid;
+       }
+       return o + hi;
+}
 
 static inline BATiter
 bat_iterator(BAT *b)
@@ -1238,10 +1286,6 @@ bat_iterator(BAT *b)
        return bi;
 }
 
-#define BUNlast(b)     (assert((b)->batCount <= BUN_MAX), (b)->batCount)
-
-#define BATcount(b)    ((b)->batCount)
-
 /*
  * @- BAT properties
  * @multitable @columnfractions 0.08 0.7
@@ -1427,7 +1471,7 @@ gdk_export void GDKqsort(void *restrict 
 #define BATtordered(b) ((b)->tsorted)
 #define BATtrevordered(b) ((b)->trevsorted)
 /* BAT is dense (i.e., BATtvoid() is true and tseqbase is not NIL) */
-#define BATtdense(b)   (!is_oid_nil((b)->tseqbase))
+#define BATtdense(b)   (!is_oid_nil((b)->tseqbase) && (b)->tvheap == NULL)
 /* BATtvoid: BAT can be (or actually is) represented by TYPE_void */
 #define BATtvoid(b)    (BATtdense(b) || (b)->ttype==TYPE_void)
 #define BATtkey(b)     ((b)->tkey || BATtdense(b))
@@ -2242,7 +2286,6 @@ gdk_export void GDKclrerr(void);
 
 #include "gdk_delta.h"
 #include "gdk_hash.h"
-#include "gdk_atoms.h"
 #include "gdk_bbp.h"
 #include "gdk_utils.h"
 
@@ -2363,9 +2406,7 @@ BATdescriptor(bat i)
 static inline void *
 Tpos(BATiter *bi, BUN p)
 {
-       bi->tvid = bi->b->tseqbase;
-       if (!is_oid_nil(bi->tvid))
-               bi->tvid += p;
+       bi->tvid = BUNtoid(bi->b, p);
        return (void*)&bi->tvid;
 }
 
@@ -2742,6 +2783,7 @@ gdk_export bool BATcandcontains(BAT *s, 
 gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, 
BAT *grps, BUN n, bool asc, bool nilslast, bool distinct)
        __attribute__((__warn_unused_result__));
 
+#include "gdk_cand.h"
 #include "gdk_calc.h"
 
 /*
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -8,6 +8,7 @@
 
 #include "monetdb_config.h"
 #include "gdk.h"
+#include "gdk_cand.h"
 #include "gdk_private.h"
 #include "gdk_calc_private.h"
 #include <math.h>
@@ -61,14 +62,12 @@
 const char *
 BATgroupaggrinit(BAT *b, BAT *g, BAT *e, BAT *s,
                 /* outputs: */
-                oid *minp, oid *maxp, BUN *ngrpp, BUN *startp, BUN *endp,
-                const oid **candp, const oid **candendp)
+                oid *minp, oid *maxp, BUN *ngrpp,
+                struct canditer *ci, BUN *ncand)
 {
        oid min, max;
        BUN i, ngrp;
        const oid *restrict gids;
-       BUN start, end, cnt;
-       const oid *cand = NULL, *candend = NULL;
 
        if (b == NULL)
                return "b must exist";
@@ -136,11 +135,7 @@ BATgroupaggrinit(BAT *b, BAT *g, BAT *e,
        *maxp = max;
        *ngrpp = ngrp;
 
-       CANDINIT(b, s, start, end, cnt, cand, candend);
-       *startp = start;
-       *endp = end;
-       *candp = cand;
-       *candendp = candend;
+       *ncand = canditer_init(ci, b, s);
 
        return NULL;
 }
@@ -180,9 +175,10 @@ exchange(double *x, double *y)
 
 /* this function was adapted from https://bugs.python.org/file10357/msum4.py */
 BUN
-dofsum(const void *restrict values, oid seqb, BUN start, BUN end,
+dofsum(const void *restrict values, oid seqb,
+       struct canditer *restrict ci, BUN ncand,
        void *restrict results, BUN ngrp, int tp1, int tp2,
-       const oid *restrict cand, const oid *candend, const oid *restrict gids,
+       const oid *restrict gids,
        oid min, oid max, bool skip_nils, bool abort_on_error,
        bool nil_if_empty, const char *func)
 {
@@ -236,16 +232,9 @@ dofsum(const void *restrict values, oid 
                        return BUN_NONE;
                }
        }
-       for (;;) {
-               if (cand) {
-                       if (cand >= candend)
-                               break;
-                       listi = *cand++ - seqb;
-               } else {
-                       if (start >= end)
-                               break;
-                       listi = start++;
-               }
+       while (ncand > 0) {
+               ncand--;
+               listi = canditer_next(ci) - seqb;
                grp = gids ? gids[listi] : 0;
                if (grp < min || grp > max)
                        continue;
@@ -457,19 +446,19 @@ dofsum(const void *restrict values, oid 
        do {                                                            \
                TYPE1 x;                                                \
                const TYPE1 *restrict vals = (const TYPE1 *) values;    \
-               if (ngrp == 1 && cand == NULL) {                        \
+               if (ngrp == 1 && ci->tpe == cand_dense) {               \
                        /* single group, no candidate list */           \
                        TYPE2 sum;                                      \
                        ALGODEBUG fprintf(stderr,                       \
                                          "#%s: no candidates, no groups; " \
-                                         "start " BUNFMT ", end " BUNFMT \
+                                         "start " OIDFMT ", count " BUNFMT \
                                          ", nonil = %d\n",             \
-                                         func, start, end, nonil);     \
+                                         func, ci->seq, ncand, nonil); \
                        sum = 0;                                        \
                        if (nonil) {                                    \
-                               *seen = start < end;                    \
-                               for (i = start; i < end && nils == 0; i++) { \
-                                       x = vals[i];                    \
+                               *seen = ncand > 0;                      \
+                               for (i = 0; i < ncand && nils == 0; i++) { \
+                                       x = vals[ci->seq + i - seqb];   \
                                        ADD_WITH_CHECK(x, sum,          \
                                                       TYPE2, sum,      \
                                                       GDK_##TYPE2##_max, \
@@ -477,8 +466,8 @@ dofsum(const void *restrict values, oid 
                                }                                       \
                        } else {                                        \
                                bool seenval = false;                   \
-                               for (i = start; i < end && nils == 0; i++) { \
-                                       x = vals[i];                    \
+                               for (i = 0; i < ncand && nils == 0; i++) { \
+                                       x = vals[ci->seq + i - seqb];   \
                                        if (is_##TYPE1##_nil(x)) {      \
                                                if (!skip_nils) {       \
                                                        sum = TYPE2##_nil; \
@@ -502,15 +491,12 @@ dofsum(const void *restrict values, oid 
                        bool seenval = false;                           \
                        ALGODEBUG fprintf(stderr,                       \
                                          "#%s: with candidates, no groups; " \
-                                         "start " BUNFMT ", end " BUNFMT \
+                                         "start " OIDFMT ", count " BUNFMT \
                                          "\n",                         \
-                                         func, start, end);            \
+                                         func, ci->seq, ncand);        \
                        sum = 0;                                        \
-                       while (cand < candend && nils == 0) {           \
-                               i = *cand++ - seqb;                     \
-                               if (i >= end)                           \
-                                       break;                          \
-                               x = vals[i];                            \
+                       for (i = 0; i < ncand && nils == 0; i++) {      \
+                               x = vals[canditer_next(ci) - seqb];     \
                                if (is_##TYPE1##_nil(x)) {              \
                                        if (!skip_nils) {               \
                                                sum = TYPE2##_nil;      \
@@ -526,18 +512,18 @@ dofsum(const void *restrict values, oid 
                        }                                               \
                        if (seenval)                                    \
                                *sums = sum;                            \
-               } else if (cand == NULL) {                              \
+               } else if (ci->tpe == cand_dense) {                     \
                        /* multiple groups, no candidate list */        \
                        ALGODEBUG fprintf(stderr,                       \
                                          "#%s: no candidates, with groups; " \
-                                         "start " BUNFMT ", end " BUNFMT \
+                                         "start " OIDFMT ", count " BUNFMT \
                                          "\n",                         \
-                                         func, start, end);            \
-                       for (i = start; i < end; i++) {                 \
+                                         func, ci->seq, ncand);        \
+                       for (i = 0; i < ncand; i++) {                   \
                                if (gids == NULL ||                     \
                                    (gids[i] >= min && gids[i] <= max)) { \
                                        gid = gids ? gids[i] - min : (oid) i; \
-                                       x = vals[i];                    \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to