Changeset: ae6bd73bc9d7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ae6bd73bc9d7
Added Files:
        gdk/gdk_select.c
Removed Files:
        gdk/gdk_scanselect.mx
        gdk/gdk_scanselect_defs.mx
        gdk/gdk_scanselect_defs_bte.mx
        gdk/gdk_scanselect_defs_dbl.mx
        gdk/gdk_scanselect_defs_fix.mx
        gdk/gdk_scanselect_defs_flt.mx
        gdk/gdk_scanselect_defs_int.mx
        gdk/gdk_scanselect_defs_lng.mx
        gdk/gdk_scanselect_defs_sht.mx
        gdk/gdk_scanselect_defs_str.mx
        gdk/gdk_scanselect_defs_var.mx
Modified Files:
        clients/Tests/exports.stable.out
        gdk/Makefile.ag
        gdk/gdk.h
        gdk/gdk_batop.c
        monetdb5/tests/gdkTests/Tests/intersect_diff_void.stable.out
Branch: default
Log Message:

Replaced gdk_scanselect*.mx with a new implementation.
The new implementation of the various BATselect variants all use a new
interface BATsubselect which takes a dense-headed BAT plus,
optionally, a dense-headed, oid-tail BAT with candidates, and returns
a new dense-headed BAT with oid tail that enumerates the matches.  The
result is sorted on the tail.

Only one test needed to be approved (intersect_diff_void) because the
seqbase of the empty result of a call to select is different than
before.

Note that there are no type-specific implementations, but they are
easy to add if testing shows they are beneficial.


diffs (truncated from 2727 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -172,6 +172,7 @@ BAT *BATsort(BAT *b);
 BAT *BATsort_rev(BAT *b);
 BAT *BATssort(BAT *b);
 BAT *BATssort_rev(BAT *b);
+BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int 
hi, int anti);
 BAT *BATsunion(BAT *b, BAT *c);
 BAT *BATsunique(BAT *b);
 BAT *BATthetajoin(BAT *l, BAT *r, int mode, BUN estimate);
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -19,22 +19,11 @@ MTSAFE
 
 INCLUDES = ../common/options ../common/stream ../common/utils 
$(valgrind_CFLAGS)
 
-EXTRA_DIST = gdk_scanselect_defs.mx
-
 lib_gdk = {
        VERSION = $(GDK_VERSION)
        NAME = bat
        SOURCES = \
-               gdk_scanselect_defs_bte.mx \
-               gdk_scanselect_defs_sht.mx \
-               gdk_scanselect_defs_int.mx \
-               gdk_scanselect_defs_flt.mx \
-               gdk_scanselect_defs_dbl.mx \
-               gdk_scanselect_defs_lng.mx \
-               gdk_scanselect_defs_str.mx \
-               gdk_scanselect_defs_fix.mx \
-               gdk_scanselect_defs_var.mx \
-               gdk_scanselect.mx gdk.h gdk_batop.c \
+               gdk.h gdk_batop.c gdk_select.c \
                gdk_search.c gdk_search.h gdk_tm.c \
                gdk_align.c gdk_bbp.c gdk_bbp.h \
                gdk_heap.c gdk_setop.mx gdk_utils.c gdk_utils.h \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3138,6 +3138,7 @@ gdk_export int BATtopN(BAT *b, BUN topN)
 #define JOIN_GE                2
 #define JOIN_BAND      3
 
+gdk_export BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, 
int li, int hi, int anti);
 gdk_export BAT *BATselect_(BAT *b, const void *tl, const void *th, bit li, bit 
hi);
 gdk_export BAT *BATuselect_(BAT *b, const void *tl, const void *th, bit li, 
bit hi);
 gdk_export BAT *BATantiuselect_(BAT *b, const void *tl, const void *th, bit 
li, bit hi);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -46,7 +46,6 @@
 #include "monetdb_config.h"
 #include "gdk.h"
 #include "gdk_private.h"
-#include "gdk_scanselect.h"
 
 #define updateloop(bn, b, func)                                                
\
        do {                                                            \
@@ -846,511 +845,6 @@ BATslice(BAT *b, BUN l, BUN h)
        return NULL;
 }
 
-static BAT *
-BATslice2(BAT *b, BUN l1, BUN h1, BUN l2, BUN h2)
-{
-       BUN p, q;
-       BAT *bn;
-       BATiter bi = bat_iterator(b);
-       int tt = b->ttype;
-
-       BATcheck(b, "BATslice");
-       if (h2 > BATcount(b))
-               h2 = BATcount(b);
-       if (h1 < l1)
-               h1 = l1;
-       if (h2 < l2)
-               h2 = l2;
-       l1 += BUNfirst(b);
-       l2 += BUNfirst(b);
-       h1 += BUNfirst(b);
-       h2 += BUNfirst(b);
-
-       if (l1 > BUN_MAX || l2 > BUN_MAX || h1 > BUN_MAX || h2 > BUN_MAX) {
-               GDKerror("BATslice2: boundary out of range\n");
-               return NULL;
-       }
-
-       if (tt == TYPE_void && b->T->seq != oid_nil)
-               tt = TYPE_oid;
-       bn = BATnew(ATOMtype(b->htype), tt, h1 - l1 + h2 - l2);
-       if (bn == NULL)
-               return bn;
-       for (p = (BUN) l1, q = (BUN) h1; p < q; p++) {
-               bunfastins(bn, BUNhead(bi, p), BUNtail(bi, p));
-       }
-       for (p = (BUN) l2, q = (BUN) h2; p < q; p++) {
-               bunfastins(bn, BUNhead(bi, p), BUNtail(bi, p));
-       }
-       bn->hsorted = BAThordered(b);
-       bn->tsorted = BATtordered(b);
-       bn->hrevsorted = BAThrevordered(b);
-       bn->trevsorted = BATtrevordered(b);
-       BATkey(bn, BAThkey(b));
-       BATkey(BATmirror(bn), BATtkey(b));
-       bn->H->nonil = b->H->nonil;
-       bn->T->nonil = b->T->nonil;
-       if (bn->hkey && bn->htype == TYPE_oid) {
-               if (BATcount(bn) == 0) {
-                       bn->hdense = TRUE;
-                       BATseqbase(bn, 0);
-               }
-       }
-       if (bn->tkey && bn->ttype == TYPE_oid) {
-               if (BATcount(bn) == 0) {
-                       bn->tdense = TRUE;
-                       BATseqbase(BATmirror(bn), 0);
-               }
-       }
-       return bn;
-      bunins_failed:
-       BBPreclaim(bn);
-       return NULL;
-}
-
-
-/*
- * @-  Value Selections
- * The string search is optimized for the degenerated case that th =
- * tl, and double elimination in the string heap.
- *
- * We allow value selections on the nil atom. This is formally not
- * correct, as in MIL (nil = nil) != true.  However, we do need an
- * implementation for selecting nil (in MIL, this is done through is
- * the "isnil" predicate). So we implement it here.
- */
-#define hashselectloop(EXT, BUNtail)                                   \
-       do {                                                            \
-               HASHloop##EXT(bi, b->H->hash, i, tl) {                  \
-                       if (q < r) {                                    \
-                               bunfastins_nocheck(bn, q, BUNtail(bi, i), \
-                                                  tl, Hsize(bn), Tsize(bn)); \
-                       }                                               \
-                       q++;                                            \
-               }                                                       \
-       } while (0)
-#define hashselect(BUNtail)                                            \
-       do {                                                            \
-               switch (ATOMstorage(b->htype)) {                        \
-               case TYPE_bte:                                          \
-                       hashselectloop(_bte, BUNtail);                  \
-                       break;                                          \
-               case TYPE_sht:                                          \
-                       hashselectloop(_sht, BUNtail);                  \
-                       break;                                          \
-               case TYPE_int:                                          \
-                       hashselectloop(_int, BUNtail);                  \
-                       break;                                          \
-               case TYPE_flt:                                          \
-                       hashselectloop(_flt, BUNtail);                  \
-                       break;                                          \
-               case TYPE_dbl:                                          \
-                       hashselectloop(_dbl, BUNtail);                  \
-                       break;                                          \
-               case TYPE_lng:                                          \
-                       hashselectloop(_lng, BUNtail);                  \
-                       break;                                          \
-               case TYPE_str:                                          \
-                       if (strElimDoubles(b->H->vheap)) {              \
-                               size_t j;                               \
-                                                                       \
-                               HASHloop_fstr(bi, b->H->hash, i, j, tl) { \
-                                       if (q < r)                      \
-                                               bunfastins_nocheck(bn, q, \
-                                                                  BUNtail(bi, 
i), \
-                                                                  tl,  \
-                                                                  Hsize(bn), \
-                                                                  Tsize(bn)); \
-                                       q++;                            \
-                               }                                       \
-                       } else {                                        \
-                               hashselectloop(_str, BUNtail);          \
-                       }                                               \
-                       break;                                          \
-               default:                                                \
-                       if (b->hvarsized) {                             \
-                               hashselectloop(var, BUNtail);           \
-                       } else {                                        \
-                               hashselectloop(loc, BUNtail);           \
-                       }                                               \
-                       break;                                          \
-               }                                                       \
-       } while (0)
-
-static BAT *
-BAT_hashselect(BAT *b, BAT *bn, const void *tl)
-{
-       int ht = bn->htype, tt = bn->ttype;
-       BUN size = BATcount(bn);
-       BUN i;
-
-       BATcheck(b, "BAT_hashselect");
-       b = BATmirror(b);
-       if (BATprepareHash(b)) {
-             bunins_failed:
-               BBPreclaim(bn);
-               return NULL;
-       }
-       while (bn) {
-               BUN q = BUNfirst(bn);
-               BUN r;
-               BATiter bi = bat_iterator(b);
-
-               assert(BATcapacity(bn) <= BUN_MAX);
-               r = (BUN) BATcapacity(bn);
-
-               if (b->tvarsized) {
-                       hashselect(BUNtvar);
-               } else {
-                       hashselect(BUNtloc);
-               }
-               if (q <= r)
-                       break;
-               size = (q - BUNfirst(bn));
-
-               BBPreclaim(bn);
-               bn = BATnew(ht, tt, size);
-       }
-       return bn;
-}
-
-/*
- * @- Range Selections
- * The routine BATselect locates the BAT subset whose tail component
- * satisfies the range condition T l <[=] tail <[=] h. Either boundary
- * is included in the result iff the respective bit parameter
- * "li"/"hi" is TRUE. A nil value in either dimension defines
- * infinity.  The value is set accordingly.
- *
- * Range selections without lower or upper bound use the nil atom to
- * indicate this (this is somewhat confusing). Note, however, that
- * through the definition of MIL we do not want the nils to appear in
- * the result (as (nil @{<,=,>@} ANY) = bit(nil) != true).
- */
-static void
-BATsetprop_wrd(BAT *b, int idx, wrd val)
-{
-       BATsetprop(b, idx, TYPE_wrd, &val);
-}
-
-static BAT *
-BAT_select_(BAT *b, const void *tl, const void *th, bit li, bit hi, bit tail, 
bit anti)
-{
-       int hval, lval, equi, t, ht, tt, lnil = 0;
-       BUN offset, batcnt, estimate = 0;
-       const void *nil;
-       BAT *bn;
-       BUN p, q;
-
-       BATcheck(b, "BATselect");
-       BATcheck(tl, "BATselect: tl value required");
-       /*
-        * Examine type, and values for lower- and higher-bound.
-        */
-       batcnt = BATcount(b);
-       /* preliminarily determine result types */
-       ht = BAThtype(b);
-       tt = tail ? BATttype(b) : TYPE_void;
-
-       t = b->ttype;
-       nil = ATOMnilptr(t);
-       lnil = ATOMcmp(t, tl, nil) == 0;
-       lval = !lnil || (th == NULL);
-       equi = ((th == NULL) || (lval && !ATOMcmp(t, tl, th)));
-       if (equi) {
-               if (th == NULL)
-                       hi = li;
-               th = tl;
-               hval = 1;       /* equi-select */
-       } else {
-               hval = ATOMcmp(t, th, nil) != 0;
-       }
-       if (anti) {
-               if (!lval != !hval) {
-                       /* one of the end points is nil and the other
-                        * isn't: swap sub-ranges */
-                       const void *tv;
-                       bit ti;
-                       ti = li;
-                       li = hi;
-                       hi = ti;
-                       tv = tl;
-                       tl = th;
-                       th = tv;
-                       anti = 0;
-                       equi = 0;
-               } else if (!lval && !hval) {
-                       /* antiselect for nil-nil range: all non-nil
-                        * values are in range, so we need to return
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to