Changeset: ae6bd73bc9d7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ae6bd73bc9d7
Added Files:
gdk/gdk_select.c
Removed Files:
gdk/gdk_scanselect.mx
gdk/gdk_scanselect_defs.mx
gdk/gdk_scanselect_defs_bte.mx
gdk/gdk_scanselect_defs_dbl.mx
gdk/gdk_scanselect_defs_fix.mx
gdk/gdk_scanselect_defs_flt.mx
gdk/gdk_scanselect_defs_int.mx
gdk/gdk_scanselect_defs_lng.mx
gdk/gdk_scanselect_defs_sht.mx
gdk/gdk_scanselect_defs_str.mx
gdk/gdk_scanselect_defs_var.mx
Modified Files:
clients/Tests/exports.stable.out
gdk/Makefile.ag
gdk/gdk.h
gdk/gdk_batop.c
monetdb5/tests/gdkTests/Tests/intersect_diff_void.stable.out
Branch: default
Log Message:
Replaced gdk_scanselect*.mx with a new implementation.
The new implementation of the various BATselect variants all use a new
interface BATsubselect which takes a dense-headed BAT plus,
optionally, a dense-headed, oid-tail BAT with candidates, and returns
a new dense-headed BAT with oid tail that enumerates the matches. The
result is sorted on the tail.
Only one test needed to be approved (intersect_diff_void) because the
seqbase of the empty result of a call to select is different than
before.
Note that there are no type-specific implementations, but they are
easy to add if testing shows they are beneficial.
diffs (truncated from 2727 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -172,6 +172,7 @@ BAT *BATsort(BAT *b);
BAT *BATsort_rev(BAT *b);
BAT *BATssort(BAT *b);
BAT *BATssort_rev(BAT *b);
+BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int
hi, int anti);
BAT *BATsunion(BAT *b, BAT *c);
BAT *BATsunique(BAT *b);
BAT *BATthetajoin(BAT *l, BAT *r, int mode, BUN estimate);
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -19,22 +19,11 @@ MTSAFE
INCLUDES = ../common/options ../common/stream ../common/utils
$(valgrind_CFLAGS)
-EXTRA_DIST = gdk_scanselect_defs.mx
-
lib_gdk = {
VERSION = $(GDK_VERSION)
NAME = bat
SOURCES = \
- gdk_scanselect_defs_bte.mx \
- gdk_scanselect_defs_sht.mx \
- gdk_scanselect_defs_int.mx \
- gdk_scanselect_defs_flt.mx \
- gdk_scanselect_defs_dbl.mx \
- gdk_scanselect_defs_lng.mx \
- gdk_scanselect_defs_str.mx \
- gdk_scanselect_defs_fix.mx \
- gdk_scanselect_defs_var.mx \
- gdk_scanselect.mx gdk.h gdk_batop.c \
+ gdk.h gdk_batop.c gdk_select.c \
gdk_search.c gdk_search.h gdk_tm.c \
gdk_align.c gdk_bbp.c gdk_bbp.h \
gdk_heap.c gdk_setop.mx gdk_utils.c gdk_utils.h \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3138,6 +3138,7 @@ gdk_export int BATtopN(BAT *b, BUN topN)
#define JOIN_GE 2
#define JOIN_BAND 3
+gdk_export BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th,
int li, int hi, int anti);
gdk_export BAT *BATselect_(BAT *b, const void *tl, const void *th, bit li, bit
hi);
gdk_export BAT *BATuselect_(BAT *b, const void *tl, const void *th, bit li,
bit hi);
gdk_export BAT *BATantiuselect_(BAT *b, const void *tl, const void *th, bit
li, bit hi);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -46,7 +46,6 @@
#include "monetdb_config.h"
#include "gdk.h"
#include "gdk_private.h"
-#include "gdk_scanselect.h"
#define updateloop(bn, b, func)
\
do { \
@@ -846,511 +845,6 @@ BATslice(BAT *b, BUN l, BUN h)
return NULL;
}
-static BAT *
-BATslice2(BAT *b, BUN l1, BUN h1, BUN l2, BUN h2)
-{
- BUN p, q;
- BAT *bn;
- BATiter bi = bat_iterator(b);
- int tt = b->ttype;
-
- BATcheck(b, "BATslice");
- if (h2 > BATcount(b))
- h2 = BATcount(b);
- if (h1 < l1)
- h1 = l1;
- if (h2 < l2)
- h2 = l2;
- l1 += BUNfirst(b);
- l2 += BUNfirst(b);
- h1 += BUNfirst(b);
- h2 += BUNfirst(b);
-
- if (l1 > BUN_MAX || l2 > BUN_MAX || h1 > BUN_MAX || h2 > BUN_MAX) {
- GDKerror("BATslice2: boundary out of range\n");
- return NULL;
- }
-
- if (tt == TYPE_void && b->T->seq != oid_nil)
- tt = TYPE_oid;
- bn = BATnew(ATOMtype(b->htype), tt, h1 - l1 + h2 - l2);
- if (bn == NULL)
- return bn;
- for (p = (BUN) l1, q = (BUN) h1; p < q; p++) {
- bunfastins(bn, BUNhead(bi, p), BUNtail(bi, p));
- }
- for (p = (BUN) l2, q = (BUN) h2; p < q; p++) {
- bunfastins(bn, BUNhead(bi, p), BUNtail(bi, p));
- }
- bn->hsorted = BAThordered(b);
- bn->tsorted = BATtordered(b);
- bn->hrevsorted = BAThrevordered(b);
- bn->trevsorted = BATtrevordered(b);
- BATkey(bn, BAThkey(b));
- BATkey(BATmirror(bn), BATtkey(b));
- bn->H->nonil = b->H->nonil;
- bn->T->nonil = b->T->nonil;
- if (bn->hkey && bn->htype == TYPE_oid) {
- if (BATcount(bn) == 0) {
- bn->hdense = TRUE;
- BATseqbase(bn, 0);
- }
- }
- if (bn->tkey && bn->ttype == TYPE_oid) {
- if (BATcount(bn) == 0) {
- bn->tdense = TRUE;
- BATseqbase(BATmirror(bn), 0);
- }
- }
- return bn;
- bunins_failed:
- BBPreclaim(bn);
- return NULL;
-}
-
-
-/*
- * @- Value Selections
- * The string search is optimized for the degenerated case that th =
- * tl, and double elimination in the string heap.
- *
- * We allow value selections on the nil atom. This is formally not
- * correct, as in MIL (nil = nil) != true. However, we do need an
- * implementation for selecting nil (in MIL, this is done through is
- * the "isnil" predicate). So we implement it here.
- */
-#define hashselectloop(EXT, BUNtail) \
- do { \
- HASHloop##EXT(bi, b->H->hash, i, tl) { \
- if (q < r) { \
- bunfastins_nocheck(bn, q, BUNtail(bi, i), \
- tl, Hsize(bn), Tsize(bn)); \
- } \
- q++; \
- } \
- } while (0)
-#define hashselect(BUNtail) \
- do { \
- switch (ATOMstorage(b->htype)) { \
- case TYPE_bte: \
- hashselectloop(_bte, BUNtail); \
- break; \
- case TYPE_sht: \
- hashselectloop(_sht, BUNtail); \
- break; \
- case TYPE_int: \
- hashselectloop(_int, BUNtail); \
- break; \
- case TYPE_flt: \
- hashselectloop(_flt, BUNtail); \
- break; \
- case TYPE_dbl: \
- hashselectloop(_dbl, BUNtail); \
- break; \
- case TYPE_lng: \
- hashselectloop(_lng, BUNtail); \
- break; \
- case TYPE_str: \
- if (strElimDoubles(b->H->vheap)) { \
- size_t j; \
- \
- HASHloop_fstr(bi, b->H->hash, i, j, tl) { \
- if (q < r) \
- bunfastins_nocheck(bn, q, \
- BUNtail(bi,
i), \
- tl, \
- Hsize(bn), \
- Tsize(bn)); \
- q++; \
- } \
- } else { \
- hashselectloop(_str, BUNtail); \
- } \
- break; \
- default: \
- if (b->hvarsized) { \
- hashselectloop(var, BUNtail); \
- } else { \
- hashselectloop(loc, BUNtail); \
- } \
- break; \
- } \
- } while (0)
-
-static BAT *
-BAT_hashselect(BAT *b, BAT *bn, const void *tl)
-{
- int ht = bn->htype, tt = bn->ttype;
- BUN size = BATcount(bn);
- BUN i;
-
- BATcheck(b, "BAT_hashselect");
- b = BATmirror(b);
- if (BATprepareHash(b)) {
- bunins_failed:
- BBPreclaim(bn);
- return NULL;
- }
- while (bn) {
- BUN q = BUNfirst(bn);
- BUN r;
- BATiter bi = bat_iterator(b);
-
- assert(BATcapacity(bn) <= BUN_MAX);
- r = (BUN) BATcapacity(bn);
-
- if (b->tvarsized) {
- hashselect(BUNtvar);
- } else {
- hashselect(BUNtloc);
- }
- if (q <= r)
- break;
- size = (q - BUNfirst(bn));
-
- BBPreclaim(bn);
- bn = BATnew(ht, tt, size);
- }
- return bn;
-}
-
-/*
- * @- Range Selections
- * The routine BATselect locates the BAT subset whose tail component
- * satisfies the range condition T l <[=] tail <[=] h. Either boundary
- * is included in the result iff the respective bit parameter
- * "li"/"hi" is TRUE. A nil value in either dimension defines
- * infinity. The value is set accordingly.
- *
- * Range selections without lower or upper bound use the nil atom to
- * indicate this (this is somewhat confusing). Note, however, that
- * through the definition of MIL we do not want the nils to appear in
- * the result (as (nil @{<,=,>@} ANY) = bit(nil) != true).
- */
-static void
-BATsetprop_wrd(BAT *b, int idx, wrd val)
-{
- BATsetprop(b, idx, TYPE_wrd, &val);
-}
-
-static BAT *
-BAT_select_(BAT *b, const void *tl, const void *th, bit li, bit hi, bit tail,
bit anti)
-{
- int hval, lval, equi, t, ht, tt, lnil = 0;
- BUN offset, batcnt, estimate = 0;
- const void *nil;
- BAT *bn;
- BUN p, q;
-
- BATcheck(b, "BATselect");
- BATcheck(tl, "BATselect: tl value required");
- /*
- * Examine type, and values for lower- and higher-bound.
- */
- batcnt = BATcount(b);
- /* preliminarily determine result types */
- ht = BAThtype(b);
- tt = tail ? BATttype(b) : TYPE_void;
-
- t = b->ttype;
- nil = ATOMnilptr(t);
- lnil = ATOMcmp(t, tl, nil) == 0;
- lval = !lnil || (th == NULL);
- equi = ((th == NULL) || (lval && !ATOMcmp(t, tl, th)));
- if (equi) {
- if (th == NULL)
- hi = li;
- th = tl;
- hval = 1; /* equi-select */
- } else {
- hval = ATOMcmp(t, th, nil) != 0;
- }
- if (anti) {
- if (!lval != !hval) {
- /* one of the end points is nil and the other
- * isn't: swap sub-ranges */
- const void *tv;
- bit ti;
- ti = li;
- li = hi;
- hi = ti;
- tv = tl;
- tl = th;
- th = tv;
- anti = 0;
- equi = 0;
- } else if (!lval && !hval) {
- /* antiselect for nil-nil range: all non-nil
- * values are in range, so we need to return
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list