Changeset: ae4c4e6001e3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ae4c4e6001e3
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk_group.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/batstr.c
monetdb5/optimizer/opt_mergetable.c
sql/backends/monet5/rel_bin.c
sql/backends/monet5/sql_gencode.c
sql/backends/monet5/sql_statement.c
sql/backends/monet5/sql_statement.h
sql/backends/monet5/sql_subquery.c
sql/common/sql_types.c
sql/server/rel_dump.c
sql/server/rel_optimizer.c
sql/server/rel_propagate.c
sql/server/rel_updates.c
Branch: pushcands
Log Message:
Merged with default
diffs (truncated from 7084 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -541,8 +541,8 @@ gdk_return log_bat_transient(logger *lg,
gdk_return log_constant(logger *lg, int type, ptr val, log_id id, lng offset,
lng cnt);
gdk_return log_delta(logger *lg, BAT *uid, BAT *uval, log_id id);
gdk_return log_sequence(logger *lg, int seq, lng id);
-gdk_return log_tend(logger *lg);
-gdk_return log_tstart(logger *lg, ulng commit_ts, bool flush);
+gdk_return log_tend(logger *lg, ulng commit_ts);
+gdk_return log_tstart(logger *lg, bool flush);
gdk_return logger_activate(logger *lg);
lng logger_changes(logger *lg);
logger *logger_create(int debug, const char *fn, const char *logdir, int
version, preversionfix_fptr prefuncp, postversionfix_fptr postfuncp, void
*funcdata);
@@ -761,6 +761,7 @@ void MCcloseClient(Client c);
Client MCforkClient(Client father);
Client MCgetClient(int id);
Client MCinitClient(oid user, bstream *fin, stream *fout);
+size_t MCmemoryClaim(void);
int MCpushClientInput(Client c, bstream *new_input, int listing, char *prompt);
void MCstopClients(Client c);
str MCsuspendClient(int id);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -809,12 +809,12 @@ COLcopy(BAT *b, int tt, bool writable, r
/* first try case (1); create a view, possibly with different
* atom-types */
- if (role == b->batRole &&
+ if (!writable &&
+ role == b->batRole &&
b->batRestricted == BAT_READ &&
ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
(!VIEWtparent(b) ||
- BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ) &&
- !writable) {
+ BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ)) {
bn = VIEWcreate(b->hseqbase, b);
if (bn == NULL)
return NULL;
@@ -875,8 +875,8 @@ COLcopy(BAT *b, int tt, bool writable, r
strconcat_len(thp.filename, sizeof(thp.filename),
BBP_physical(bn->batCacheid),
".theap", NULL);
- if ((b->ttype && HEAPcopy(&bthp, b->theap) !=
GDK_SUCCEED) ||
- (bn->tvheap && HEAPcopy(&thp, b->tvheap) !=
GDK_SUCCEED)) {
+ if ((b->ttype && HEAPcopy(&bthp, b->theap, b->tbaseoff
<< b->tshift) != GDK_SUCCEED) ||
+ (bn->tvheap && HEAPcopy(&thp, b->tvheap, 0) !=
GDK_SUCCEED)) {
HEAPfree(&thp, true);
HEAPfree(&bthp, true);
BBPreclaim(bn);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -32,7 +32,7 @@ unshare_varsized_heap(BAT *b)
h->farmid = BBPselectfarm(b->batRole, TYPE_str, varheap);
strconcat_len(h->filename, sizeof(h->filename),
BBP_physical(b->batCacheid), ".theap", NULL);
- if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) {
+ if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
HEAPfree(h, true);
GDKfree(h);
return GDK_FAIL;
@@ -496,7 +496,7 @@ append_varsized_bat(BAT *b, BAT *n, stru
h->farmid = BBPselectfarm(b->batRole, b->ttype, varheap);
strconcat_len(h->filename, sizeof(h->filename),
BBP_physical(b->batCacheid), ".theap", NULL);
- if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) {
+ if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
HEAPfree(h, true);
GDKfree(h);
return GDK_FAIL;
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -453,41 +453,43 @@ rev(oid x)
return x;
}
-/* population count: count number of 1 bits in a value */
-static inline int
-pop(oid x)
+/* count trailing zeros, also see candmask_lobit in gdk_cand.h */
+static inline int __attribute__((__const__))
+ctz(oid x)
{
-#ifdef __GNUC__
+#if defined(__GNUC__)
#if SIZEOF_OID == SIZEOF_INT
- return __builtin_popcount(x);
+ return __builtin_ctz(x);
#else
- return __builtin_popcountl(x);
+ return __builtin_ctzl(x);
#endif
-#else
-#ifdef _MSC_VER
+#elif defined(_MSC_VER)
#if SIZEOF_OID == SIZEOF_INT
- return (int) __popcnt((unsigned int) (x));
-#else
- return (int) __popcnt64((unsigned __int64) (x));
-#endif
+ unsigned long idx;
+ if (_BitScanForward(&idx, (unsigned long) x))
+ return (int) idx;
#else
- /* divide and conquer implementation */
-#if SIZEOF_OID == 8
- x = (x & 0x5555555555555555) + ((x >> 1) & 0x5555555555555555);
- x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
- x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F0F0F0F0F);
- x = (x & 0x00FF00FF00FF00FF) + ((x >> 8) & 0x00FF00FF00FF00FF);
- x = (x & 0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF);
- x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF);
+ unsigned long idx;
+ if (_BitScanForward64(&idx, (unsigned __int64) x))
+ return (int) idx;
+#endif
+ return -1;
#else
- x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
- x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
- x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
- x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
- x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
+ /* use binary search for the lowest set bit */
+ int n = 1;
+#if SIZEOF_OID == SIZEOF_INT
+ if ((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; }
+ if ((x & 0x000000FF) == 0) { n += 8; x >>= 8; }
+ if ((x & 0x0000000F) == 0) { n += 4; x >>= 4; }
+ if ((x & 0x00000003) == 0) { n += 2; x >>= 2; }
+#else
+ if ((x & UINT64_C(0x00000000FFFFFFFF)) == 0) { n += 32; x >>= 32; }
+ if ((x & UINT64_C(0x000000000000FFFF)) == 0) { n += 16; x >>= 16; }
+ if ((x & UINT64_C(0x00000000000000FF)) == 0) { n += 8; x >>= 8; }
+ if ((x & UINT64_C(0x000000000000000F)) == 0) { n += 4; x >>= 4; }
+ if ((x & UINT64_C(0x0000000000000003)) == 0) { n += 2; x >>= 2; }
#endif
- return (int) x;
-#endif
+ return n - (x & 1);
#endif
}
@@ -1121,9 +1123,9 @@ BATgroup_internal(BAT **groups, BAT **ex
nbucket |= nbucket >> 32;
#endif
nbucket++;
- /* nbucket is a power of two, so pop(nbucket - 1)
+ /* nbucket is a power of two, so ctz(nbucket)
* tells us which power of two */
- bits = 8 * SIZEOF_OID - pop(nbucket - 1);
+ bits = 8 * SIZEOF_OID - ctz(nbucket);
} else {
nbucket = MAX(HASHmask(cnt), 1 << 16);
}
diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c
--- a/gdk/gdk_heap.c
+++ b/gdk/gdk_heap.c
@@ -651,11 +651,13 @@ GDKupgradevarheap(BAT *b, var_t v, BUN c
* dst->filename (or NULL), which might be used in HEAPalloc().
*/
gdk_return
-HEAPcopy(Heap *dst, Heap *src)
+HEAPcopy(Heap *dst, Heap *src, size_t offset)
{
- if (HEAPalloc(dst, src->size, 1, 1) == GDK_SUCCEED) {
- dst->free = src->free;
- memcpy(dst->base, src->base, src->free);
+ if (offset > src->free)
+ offset = src->free;
+ if (HEAPalloc(dst, src->free - offset, 1, 1) == GDK_SUCCEED) {
+ dst->free = src->free - offset;
+ memcpy(dst->base, src->base + offset, src->free - offset);
dst->hashash = src->hashash;
dst->cleanhash = src->cleanhash;
dst->dirty = true;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2637,7 +2637,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
Hash *restrict hsh = NULL;
bool locked = false;
- assert(!BATtvoid(r));
assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
size_t counter = 0;
@@ -2648,7 +2647,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
}
int t = ATOMbasetype(r->ttype);
- if (r->ttype == TYPE_void || l->ttype == TYPE_void)
+ if (BATtvoid(r) || BATtvoid(l))
t = TYPE_void;
lwidth = l->twidth;
@@ -2718,6 +2717,9 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
"existing hash%s\n",
ALGOBATPAR(r),
swapped ? " (swapped)" : "");
+ } else if (BATtdense(r)) {
+ /* no hash, just dense lookup */
+ MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)"
: "hashjoin on dense");
} else {
/* we need to create a hash on r */
MT_thread_setalgorithm(swapped ? "hashjoin using new hash
(swapped)" : "hashjoin using new hash");
@@ -2728,7 +2730,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
goto bailout;
hsh = r->thash;
}
- assert(hsh != NULL);
+ assert(hsh != NULL || BATtdense(r));
ri = bat_iterator(r);
@@ -2748,7 +2750,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
false, false, __func__,
t0);
}
}
- } else {
+ } else if (!BATtdense(r)) {
for (rb = HASHget(hsh, HASHprobe(hsh, nil));
rb != HASHnil(hsh);
rb = HASHgetlink(hsh, rb)) {
@@ -2790,7 +2792,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
HASHJOIN(uuid);
break;
default:
- if (!hash_cand) {
+ if (!hash_cand && hsh) {
MT_rwlock_rdlock(&r->thashlock);
locked = true; /* in case we abandon */
hsh = r->thash; /* re-initialize inside lock */
@@ -2799,12 +2801,10 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
GDK_CHECK_TIMEOUT(timeoffset, counter,
GOTO_LABEL_TIMEOUT_HANDLER(bailout));
lo = canditer_next(lci);
- if (BATtvoid(l)) {
- if (BATtdense(l))
- lval = lo - l->hseqbase + l->tseqbase;
- } else {
+ if (BATtdense(l))
+ lval = lo - l->hseqbase + l->tseqbase;
+ else if (l->ttype != TYPE_void)
v = VALUE(l, lo - l->hseqbase);
- }
nr = 0;
if ((!nil_matches || not_in) && cmp(v, nil) == 0) {
/* no match */
@@ -2827,6 +2827,23 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
if (semi && !max_one)
break;
}
+ } else if (hsh == NULL) {
+ assert(BATtdense(r));
+ ro = *(const oid *) v;
+ if (ro >= r->tseqbase &&
+ ro < r->tseqbase + r->batCount) {
+ ro -= r->tseqbase;
+ ro += rseq;
+ if (canditer_contains(rci, ro)) {
+ if (only_misses) {
+ nr++;
+ break;
+ }
+ HASHLOOPBODY();
+ if (semi && !max_one)
+ break;
+ }
+ }
} else if (rci->tpe != cand_dense) {
for (rb = HASHget(hsh, HASHprobe(hsh, v));
rb != HASHnil(hsh);
@@ -2901,7 +2918,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
if (nr > 0 && BATcount(r1) > nr)
r1->trevsorted = false;
}
- if (!hash_cand) {
+ if (!hash_cand && hsh) {
locked = false;
MT_rwlock_rdunlock(&r->thashlock);
}
@@ -2974,35 +2991,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
return GDK_FAIL;
}
-/* population count: count number of 1 bits in a value */
-static inline uint32_t __attribute__((__const__))
-pop(uint32_t x)
-{
-#if defined(__GNUC__)
- return (uint32_t) __builtin_popcount(x);
-#elif defined(_MSC_VER)
- return (uint32_t) __popcnt((unsigned int) (x));
-#else
- /* divide and conquer implementation (the two versions are
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list