Changeset: 0b3db1241854 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/0b3db1241854
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_batop.c
gdk/gdk_bbp.c
gdk/gdk_join.c
gdk/gdk_select.c
gdk/gdk_unique.c
monetdb5/mal/mal_profiler.c
monetdb5/mal/mal_resource.c
monetdb5/mal/mal_runtime.c
sql/backends/monet5/sql.c
Branch: default
Log Message:
Merge with Jul2021 branch.
diffs (truncated from 843 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -156,7 +156,6 @@ BAT *BATintersectcand(BAT *a, BAT *b);
BAT *BATintersectcand(BAT *a, BAT *b);
gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
gdk_return BATkey(BAT *b, bool onoff);
-bool BATkeyed(BAT *b);
gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
BAT *BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected);
void *BATmax(BAT *b, void *aggr);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -1380,7 +1380,6 @@ gdk_export gdk_return BATprint(stream *s
* ordered. The result is returned and stored in the tsorted field of
* the BAT.
*/
-gdk_export bool BATkeyed(BAT *b);
gdk_export bool BATordered(BAT *b);
gdk_export bool BATordered_rev(BAT *b);
gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b,
BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -385,9 +385,11 @@ insert_string_bat(BAT *b, BAT *n, struct
assert(b->batCapacity >= b->batCount);
b->theap->dirty = true;
/* maintain hash */
+ MT_rwlock_wrlock(&b->thashlock);
for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
- HASHappend(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
+ HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
}
+ MT_rwlock_wrunlock(&b->thashlock);
return GDK_SUCCEED;
}
@@ -494,6 +496,7 @@ append_varsized_bat(BAT *b, BAT *n, stru
}
/* copy data from n to b */
r = BUNlast(b);
+ MT_rwlock_wrlock(&b->thashlock);
while (cnt > 0) {
cnt--;
BUN p = canditer_next(ci) - hseq;
@@ -503,9 +506,10 @@ append_varsized_bat(BAT *b, BAT *n, stru
return GDK_FAIL;
}
if (b->thash)
- HASHappend(b, r, t);
+ HASHappend_locked(b, r, t);
r++;
}
+ MT_rwlock_wrunlock(&b->thashlock);
BATsetcount(b, r);
bat_iterator_end(&ni);
b->theap->dirty = true;
@@ -1843,157 +1847,6 @@ BATslice(BAT *b, BUN l, BUN h)
return bn;
}
-/* Return whether the BAT has all unique values or not. It we don't
- * know, invest in a proper check and record the results in the bat
- * descriptor. */
-bool
-BATkeyed(BAT *b)
-{
- lng t0 = GDKusec();
- int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
- BUN p, q, hb;
- Hash *hs = NULL;
-
- if (b->ttype == TYPE_void)
- return BATtdense(b) || BATcount(b) <= 1;
- if (BATcount(b) <= 1)
- return true;
- if (ATOMstorage(b->ttype) == TYPE_msk) {
- if (BATcount(b) > 2)
- return false;
- /* there are exactly two values */
- return mskGetVal(b, 0) != mskGetVal(b, 1);
- }
- if (b->twidth < SIZEOF_BUN &&
- BATcount(b) > (BUN) 1 << (8 << b->tshift)) {
- /* more rows than possible bit combinations in the atom */
- assert(!b->tkey);
- return false;
- }
-
- b->batDirtydesc = true;
- BATiter bi = bat_iterator(b);
- if (!b->tkey && b->tnokey[0] == 0 && b->tnokey[1] == 0) {
- if (b->tsorted || b->trevsorted) {
- const void *prev = BUNtail(bi, 0);
- const void *cur;
- for (q = BUNlast(b), p = 1; p < q; p++) {
- cur = BUNtail(bi, p);
- if ((*cmpf)(prev, cur) == 0) {
- b->tnokey[0] = p - 1;
- b->tnokey[1] = p;
- TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p - 1, p, ALGOBATPAR(b), GDKusec() - t0);
- goto doreturn;
- }
- prev = cur;
- }
- /* we completed the scan: no duplicates */
- b->tkey = true;
- } else if (BATcheckhash(b) ||
- (!b->batTransient &&
- BAThash(b) == GDK_SUCCEED) ||
- (/* DISABLES CODE */ (0) &&
- VIEWtparent(b) != 0 &&
- BATcheckhash(BBP_cache(VIEWtparent(b))))) {
- /* we already have a hash table on b, or b is
- * persistent and we could create a hash
- * table, or b is a view on a bat that already
- * has a hash table */
- BUN lo = 0;
-
- MT_rwlock_rdlock(&b->thashlock);
- hs = b->thash;
- if (hs == NULL && VIEWtparent(b) != 0) {
- BAT *b2 = BBP_cache(VIEWtparent(b));
- lo = b->tbaseoff - b2->tbaseoff;
- hs = b2->thash;
- }
- if (hs == NULL) {
- /* between checking and locking, the
- * hash was destroyed */
- MT_rwlock_rdunlock(&b->thashlock);
- goto lost_hash;
- }
- for (q = BUNlast(b), p = 0; p < q; p++) {
- const void *v = BUNtail(bi, p);
- for (hb = HASHgetlink(hs, p + lo);
- hb != BUN_NONE && hb >= lo;
- hb = HASHgetlink(hs, hb)) {
- assert(hb < p + lo);
- if ((*cmpf)(v, BUNtail(bi, hb - lo)) == 0) {
- b->tnokey[0] = hb - lo;
- b->tnokey[1] = p;
- TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", hb - lo, p, ALGOBATPAR(b), GDKusec() - t0);
- MT_rwlock_rdunlock(&b->thashlock);
- goto doreturn;
- }
- }
- }
- MT_rwlock_rdunlock(&b->thashlock);
- /* we completed the scan: no duplicates */
- b->tkey = true;
- } else {
- const char *nme;
- BUN prb;
- BUN mask;
-
- lost_hash:
- GDKclrerr(); /* not interested in BAThash errors */
- nme = BBP_physical(b->batCacheid);
- if (ATOMbasetype(b->ttype) == TYPE_bte) {
- mask = (BUN) 1 << 8;
- cmpf = NULL; /* no compare needed, "hash" is perfect */
- } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
- mask = (BUN) 1 << 16;
- cmpf = NULL; /* no compare needed, "hash" is perfect */
- } else {
- mask = HASHmask(b->batCount);
- if (mask < ((BUN) 1 << 16))
- mask = (BUN) 1 << 16;
- }
- if ((hs = GDKzalloc(sizeof(Hash))) == NULL)
- goto doreturn;
- if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshkeyl%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heaplink.filename) ||
- snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshkeyb%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heapbckt.filename) ||
- HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) {
- GDKfree(hs);
- /* err on the side of caution: not keyed */
- goto doreturn;
- }
- for (q = BUNlast(b), p = 0; p < q; p++) {
- const void *v = BUNtail(bi, p);
- prb = HASHprobe(hs, v);
- for (hb = HASHget(hs, prb);
- hb != BUN_NONE;
- hb = HASHgetlink(hs, hb)) {
- if (cmpf == NULL ||
- (*cmpf)(v, BUNtail(bi, hb)) == 0) {
- b->tnokey[0] = hb;
- b->tnokey[1] = p;
- TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", hb, p, ALGOBATPAR(b), GDKusec() - t0);
- goto doreturn_free;
- }
- }
- /* enter into hash table */
- HASHputlink(hs, p, HASHget(hs, prb));
- HASHput(hs, prb, p);
- }
- doreturn_free:
- HEAPfree(&hs->heaplink, true);
- HEAPfree(&hs->heapbckt, true);
- GDKfree(hs);
- if (p == q) {
- /* we completed the complete scan: no
- * duplicates */
- b->tkey = true;
- }
- }
- }
- doreturn:
- bat_iterator_end(&bi);
- return b->tkey;
-}
-
#define BAT_ORDERED(TPE) \
do { \
const TPE *restrict vals = Tloc(b, 0); \
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -1972,19 +1972,22 @@ BBPdump(void)
}
}
}
- if (b->thash && b->thash != (Hash *) 1) {
- size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt);
- size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt);
- fprintf(stderr, " Thash=[%zu,%zu,f=%d/%d]", m, v,
- b->thash->heaplink.farmid,
- b->thash->heapbckt.farmid);
- if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
- cmem += m;
- cvm += v;
- } else {
- mem += m;
- vm += v;
+ if (MT_rwlock_rdtry(&b->thashlock)) {
+ if (b->thash && b->thash != (Hash *) 1) {
+ size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt);
+ size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt);
+ fprintf(stderr, " Thash=[%zu,%zu,f=%d/%d]", m, v,
+ b->thash->heaplink.farmid,
+ b->thash->heapbckt.farmid);
+ if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
+ cmem += m;
+ cvm += v;
+ } else {
+ mem += m;
+ vm += v;
+ }
}
+ MT_rwlock_rdunlock(&b->thashlock);
}
fprintf(stderr, " role: %s\n",
b->batRole == PERSISTENT ? "persistent" : "transient");
@@ -2890,9 +2893,11 @@ BBPsave(BAT *b)
if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirtydata(b)) {
/* do nothing */
+ MT_rwlock_rdlock(&b->thashlock);
if (b->thash && b->thash != (Hash *) 1 &&
(b->thash->heaplink.dirty || b->thash->heapbckt.dirty))
BAThashsave(b, (BBP_status(bid) & BBPPERSISTENT) != 0);
+ MT_rwlock_rdunlock(&b->thashlock);
return GDK_SUCCEED;
}
if (lock)
diff --git a/gdk/gdk_delta.c b/gdk/gdk_delta.c
--- a/gdk/gdk_delta.c
+++ b/gdk/gdk_delta.c
@@ -93,8 +93,7 @@ BATundo(BAT *b)
gdk_return (*tunfix) (const void *) =
BATatoms[b->ttype].atomUnfix;
void (*tatmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
- if (b->thash)
- HASHdestroy(b);
+ HASHdestroy(b);
if (tunfix || tatmdel) {
for (p = bunfirst; p <= bunlast; p++, i++) {
if (tunfix)
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2520,11 +2520,6 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
TYPE *rvals = ri.base; \
TYPE *lvals = li.base; \
TYPE v; \
- if (!hash_cand) { \
- MT_rwlock_rdlock(&r->thashlock); \
- locked = true; /* in case we abandon */ \
- hsh = r->thash; /* re-initialize inside lock */ \
- } \
while (lci->next < lci->ncand) { \
GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
lo = canditer_next(lci); \
@@ -2626,10 +2621,6 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
if (nr > 0 && BATcount(r1) > nr) \
r1->trevsorted = false; \
} \
- if (!hash_cand) { \
- locked = false; \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list