Changeset: 00ddce9d0743 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/00ddce9d0743
Modified Files:
gdk/gdk_join.c
Branch: default
Log Message:
Use a BAT iterator in joincost instead of taking the heap lock multiple times.
diffs (105 lines):
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3654,12 +3654,13 @@ joincost(BAT *r, BUN lcount, struct cand
BAT *b;
BUN nheads;
BUN cnt;
+ BATiter ri = bat_iterator(r);
(void) BATcheckhash(r);
MT_rwlock_rdlock(&r->thashlock);
rhash = r->thash != NULL;
nheads = r->thash ? r->thash->nheads : 0;
- cnt = BATcount(r);
+ cnt = ri.count;
MT_rwlock_rdunlock(&r->thashlock);
if ((rci->tpe == cand_materialized || rci->tpe == cand_except) &&
@@ -3670,7 +3671,7 @@ joincost(BAT *r, BUN lcount, struct cand
rcost += log2((double) rci->nvals);
}
rcost *= lcount;
- if (BATtdense(r)) {
+ if (BATtdensebi(&ri)) {
/* no need for a hash, and lookup is free */
rhash = false; /* don't use it, even if it's there */
} else {
@@ -3683,8 +3684,8 @@ joincost(BAT *r, BUN lcount, struct cand
* Multiply the probe cost by an I/O latency
* factor to encourage swapping to a sequential
* scan instead. */
- if (!GDKinmemory(r->theap->farmid) &&
- (size_t)cnt * ATOMsize(r->ttype) + (r->tvheap ? r->tvheap->free : 0) > GDK_mem_maxsize / 4) {
+ if (!GDKinmemory(ri.h->farmid) &&
+ (size_t)cnt * ATOMsize(ri.type) + ri.vhfree > GDK_mem_maxsize / 4) {
/* Disk random access is ~100x to 1000x
* slower than RAM. A 100x penalty
* forces the optimizer to treat these
@@ -3705,32 +3706,30 @@ joincost(BAT *r, BUN lcount, struct cand
BBPunfix(b->batCacheid);
}
if (!rhash) {
- MT_lock_set(&r->theaplock);
- double unique_est = r->tunique_est;
- MT_lock_unset(&r->theaplock);
+ double unique_est = ri.unique_est;
if (unique_est == 0) {
- unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)});
- if (unique_est <= 0)
+ unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=ri.count});
+ if (unique_est <= 0) {
+ bat_iterator_end(&ri);
return -1;
+ }
}
/* we have an estimate of the number of unique
* values, assume some collisions */
rcost *= 1.1 * ((double) cnt / unique_est);
/* only count the cost of creating the hash for
* non-persistent bats */
- MT_lock_set(&r->theaplock);
/* If the BAT is persistent but so large that
* building a hash might thrash memory, consider
* it very expensive, so encourage choosing a
* linear scan of this side instead. */
- if (r->batRole != PERSISTENT /* || r->theap->dirty */ || GDKinmemory(r->theap->farmid) ||
- (size_t)cnt * ATOMsize(r->ttype) + (r->tvheap ? r->tvheap->free : 0) > GDK_mem_maxsize / 4)
+ if (r->batRole != PERSISTENT /* || ri.h->dirty */ || GDKinmemory(ri.h->farmid) ||
+ (size_t)cnt * ATOMsize(ri.type) + ri.vhfree > GDK_mem_maxsize / 4)
rcost += cnt * 2.0;
- MT_lock_unset(&r->theaplock);
}
}
if (cand) {
- if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
+ if (rci->ncand != ri.count && rci->tpe != cand_mask) {
/* instead of using the hash on r (cost in
* rcost), we can build a new hash on r taking
* the candidate list into account; don't do
@@ -3741,13 +3740,13 @@ joincost(BAT *r, BUN lcount, struct cand
if (rhash && !prhash) {
rccost = (double) cnt / nheads;
} else {
- MT_lock_set(&r->theaplock);
- double unique_est = r->tunique_est;
- MT_lock_unset(&r->theaplock);
+ double unique_est = ri.unique_est;
if (unique_est == 0) {
unique_est = guess_uniques(r, rci);
- if (unique_est <= 0)
+ if (unique_est <= 0) {
+ bat_iterator_end(&ri);
return -1;
+ }
}
/* we have an estimate of the number of unique
* values, assume some chains */
@@ -3764,6 +3763,7 @@ joincost(BAT *r, BUN lcount, struct cand
}
*hash = rhash;
*phash = prhash;
+ bat_iterator_end(&ri);
return rcost;
}
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]