Changeset: 00ddce9d0743 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/00ddce9d0743
Modified Files:
        gdk/gdk_join.c
Branch: default
Log Message:

Use bat iterator instead of locking multiple times.


diffs (105 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3654,12 +3654,13 @@ joincost(BAT *r, BUN lcount, struct cand
        BAT *b;
        BUN nheads;
        BUN cnt;
+       BATiter ri = bat_iterator(r);
 
        (void) BATcheckhash(r);
        MT_rwlock_rdlock(&r->thashlock);
        rhash = r->thash != NULL;
        nheads = r->thash ? r->thash->nheads : 0;
-       cnt = BATcount(r);
+       cnt = ri.count;
        MT_rwlock_rdunlock(&r->thashlock);
 
        if ((rci->tpe == cand_materialized || rci->tpe == cand_except) &&
@@ -3670,7 +3671,7 @@ joincost(BAT *r, BUN lcount, struct cand
                rcost += log2((double) rci->nvals);
        }
        rcost *= lcount;
-       if (BATtdense(r)) {
+       if (BATtdensebi(&ri)) {
                /* no need for a hash, and lookup is free */
                rhash = false;  /* don't use it, even if it's there */
        } else {
@@ -3683,8 +3684,8 @@ joincost(BAT *r, BUN lcount, struct cand
                         * Multiply the probe cost by an I/O latency
                         * factor to encourage swapping to a sequential
                         * scan instead. */
-                       if (!GDKinmemory(r->theap->farmid) &&
-                           (size_t)cnt * ATOMsize(r->ttype) + (r->tvheap ? r->tvheap->free : 0) > GDK_mem_maxsize / 4) {
+                       if (!GDKinmemory(ri.h->farmid) &&
+                           (size_t)cnt * ATOMsize(ri.type) + ri.vhfree > GDK_mem_maxsize / 4) {
                                /* Disk random access is ~100x to 1000x
                                 * slower than RAM.  A 100x penalty
                                 * forces the optimizer to treat these
@@ -3705,32 +3706,30 @@ joincost(BAT *r, BUN lcount, struct cand
                        BBPunfix(b->batCacheid);
                }
                if (!rhash) {
-                       MT_lock_set(&r->theaplock);
-                       double unique_est = r->tunique_est;
-                       MT_lock_unset(&r->theaplock);
+                       double unique_est = ri.unique_est;
                        if (unique_est == 0) {
-                               unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)});
-                               if (unique_est <= 0)
+                               unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=ri.count});
+                               if (unique_est <= 0) {
+                                       bat_iterator_end(&ri);
                                        return -1;
+                               }
                        }
                        /* we have an estimate of the number of unique
                         * values, assume some collisions */
                        rcost *= 1.1 * ((double) cnt / unique_est);
                        /* only count the cost of creating the hash for
                         * non-persistent bats */
-                       MT_lock_set(&r->theaplock);
                        /* If the BAT is persistent but so large that
                         * building a hash might thrash memory, consider
                         * it very expensive, so encourage choosing a
                         * linear scan of this side instead. */
-                       if (r->batRole != PERSISTENT /* || r->theap->dirty */ || GDKinmemory(r->theap->farmid) ||
-                           (size_t)cnt * ATOMsize(r->ttype) + (r->tvheap ? r->tvheap->free : 0) > GDK_mem_maxsize / 4)
+                       if (r->batRole != PERSISTENT /* || ri.h->dirty */ || GDKinmemory(ri.h->farmid) ||
+                           (size_t)cnt * ATOMsize(ri.type) + ri.vhfree > GDK_mem_maxsize / 4)
                                rcost += cnt * 2.0;
-                       MT_lock_unset(&r->theaplock);
                }
        }
        if (cand) {
-               if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
+               if (rci->ncand != ri.count && rci->tpe != cand_mask) {
                        /* instead of using the hash on r (cost in
                         * rcost), we can build a new hash on r taking
                         * the candidate list into account; don't do
@@ -3741,13 +3740,13 @@ joincost(BAT *r, BUN lcount, struct cand
                        if (rhash && !prhash) {
                                rccost = (double) cnt / nheads;
                        } else {
-                               MT_lock_set(&r->theaplock);
-                               double unique_est = r->tunique_est;
-                               MT_lock_unset(&r->theaplock);
+                               double unique_est = ri.unique_est;
                                if (unique_est == 0) {
                                        unique_est = guess_uniques(r, rci);
-                                       if (unique_est <= 0)
+                                       if (unique_est <= 0) {
+                                               bat_iterator_end(&ri);
                                                return -1;
+                                       }
                                }
                                /* we have an estimate of the number of unique
                                 * values, assume some chains */
@@ -3764,6 +3763,7 @@ joincost(BAT *r, BUN lcount, struct cand
        }
        *hash = rhash;
        *phash = prhash;
+       bat_iterator_end(&ri);
        return rcost;
 }
 
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to