Changeset: 1720d839521d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1720d839521d
Modified Files:
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_select.c
        sql/include/sql_catalog.h
Branch: default
Log Message:

merged with sep2022


diffs (266 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3304,8 +3304,8 @@ BATguess_uniques(BAT *b, struct canditer
 /* estimate the cost of doing a hashjoin with a hash on r; return value
  * is the estimated cost, the last three arguments receive some extra
  * information */
-static double
-joincost(BAT *r, struct canditer *lci, struct canditer *rci,
+double
+joincost(BAT *r, BUN lcount, struct canditer *rci,
         bool *hash, bool *phash, bool *cand)
 {
        bool rhash;
@@ -3331,7 +3331,7 @@ joincost(BAT *r, struct canditer *lci, s
                 * candidate types is essentially free */
                rcost += log2((double) rci->nvals);
        }
-       rcost *= lci->ncand;
+       rcost *= lcount;
        if (BATtdense(r)) {
                /* no need for a hash, and lookup is free */
                rhash = false;  /* don't use it, even if it's there */
@@ -3376,38 +3376,41 @@ joincost(BAT *r, struct canditer *lci, s
 #endif
                }
        }
-       if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
-               /* instead of using the hash on r (cost in rcost), we
-                * can build a new hash on r taking the candidate list
-                * into account; don't do this for masked candidate
-                * since the searching of the candidate list
-                * (canditer_idx) will kill us */
-               double rccost;
-               if (rhash && !prhash) {
-                       rccost = (double) cnt / nheads;
-               } else {
-                       MT_lock_set(&r->theaplock);
-                       double unique_est = r->tunique_est;
-                       MT_lock_unset(&r->theaplock);
-                       if (unique_est == 0) {
-                               unique_est = guess_uniques(r, rci);
-                               if (unique_est < 0)
-                                       return -1;
+       if (cand) {
+               if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
+                       /* instead of using the hash on r (cost in
+                        * rcost), we can build a new hash on r taking
+                        * the candidate list into account; don't do
+                        * this for masked candidate since the searching
+                        * of the candidate list (canditer_idx) will
+                        * kill us */
+                       double rccost;
+                       if (rhash && !prhash) {
+                               rccost = (double) cnt / nheads;
+                       } else {
+                               MT_lock_set(&r->theaplock);
+                               double unique_est = r->tunique_est;
+                               MT_lock_unset(&r->theaplock);
+                               if (unique_est == 0) {
+                                       unique_est = guess_uniques(r, rci);
+                                       if (unique_est < 0)
+                                               return -1;
+                               }
+                               /* we have an estimate of the number of unique
+                                * values, assume some chains */
+                               rccost = 1.1 * ((double) cnt / unique_est);
                        }
-                       /* we have an estimate of the number of unique
-                        * values, assume some chains */
-                       rccost = 1.1 * ((double) cnt / unique_est);
+                       rccost *= lcount;
+                       rccost += rci->ncand * 2.0; /* cost of building the 
hash */
+                       if (rccost < rcost) {
+                               rcost = rccost;
+                               rcand = true;
+                       }
                }
-               rccost *= lci->ncand;
-               rccost += rci->ncand * 2.0; /* cost of building the hash */
-               if (rccost < rcost) {
-                       rcost = rccost;
-                       rcand = true;
-               }
+               *cand = rcand;
        }
        *hash = rhash;
        *phash = prhash;
-       *cand = rcand;
        return rcost;
 }
 
@@ -3913,7 +3916,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                        goto doreturn;
                }
        }
-       rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand);
+       rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
        if (rcost < 0) {
                rc = GDK_FAIL;
                goto doreturn;
@@ -3926,7 +3929,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                bool lhash, plhash, lcand;
                double lcost;
 
-               lcost = joincost(l, &rci, &lci, &lhash, &plhash, &lcand);
+               lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
                if (lcost < 0) {
                        rc = GDK_FAIL;
                        goto doreturn;
@@ -4260,8 +4263,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
                goto doreturn;
        }
 
-       lcost = joincost(l, &rci, &lci, &lhash, &plhash, &lcand);
-       rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand);
+       lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
+       rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
        if (lcost < 0 || rcost < 0) {
                rc = GDK_FAIL;
                goto doreturn;
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -235,6 +235,8 @@ void IMPSincref(Imprints *imprints)
 void IMPSprint(BAT *b)         /* never called: for debugging only */
        __attribute__((__cold__));
 #endif
+double joincost(BAT *r, BUN lcount, struct canditer *rci, bool *hash, bool 
*phash, bool *cand)
+       __attribute__((__visibility__("hidden")));
 void STRMPincref(Strimps *strimps)
        __attribute__((__visibility__("hidden")));
 void STRMPdecref(Strimps *strimps, bool remove)
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1495,6 +1495,8 @@ BATselect(BAT *b, BAT *s, const void *tl
                return bn;
        }
 
+       /* figure out how the searched for range compares with the known
+        * minimum and maximum values */
        if (anti) {
                int c;
 
@@ -1614,82 +1616,37 @@ BATselect(BAT *b, BAT *s, const void *tl
         * persistent and the total size wouldn't be too large; check
         * for existence of hash last since that may involve I/O */
        if (equi) {
-               havehash = BATcheckhash(b);
-               if (havehash) {
-                       MT_rwlock_rdlock(&b->thashlock);
-                       if (b->thash == NULL) {
-                               MT_rwlock_rdunlock(&b->thashlock);
-                               havehash = false;
+               double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
+               if (cost > 0 && cost < ci.ncand) {
+                       wanthash = true;
+                       if (havehash) {
+                               if (phash) {
+                                       MT_rwlock_rdlock(&pb->thashlock);
+                                       if (pb->thash == NULL) {
+                                               
MT_rwlock_rdunlock(&pb->thashlock);
+                                               havehash = false;
+                                       }
+                               } else {
+                                       MT_rwlock_rdlock(&b->thashlock);
+                                       if (b->thash == NULL) {
+                                               
MT_rwlock_rdunlock(&b->thashlock);
+                                               havehash = false;
+                                       }
+                               }
                        }
-               }
-               wanthash = havehash ||
-                       ((!bi.transient ||
-                         (b->batRole == PERSISTENT && GDKinmemory(0))) &&
-                        ATOMsize(bi.type) >= sizeof(BUN) / 4 &&
-                        bi.count * (ATOMsize(bi.type) + 2 * sizeof(BUN)) < 
GDK_mem_maxsize / 2);
-               if (!wanthash) {
-                       MT_lock_set(&b->theaplock);
-                       if (++b->selcnt > 1000) {
-                               wanthash = true;
-                               b->selcnt = 1000; /* limit value */
+                       if (wanthash && !havehash) {
+                               MT_lock_set(&b->theaplock);
+                               if (++b->selcnt > 1000)
+                                       b->selcnt = 1000; /* limit value */
+                               else
+                                       wanthash = false;
+                               MT_lock_unset(&b->theaplock);
                        }
-                       MT_lock_unset(&b->theaplock);
-               }
-               if (wanthash && !havehash) {
-                       if (bi.unique_est != 0 &&
-                           bi.unique_est < bi.count / no_hash_select_fraction) 
{
-                               /* too many duplicates: not worth it */
-                               wanthash = false;
-                       }
+               } else {
+                       wanthash = havehash = false;
                }
        }
 
-       if (equi && !havehash && pb != NULL) {
-               /* use parent hash if it already exists and if either
-                * a quick check shows the value we're looking for
-                * does not occur, or if it is cheaper to check the
-                * candidate list for each value in the hash chain
-                * than to scan (cost for probe is average length of
-                * hash chain (count divided by #slots) times the cost
-                * to do a binary search on the candidate list (or 1
-                * if no need for search)) */
-               if (BATcheckhash(pb)) {
-                       MT_rwlock_rdlock(&pb->thashlock);
-                       phash = pb->thash != NULL &&
-                               (pbi.count == bi.count ||
-                                pbi.count / pb->thash->nheads * (ci.tpe != 
cand_dense ? ilog2(BATcount(s)) : 1) < (s ? BATcount(s) : bi.count) ||
-                                HASHget(pb->thash, HASHprobe(pb->thash, tl)) 
== BUN_NONE);
-                       if (phash)
-                               havehash = wanthash = true;
-                       else
-                               MT_rwlock_rdunlock(&pb->thashlock);
-               }
-               /* create a hash on the parent bat (and use it) if it is
-                * the same size as the view and it is persistent */
-               bool wantphash = false;
-               if (!phash) {
-                       MT_lock_set(&pb->theaplock);
-                       if (++pb->selcnt > 1000) {
-                               wantphash = true;
-                               pb->selcnt = 1000;
-                       }
-                       MT_lock_unset(&pb->theaplock);
-               }
-               if (!phash &&
-                   (!pbi.transient ||
-                    wantphash ||
-                    (pb->batRole == PERSISTENT && GDKinmemory(0))) &&
-                   pbi.count == bi.count &&
-                   (pbi.unique_est == 0 ||
-                    pbi.unique_est >= pbi.count / no_hash_select_fraction) &&
-                   BAThash(pb) == GDK_SUCCEED) {
-                       MT_rwlock_rdlock(&pb->thashlock);
-                       if (pb->thash)
-                               havehash = wanthash = phash = true;
-                       else
-                               MT_rwlock_rdunlock(&pb->thashlock);
-               }
-       }
        /* at this point, if havehash is set, we have the hash lock
         * the lock is on the parent if phash is set, on b itself if not
         * also, if havehash is set, then so is wanthash (but not v.v.) */
diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h
--- a/sql/include/sql_catalog.h
+++ b/sql/include/sql_catalog.h
@@ -321,7 +321,7 @@ typedef struct sql_trans {
        list *dependencies;     /* list of dependencies created (list of sqlids 
from the objects) */
        list *depchanges;       /* list of dependencies changed (it would be 
tested for conflicts at the end of the transaction) */
 
-       int logchanges;         /* count number of changes to be applied to the 
wal */
+       lng logchanges;         /* count number of changes to be applied to the 
wal */
        int active;                     /* is active transaction */
        int status;                     /* status of the last query */
 
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to