Changeset: 1720d839521d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1720d839521d
Modified Files:
gdk/gdk_join.c
gdk/gdk_private.h
gdk/gdk_select.c
sql/include/sql_catalog.h
Branch: default
Log Message:
merged with sep2022
diffs (266 lines):
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3304,8 +3304,8 @@ BATguess_uniques(BAT *b, struct canditer
/* estimate the cost of doing a hashjoin with a hash on r; return value
* is the estimated cost, the last three arguments receive some extra
* information */
-static double
-joincost(BAT *r, struct canditer *lci, struct canditer *rci,
+double
+joincost(BAT *r, BUN lcount, struct canditer *rci,
bool *hash, bool *phash, bool *cand)
{
bool rhash;
@@ -3331,7 +3331,7 @@ joincost(BAT *r, struct canditer *lci, s
* candidate types is essentially free */
rcost += log2((double) rci->nvals);
}
- rcost *= lci->ncand;
+ rcost *= lcount;
if (BATtdense(r)) {
/* no need for a hash, and lookup is free */
rhash = false; /* don't use it, even if it's there */
@@ -3376,38 +3376,41 @@ joincost(BAT *r, struct canditer *lci, s
#endif
}
}
- if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
- /* instead of using the hash on r (cost in rcost), we
- * can build a new hash on r taking the candidate list
- * into account; don't do this for masked candidate
- * since the searching of the candidate list
- * (canditer_idx) will kill us */
- double rccost;
- if (rhash && !prhash) {
- rccost = (double) cnt / nheads;
- } else {
- MT_lock_set(&r->theaplock);
- double unique_est = r->tunique_est;
- MT_lock_unset(&r->theaplock);
- if (unique_est == 0) {
- unique_est = guess_uniques(r, rci);
- if (unique_est < 0)
- return -1;
+ if (cand) {
+ if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
+ /* instead of using the hash on r (cost in
+ * rcost), we can build a new hash on r taking
+ * the candidate list into account; don't do
+ * this for masked candidate since the searching
+ * of the candidate list (canditer_idx) will
+ * kill us */
+ double rccost;
+ if (rhash && !prhash) {
+ rccost = (double) cnt / nheads;
+ } else {
+ MT_lock_set(&r->theaplock);
+ double unique_est = r->tunique_est;
+ MT_lock_unset(&r->theaplock);
+ if (unique_est == 0) {
+ unique_est = guess_uniques(r, rci);
+ if (unique_est < 0)
+ return -1;
+ }
+ /* we have an estimate of the number of unique
+ * values, assume some chains */
+ rccost = 1.1 * ((double) cnt / unique_est);
}
- /* we have an estimate of the number of unique
- * values, assume some chains */
- rccost = 1.1 * ((double) cnt / unique_est);
+ rccost *= lcount;
+ rccost += rci->ncand * 2.0; /* cost of building the
hash */
+ if (rccost < rcost) {
+ rcost = rccost;
+ rcand = true;
+ }
}
- rccost *= lci->ncand;
- rccost += rci->ncand * 2.0; /* cost of building the hash */
- if (rccost < rcost) {
- rcost = rccost;
- rcand = true;
- }
+ *cand = rcand;
}
*hash = rhash;
*phash = prhash;
- *cand = rcand;
return rcost;
}
@@ -3913,7 +3916,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
goto doreturn;
}
}
- rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand);
+ rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
if (rcost < 0) {
rc = GDK_FAIL;
goto doreturn;
@@ -3926,7 +3929,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
bool lhash, plhash, lcand;
double lcost;
- lcost = joincost(l, &rci, &lci, &lhash, &plhash, &lcand);
+ lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
if (lcost < 0) {
rc = GDK_FAIL;
goto doreturn;
@@ -4260,8 +4263,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
goto doreturn;
}
- lcost = joincost(l, &rci, &lci, &lhash, &plhash, &lcand);
- rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand);
+ lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
+ rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
if (lcost < 0 || rcost < 0) {
rc = GDK_FAIL;
goto doreturn;
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -235,6 +235,8 @@ void IMPSincref(Imprints *imprints)
void IMPSprint(BAT *b) /* never called: for debugging only */
__attribute__((__cold__));
#endif
+double joincost(BAT *r, BUN lcount, struct canditer *rci, bool *hash, bool
*phash, bool *cand)
+ __attribute__((__visibility__("hidden")));
void STRMPincref(Strimps *strimps)
__attribute__((__visibility__("hidden")));
void STRMPdecref(Strimps *strimps, bool remove)
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1495,6 +1495,8 @@ BATselect(BAT *b, BAT *s, const void *tl
return bn;
}
+ /* figure out how the searched for range compares with the known
+ * minimum and maximum values */
if (anti) {
int c;
@@ -1614,82 +1616,37 @@ BATselect(BAT *b, BAT *s, const void *tl
* persistent and the total size wouldn't be too large; check
* for existence of hash last since that may involve I/O */
if (equi) {
- havehash = BATcheckhash(b);
- if (havehash) {
- MT_rwlock_rdlock(&b->thashlock);
- if (b->thash == NULL) {
- MT_rwlock_rdunlock(&b->thashlock);
- havehash = false;
+ double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
+ if (cost > 0 && cost < ci.ncand) {
+ wanthash = true;
+ if (havehash) {
+ if (phash) {
+ MT_rwlock_rdlock(&pb->thashlock);
+ if (pb->thash == NULL) {
+
MT_rwlock_rdunlock(&pb->thashlock);
+ havehash = false;
+ }
+ } else {
+ MT_rwlock_rdlock(&b->thashlock);
+ if (b->thash == NULL) {
+
MT_rwlock_rdunlock(&b->thashlock);
+ havehash = false;
+ }
+ }
}
- }
- wanthash = havehash ||
- ((!bi.transient ||
- (b->batRole == PERSISTENT && GDKinmemory(0))) &&
- ATOMsize(bi.type) >= sizeof(BUN) / 4 &&
- bi.count * (ATOMsize(bi.type) + 2 * sizeof(BUN)) <
GDK_mem_maxsize / 2);
- if (!wanthash) {
- MT_lock_set(&b->theaplock);
- if (++b->selcnt > 1000) {
- wanthash = true;
- b->selcnt = 1000; /* limit value */
+ if (wanthash && !havehash) {
+ MT_lock_set(&b->theaplock);
+ if (++b->selcnt > 1000)
+ b->selcnt = 1000; /* limit value */
+ else
+ wanthash = false;
+ MT_lock_unset(&b->theaplock);
}
- MT_lock_unset(&b->theaplock);
- }
- if (wanthash && !havehash) {
- if (bi.unique_est != 0 &&
- bi.unique_est < bi.count / no_hash_select_fraction)
{
- /* too many duplicates: not worth it */
- wanthash = false;
- }
+ } else {
+ wanthash = havehash = false;
}
}
- if (equi && !havehash && pb != NULL) {
- /* use parent hash if it already exists and if either
- * a quick check shows the value we're looking for
- * does not occur, or if it is cheaper to check the
- * candidate list for each value in the hash chain
- * than to scan (cost for probe is average length of
- * hash chain (count divided by #slots) times the cost
- * to do a binary search on the candidate list (or 1
- * if no need for search)) */
- if (BATcheckhash(pb)) {
- MT_rwlock_rdlock(&pb->thashlock);
- phash = pb->thash != NULL &&
- (pbi.count == bi.count ||
- pbi.count / pb->thash->nheads * (ci.tpe !=
cand_dense ? ilog2(BATcount(s)) : 1) < (s ? BATcount(s) : bi.count) ||
- HASHget(pb->thash, HASHprobe(pb->thash, tl))
== BUN_NONE);
- if (phash)
- havehash = wanthash = true;
- else
- MT_rwlock_rdunlock(&pb->thashlock);
- }
- /* create a hash on the parent bat (and use it) if it is
- * the same size as the view and it is persistent */
- bool wantphash = false;
- if (!phash) {
- MT_lock_set(&pb->theaplock);
- if (++pb->selcnt > 1000) {
- wantphash = true;
- pb->selcnt = 1000;
- }
- MT_lock_unset(&pb->theaplock);
- }
- if (!phash &&
- (!pbi.transient ||
- wantphash ||
- (pb->batRole == PERSISTENT && GDKinmemory(0))) &&
- pbi.count == bi.count &&
- (pbi.unique_est == 0 ||
- pbi.unique_est >= pbi.count / no_hash_select_fraction) &&
- BAThash(pb) == GDK_SUCCEED) {
- MT_rwlock_rdlock(&pb->thashlock);
- if (pb->thash)
- havehash = wanthash = phash = true;
- else
- MT_rwlock_rdunlock(&pb->thashlock);
- }
- }
/* at this point, if havehash is set, we have the hash lock
* the lock is on the parent if phash is set, on b itself if not
* also, if havehash is set, then so is wanthash (but not v.v.) */
diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h
--- a/sql/include/sql_catalog.h
+++ b/sql/include/sql_catalog.h
@@ -321,7 +321,7 @@ typedef struct sql_trans {
list *dependencies; /* list of dependencies created (list of sqlids
from the objects) */
list *depchanges; /* list of dependencies changed (it would be
tested for conflicts at the end of the transaction) */
- int logchanges; /* count number of changes to be applied to the
wal */
+ lng logchanges; /* count number of changes to be applied to the
wal */
int active; /* is active transaction */
int status; /* status of the last query */
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]