Changeset: 1f250f6b56ba for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1f250f6b56ba
Modified Files:
gdk/gdk.h
gdk/gdk_select.c
Branch: default
Log Message:
A couple of changes to speed up unique constraint checking.
If we notice that we're doing a whole bunch of point selects on a
transient bat, it's time to create a hash for it. Also, when there is a
hash, and the hash indicates all values are distinct, use that for
figuring out an estimate for the result size.
diffs (77 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -775,6 +775,8 @@ typedef struct BAT {
batTransient:1; /* should the BAT persist on disk? */
uint8_t /* adjacent bit fields are packed together (if they fit) */
batRestricted:2; /* access privileges */
+ uint16_t /* adjacent bit fields are packed together (if they fit) */
+ selcnt:10; /* how often used in equi select without hash */
role_t batRole; /* role of the bat */
uint16_t unused; /* value=0 for now (sneakily used by mat.c) */
int batSharecnt; /* incoming view count */
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1603,6 +1603,14 @@ BATselect(BAT *b, BAT *s, const void *tl
(!b->batTransient &&
ATOMsize(b->ttype) >= sizeof(BUN) / 4 &&
BATcount(b) * (ATOMsize(b->ttype) + 2 * sizeof(BUN)) <
GDK_mem_maxsize / 2);
+ if (!wanthash) {
+ MT_lock_set(&b->theaplock);
+ if (++b->selcnt > 1000) {
+ wanthash = true;
+ b->selcnt = 1000; /* limit value */
+ }
+ MT_lock_unset(&b->theaplock);
+ }
if (wanthash && !havehash) {
MT_lock_set(&b->theaplock);
if (b->tunique_est != 0 &&
@@ -1624,7 +1632,7 @@ BATselect(BAT *b, BAT *s, const void *tl
* to do a binary search on the candidate list (or 1
* if no need for search)) */
tmp = BBP_cache(parent);
- if (tmp && BATcheckhash(tmp)) {
+ if (BATcheckhash(tmp)) {
MT_rwlock_rdlock(&tmp->thashlock);
phash = tmp->thash != NULL &&
(BATcount(tmp) == BATcount(b) ||
@@ -1637,8 +1645,17 @@ BATselect(BAT *b, BAT *s, const void *tl
}
/* create a hash on the parent bat (and use it) if it is
* the same size as the view and it is persistent */
+ bool wantphash = false;
+ if (!phash) {
+ MT_lock_set(&tmp->theaplock);
+ if (++tmp->selcnt > 1000) {
+ wantphash = true;
+ tmp->selcnt = 1000;
+ }
+ MT_lock_unset(&tmp->theaplock);
+ }
if (!phash &&
- !tmp->batTransient &&
+ (!tmp->batTransient || wantphash) &&
BATcount(tmp) == BATcount(b) &&
BAThash(tmp) == GDK_SUCCEED) {
MT_rwlock_rdlock(&tmp->thashlock);
@@ -1885,7 +1902,17 @@ BATselect(BAT *b, BAT *s, const void *tl
assert(oidxh == NULL);
/* upper limit for result size */
maximum = ci.ncand;
- if (b->tkey) {
+ if (equi && havehash) {
+ /* we can look in the hash struct to see whether all
+ * values are distinct and set estimate accordingly */
+ if (phash) {
+ BAT *tmp = BBP_cache(parent);
+ if (tmp->thash->nunique == tmp->batCount)
+ estimate = 1;
+ } else if (b->thash->nunique == b->batCount)
+ estimate = 1;
+ }
+ if (estimate == BUN_NONE && (b->tkey || (parent != 0 &&
BBP_cache(parent)->tkey))) {
/* exact result size in special cases */
if (equi) {
estimate = 1;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]