Changeset: 1f250f6b56ba for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1f250f6b56ba
Modified Files:
        gdk/gdk.h
        gdk/gdk_select.c
Branch: default
Log Message:

A couple of changes to speed up unique constraint checking.
If we notice that we're doing a whole bunch of point selects on a
transient bat, it's time to create a hash for it.  Also, when there is a
hash, and the hash indicates all values are distinct, use that for
figuring out an estimate for the result size.


diffs (77 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -775,6 +775,8 @@ typedef struct BAT {
         batTransient:1;        /* should the BAT persist on disk? */
        uint8_t /* adjacent bit fields are packed together (if they fit) */
         batRestricted:2;       /* access privileges */
+       uint16_t /* adjacent bit fields are packed together (if they fit) */
+        selcnt:10;             /* how often used in equi select without hash */
        role_t batRole;         /* role of the bat */
        uint16_t unused;        /* value=0 for now (sneakily used by mat.c) */
        int batSharecnt;        /* incoming view count */
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1603,6 +1603,14 @@ BATselect(BAT *b, BAT *s, const void *tl
                        (!b->batTransient &&
                         ATOMsize(b->ttype) >= sizeof(BUN) / 4 &&
                         BATcount(b) * (ATOMsize(b->ttype) + 2 * sizeof(BUN)) < 
GDK_mem_maxsize / 2);
+               if (!wanthash) {
+                       MT_lock_set(&b->theaplock);
+                       if (++b->selcnt > 1000) {
+                               wanthash = true;
+                               b->selcnt = 1000; /* limit value */
+                       }
+                       MT_lock_unset(&b->theaplock);
+               }
                if (wanthash && !havehash) {
                        MT_lock_set(&b->theaplock);
                        if (b->tunique_est != 0 &&
@@ -1624,7 +1632,7 @@ BATselect(BAT *b, BAT *s, const void *tl
                 * to do a binary search on the candidate list (or 1
                 * if no need for search)) */
                tmp = BBP_cache(parent);
-               if (tmp && BATcheckhash(tmp)) {
+               if (BATcheckhash(tmp)) {
                        MT_rwlock_rdlock(&tmp->thashlock);
                        phash = tmp->thash != NULL &&
                                (BATcount(tmp) == BATcount(b) ||
@@ -1637,8 +1645,17 @@ BATselect(BAT *b, BAT *s, const void *tl
                }
                /* create a hash on the parent bat (and use it) if it is
                 * the same size as the view and it is persistent */
+               bool wantphash = false;
+               if (!phash) {
+                       MT_lock_set(&tmp->theaplock);
+                       if (++tmp->selcnt > 1000) {
+                               wantphash = true;
+                               tmp->selcnt = 1000;
+                       }
+                       MT_lock_unset(&tmp->theaplock);
+               }
                if (!phash &&
-                   !tmp->batTransient &&
+                   (!tmp->batTransient || wantphash) &&
                    BATcount(tmp) == BATcount(b) &&
                    BAThash(tmp) == GDK_SUCCEED) {
                        MT_rwlock_rdlock(&tmp->thashlock);
@@ -1885,7 +1902,17 @@ BATselect(BAT *b, BAT *s, const void *tl
        assert(oidxh == NULL);
        /* upper limit for result size */
        maximum = ci.ncand;
-       if (b->tkey) {
+       if (equi && havehash) {
+               /* we can look in the hash struct to see whether all
+                * values are distinct and set estimate accordingly */
+               if (phash) {
+                       BAT *tmp = BBP_cache(parent);
+                       if (tmp->thash->nunique == tmp->batCount)
+                               estimate = 1;
+               } else if (b->thash->nunique == b->batCount)
+                       estimate = 1;
+       }
+       if (estimate == BUN_NONE && (b->tkey || (parent != 0 && 
BBP_cache(parent)->tkey))) {
                /* exact result size in special cases */
                if (equi) {
                        estimate = 1;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to