Changeset: a6b6796177b6 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a6b6796177b6
Modified Files:
        gdk/gdk_batop.c
Branch: Dec2016
Log Message:

Use bulk processing to append to a BOUND2BTRUE column.


diffs (171 lines):

diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -79,9 +79,7 @@ insert_string_bat(BAT *b, BAT *n, int fo
        tp = NULL;
        if ((!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) &&
            !GDK_ELIMDOUBLES(n->tvheap) &&
-           b->tvheap->hashash == n->tvheap->hashash &&
-           /* if needs to be kept unique, take slow path */
-           (b->tkey & BOUND2BTRUE) == 0) {
+           b->tvheap->hashash == n->tvheap->hashash) {
                if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) {
                        /* If b is in the transient farm (i.e. b will
                         * never become persistent), we try some
@@ -276,19 +274,6 @@ insert_string_bat(BAT *b, BAT *n, int fo
                        }
                        bunfastapp(b, tp);
                }
-       } else if (b->tkey & BOUND2BTRUE) {
-               BUN i = BUNlast(b);
-               /* if no duplicate values allowed, insert one-by-one */
-               BATloop(n, p, q) {
-                       tp = BUNtvar(ni, p);
-                       if (BUNfnd(b, tp) == BUN_NONE) {
-                               bunfastapp(b, tp);
-                               if (b->thash) {
-                                       HASHins(b, i, tp);
-                               }
-                               i++;
-                       }
-               }
        } else if (b->tvheap->free < n->tvheap->free / 2) {
                /* if b's string heap is much smaller than n's string
                 * heap, don't bother checking whether n's string
@@ -382,7 +367,36 @@ BATappend(BAT *b, BAT *n, bit force)
        ALIGNapp(b, "BATappend", force, GDK_FAIL);
        BATcompatible(b, n, GDK_FAIL, "BATappend");
 
+       if (b->tkey & BOUND2BTRUE) {
+               /* if b has the BOUND2BTRUE bit set, only insert
+                * values from n that don't already occur in b, and
+                * make sure we don't insert any duplicates either; we
+                * do this by calculating a subset of n that complies
+                * with this */
+               BAT *d, *u;
+
+               d = BATdiff(n, b, NULL, NULL, 1, BUN_NONE);
+               if (d == NULL)
+                       return GDK_FAIL;
+               u = BATunique(n, d);
+               BBPunfix(d->batCacheid);
+               if (u == NULL)
+                       return GDK_FAIL;
+               n = BATproject(u, n);
+               BBPunfix(u->batCacheid);
+               if (n == NULL)
+                       return GDK_FAIL;
+               sz = BATcount(n);
+               if (sz == 0) {
+                       /* no new values in subset of n */
+                       BBPunfix(n->batCacheid);
+                       return GDK_SUCCEED;
+               }
+       }
+
        if (BUNlast(b) + BATcount(n) > BUN_MAX) {
+               if (b->tkey & BOUND2BTRUE)
+                       BBPunfix(n->batCacheid);
                GDKerror("BATappend: combined BATs too large\n");
                return GDK_FAIL;
        }
@@ -416,18 +430,23 @@ BATappend(BAT *b, BAT *n, bit force)
                if (BATtdense(n) && BATcount(b) + b->tseqbase == f) {
                        sz += BATcount(b);
                        BATsetcount(b, sz);
+                       if (b->tkey & BOUND2BTRUE)
+                               BBPunfix(n->batCacheid);
                        return GDK_SUCCEED;
                }
                /* we need to materialize the tail */
-               if (BATmaterialize(b) != GDK_SUCCEED)
+               if (BATmaterialize(b) != GDK_SUCCEED) {
+                       if (b->tkey & BOUND2BTRUE)
+                               BBPunfix(n->batCacheid);
                        return GDK_FAIL;
+               }
        }
 
        /* if growing too much, remove the hash, else we maintain it */
        if (BATcheckhash(b) && (2 * b->thash->mask) < (BATcount(b) + sz)) {
                HASHdestroy(b);
        }
-       if (b->thash != NULL || (b->tkey & BOUND2BTRUE) != 0)
+       if (b->thash != NULL)
                fastpath = 0;
 
        if (fastpath) {
@@ -479,8 +498,11 @@ BATappend(BAT *b, BAT *n, bit force)
                        }
                }
                if (b->ttype == TYPE_str) {
-                       if (insert_string_bat(b, n, force) != GDK_SUCCEED)
+                       if (insert_string_bat(b, n, force) != GDK_SUCCEED) {
+                               if (b->tkey & BOUND2BTRUE)
+                                       BBPunfix(n->batCacheid);
                                return GDK_FAIL;
+                       }
                } else {
                        if (!ATOMvarsized(b->ttype) &&
                            BATatoms[b->ttype].atomFix == NULL &&
@@ -504,41 +526,32 @@ BATappend(BAT *b, BAT *n, bit force)
                BUN i = BUNlast(b);
                BATiter ni = bat_iterator(n);
 
-               if (b->tkey & BOUND2BTRUE) {
-                       b->tdense = b->tsorted = b->trevsorted = 0;
-                       BATloop(n, p, q) {
-                               const void *t = BUNtail(ni, p);
+               if (b->hseqbase + BATcount(b) + BATcount(n) >= GDK_oid_max) {
+                       if (b->tkey & BOUND2BTRUE)
+                               BBPunfix(n->batCacheid);
+                       GDKerror("BATappend: overflow of head value\n");
+                       return GDK_FAIL;
+               }
 
-                               if (BUNfnd(b, t) == BUN_NONE) {
-                                       bunfastapp(b, t);
-                                       if (b->thash) {
-                                               HASHins(b, i, t);
-                                       }
-                                       i++;
-                               }
+               BATloop(n, p, q) {
+                       const void *t = BUNtail(ni, p);
+
+                       bunfastapp(b, t);
+                       if (b->thash) {
+                               HASHins(b, i, t);
                        }
-               } else {
-                       if (b->hseqbase + BATcount(b) + BATcount(n) >= 
GDK_oid_max) {
-                               GDKerror("BATappend: overflow of head value\n");
-                               return GDK_FAIL;
-                       }
-
-                       BATloop(n, p, q) {
-                               const void *t = BUNtail(ni, p);
-
-                               bunfastapp(b, t);
-                               if (b->thash) {
-                                       HASHins(b, i, t);
-                               }
-                               i++;
-                       }
-                       BATkey(b, FALSE);
-                       b->tdense = b->tsorted = b->trevsorted = 0;
+                       i++;
                }
+               BATkey(b, FALSE);
+               b->tdense = b->tsorted = b->trevsorted = 0;
        }
        b->tnonil &= n->tnonil;
+       if (b->tkey & BOUND2BTRUE)
+               BBPunfix(n->batCacheid);
        return GDK_SUCCEED;
       bunins_failed:
+       if (b->tkey & BOUND2BTRUE)
+               BBPunfix(n->batCacheid);
        return GDK_FAIL;
 }
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to