Changeset: d0d459b6e7fa for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d0d459b6e7fa
Modified Files:
        gdk/gdk_batop.c
Branch: Mar2025
Log Message:

When unsharing a string heap, reinsert the strings individually if the view is significantly smaller than its parent.


diffs (86 lines):

diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -30,7 +30,6 @@ unshare_varsized_heap(BAT *b)
                Heap *h = GDKmalloc(sizeof(Heap));
                if (h == NULL)
                        return GDK_FAIL;
-               MT_thread_setalgorithm("unshare vheap");
                *h = (Heap) {
                        .parentid = b->batCacheid,
                        .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
@@ -38,6 +37,74 @@ unshare_varsized_heap(BAT *b)
                };
                strconcat_len(h->filename, sizeof(h->filename),
                              BBP_physical(b->batCacheid), ".theap", NULL);
+               /* If the parent bat is much larger (currently more than
+                * twice as large) than the view, we copy the strings
+                * individually so that the resulting vheap is reduced
+                * in size.
+                * We do this only if there are no other references to
+                * the bat, since we're changing the offset heap and the
+                * values are going to be temporarily incorrect. */
+               MT_lock_set(&GDKswapLock(b->batCacheid));
+               if (BBP_refs(b->batCacheid) == 1 &&
+                   BATcount(BBP_desc(b->tvheap->parentid)) > 2 * BATcount(b)) {
+                       MT_thread_setalgorithm("unshare vheap reinsert strings");
+                       MT_lock_set(&b->theaplock);
+                       strHeap(h, b->batCapacity);
+                       Heap *oh = b->tvheap;
+                       b->tvheap = h;
+                       var_t o;
+                       switch (b->twidth) {
+                       case 1:
+                               for (BUN i = 0; i < b->batCount; i++) {
+                                       o = (var_t) ((uint8_t *) b->theap->base)[i] + GDK_VAROFFSET;
+                                       if (strPut(b, &o, oh->base + o) == (var_t) -1)
+                                               goto bailout;
+                                       ((uint8_t *) b->theap->base)[i] = (uint8_t) (o - GDK_VAROFFSET);
+                               }
+                               break;
+                       case 2:
+                               for (BUN i = 0; i < b->batCount; i++) {
+                                       o = (var_t) ((uint16_t *) b->theap->base)[i] + GDK_VAROFFSET;
+                                       if (strPut(b, &o, oh->base + o) == (var_t) -1)
+                                               goto bailout;
+                                       ((uint16_t *) b->theap->base)[i] = (uint16_t) (o - GDK_VAROFFSET);
+                               }
+                               break;
+#if SIZEOF_VAR_T == 8
+                       case 4:
+                               for (BUN i = 0; i < b->batCount; i++) {
+                                       o = (var_t) ((uint32_t *) b->theap->base)[i];
+                                       if (strPut(b, &o, oh->base + o) == (var_t) -1)
+                                               goto bailout;
+                                       ((uint32_t *) b->theap->base)[i] = (uint32_t) o;
+                               }
+                               break;
+#endif
+                       case SIZEOF_VAR_T:
+                               for (BUN i = 0; i < b->batCount; i++) {
+                                       o = ((var_t *) b->theap->base)[i];
+                                       if (strPut(b, &o, oh->base + o) == (var_t) -1)
+                                               goto bailout;
+                                       ((var_t *) b->theap->base)[i] = o;
+                               }
+                               break;
+                       default:
+                               MT_UNREACHABLE();
+                       }
+                       MT_lock_unset(&b->theaplock);
+                       MT_lock_unset(&GDKswapLock(b->batCacheid));
+                       BBPrelease(oh->parentid);
+                       HEAPdecref(oh, false);
+                       return GDK_SUCCEED;
+                 bailout:
+                       MT_lock_unset(&b->theaplock);
+                       MT_lock_unset(&GDKswapLock(b->batCacheid));
+                       BBPrelease(oh->parentid);
+                       HEAPdecref(oh, false);
+                       return GDK_FAIL;
+               }
+               MT_lock_unset(&GDKswapLock(b->batCacheid));
+               MT_thread_setalgorithm("unshare vheap by copying");
                if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
                        HEAPfree(h, true);
                        GDKfree(h);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to