Changeset: 2e3d1d0cbc4a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2e3d1d0cbc4a
Modified Files:
        gdk/ChangeLog.Sep2022
        gdk/gdk_bbp.c
        gdk/gdk_tm.c
Branch: Sep2022
Log Message:

Fixed a race between BBPsync and GDKupgradevarheap that resulted in a missing 
file.
If GDKupgradevarheap was called when the bat was being committed, if the
timing was just "right", the newly saved file (by BBPsync) might get
removed by the commit epilogue.  GDKupgradevarheap recorded the old tail
in oldtail, and epilogue just removed all oldtail heaps, even if the
oldtail pointer was NULL when BBPsync started.


diffs (99 lines):

diff --git a/gdk/ChangeLog.Sep2022 b/gdk/ChangeLog.Sep2022
--- a/gdk/ChangeLog.Sep2022
+++ b/gdk/ChangeLog.Sep2022
@@ -1,6 +1,10 @@
 # ChangeLog file for GDK
 # This file is updated with Maddlog
 
+* Thu Feb 16 2023 Sjoerd Mullender <[email protected]>
+- A race condition was fixed that could result in a missing tail file
+  for a string bat (i.e. a file with .tail1, .tail2, or .tail4 extension).
+
 * Mon Feb 13 2023 Sjoerd Mullender <[email protected]>
 - When saving a bat failed for some reason during a low-level commit,
   this was logged in the log file, but the error was then subsequently
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -3880,7 +3880,21 @@ BBPsync(int cnt, bat *restrict subcommit
                        if (size > bi.count) /* includes sizes==NULL */
                                size = bi.count;
                        bi.b->batInserted = size;
-                       if (b && size != 0) {
+                       if (ATOMvarsized(bi.b->ttype)) {
+                               /* see epilogue() for other part of this */
+                               MT_lock_set(&bi.b->theaplock);
+                               /* remember the tail we're saving */
+                               if (BATsetprop_nolock(bi.b, (enum prop_t) 20, 
TYPE_ptr, &bi.h) == NULL) {
+                                       GDKerror("setprop failed\n");
+                                       ret = GDK_FAIL;
+                               } else {
+                                       if (bi.b->oldtail == NULL)
+                                               bi.b->oldtail = (Heap *) 1;
+                                       HEAPincref(bi.h);
+                               }
+                               MT_lock_unset(&bi.b->theaplock);
+                       }
+                       if (ret == GDK_SUCCEED && b && size != 0) {
                                /* wait for BBPSAVING so that we
                                 * can set it, wait for
                                 * BBPUNLOADING before
@@ -3958,6 +3972,23 @@ BBPsync(int cnt, bat *restrict subcommit
                  ret == GDK_SUCCEED ? "" : " failed",
                  (t0 = GDKms()) - t1);
 
+       if (ret != GDK_SUCCEED) {
+               /* clean up extra refs we created */
+               for (int idx = 1; idx < cnt; idx++) {
+                       bat i = subcommit ? subcommit[idx] : idx;
+                       BAT *b = BBP_desc(i);
+                       if (b && ATOMvarsized(b->ttype)) {
+                               MT_lock_set(&b->theaplock);
+                               ValPtr p = BATgetprop_nolock(b, (enum prop_t) 
20);
+                               if (p != NULL) {
+                                       HEAPdecref(p->val.pval, false);
+                                       BATrmprop_nolock(b, (enum prop_t) 20);
+                               }
+                               MT_lock_unset(&b->theaplock);
+                       }
+               }
+       }
+
        /* turn off the BBPSYNCING bits for all bats, even when things
         * didn't go according to plan (i.e., don't check for ret ==
         * GDK_SUCCEED) */
diff --git a/gdk/gdk_tm.c b/gdk/gdk_tm.c
--- a/gdk/gdk_tm.c
+++ b/gdk/gdk_tm.c
@@ -83,12 +83,26 @@ epilogue(int cnt, bat *subcommit, bool l
                        }
                }
                b = BBP_desc(bid);
-               if (b) {
+               if (b && ATOMvarsized(b->ttype)) {
                        MT_lock_set(&b->theaplock);
-                       if (b->oldtail) {
-                               ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE);
-                               HEAPdecref(b->oldtail, true);
-                               b->oldtail = NULL;
+                       ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20);
+                       if (p != NULL) {
+                               Heap *tail = p->val.pval;
+                               assert(b->oldtail != NULL);
+                               BATrmprop_nolock(b, (enum prop_t) 20);
+                               if (b->oldtail != (Heap *) 1)
+                                       HEAPdecref(b->oldtail, true);
+                               if (tail == b->theap ||
+                                   strcmp(tail->filename,
+                                          b->theap->filename) == 0) {
+                                       /* no upgrades done since saving
+                                        * started */
+                                       b->oldtail = NULL;
+                                       HEAPdecref(tail, false);
+                               } else {
+                                       b->oldtail = tail;
+                                       ATOMIC_OR(&tail->refs, DELAYEDREMOVE);
+                               }
                        }
                        MT_lock_unset(&b->theaplock);
                }
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to