Changeset: 2e3d1d0cbc4a for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/2e3d1d0cbc4a Modified Files: gdk/ChangeLog.Sep2022 gdk/gdk_bbp.c gdk/gdk_tm.c Branch: Sep2022 Log Message:
Fixed a race between BBPsync and GDKupgradevarheap that resulted in a missing file. If GDKupgradevarheap was called when the bat was being committed, if the timing was just "right", the newly saved file (by BBPsync) might get removed by the commit epilogue. GDKupgradevarheap recorded the old tail in oldtail, and epilogue just removed all oldtail heaps, even if the oldtail pointer was NULL when BBPsync started. diffs (99 lines): diff --git a/gdk/ChangeLog.Sep2022 b/gdk/ChangeLog.Sep2022 --- a/gdk/ChangeLog.Sep2022 +++ b/gdk/ChangeLog.Sep2022 @@ -1,6 +1,10 @@ # ChangeLog file for GDK # This file is updated with Maddlog +* Thu Feb 16 2023 Sjoerd Mullender <[email protected]> +- A race condition was fixed that could result in a missing tail file + for a string bat (i.e. a file with .tail1, .tail2, or .tail4 extension). + * Mon Feb 13 2023 Sjoerd Mullender <[email protected]> - When saving a bat failed for some reason during a low-level commit, this was logged in the log file, but the error was then subsequently diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -3880,7 +3880,21 @@ BBPsync(int cnt, bat *restrict subcommit if (size > bi.count) /* includes sizes==NULL */ size = bi.count; bi.b->batInserted = size; - if (b && size != 0) { + if (ATOMvarsized(bi.b->ttype)) { + /* see epilogue() for other part of this */ + MT_lock_set(&bi.b->theaplock); + /* remember the tail we're saving */ + if (BATsetprop_nolock(bi.b, (enum prop_t) 20, TYPE_ptr, &bi.h) == NULL) { + GDKerror("setprop failed\n"); + ret = GDK_FAIL; + } else { + if (bi.b->oldtail == NULL) + bi.b->oldtail = (Heap *) 1; + HEAPincref(bi.h); + } + MT_lock_unset(&bi.b->theaplock); + } + if (ret == GDK_SUCCEED && b && size != 0) { /* wait for BBPSAVING so that we * can set it, wait for * BBPUNLOADING before @@ -3958,6 +3972,23 @@ BBPsync(int cnt, bat *restrict subcommit ret == GDK_SUCCEED ? "" : " failed", (t0 = GDKms()) - t1); + if (ret != GDK_SUCCEED) { + /* clean up extra refs we created */ + for (int idx = 1; idx < cnt; idx++) { + bat i = subcommit ? subcommit[idx] : idx; + BAT *b = BBP_desc(i); + if (b && ATOMvarsized(b->ttype)) { + MT_lock_set(&b->theaplock); + ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20); + if (p != NULL) { + HEAPdecref(p->val.pval, false); + BATrmprop_nolock(b, (enum prop_t) 20); + } + MT_lock_unset(&b->theaplock); + } + } + } + /* turn off the BBPSYNCING bits for all bats, even when things * didn't go according to plan (i.e., don't check for ret == * GDK_SUCCEED) */ diff --git a/gdk/gdk_tm.c b/gdk/gdk_tm.c --- a/gdk/gdk_tm.c +++ b/gdk/gdk_tm.c @@ -83,12 +83,26 @@ epilogue(int cnt, bat *subcommit, bool l } } b = BBP_desc(bid); - if (b) { + if (b && ATOMvarsized(b->ttype)) { MT_lock_set(&b->theaplock); - if (b->oldtail) { - ATOMIC_AND(&b->oldtail->refs, ~DELAYEDREMOVE); - HEAPdecref(b->oldtail, true); - b->oldtail = NULL; + ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20); + if (p != NULL) { + Heap *tail = p->val.pval; + assert(b->oldtail != NULL); + BATrmprop_nolock(b, (enum prop_t) 20); + if (b->oldtail != (Heap *) 1) + HEAPdecref(b->oldtail, true); + if (tail == b->theap || + strcmp(tail->filename, + b->theap->filename) == 0) { + /* no upgrades done since saving + * started */ + b->oldtail = NULL; + HEAPdecref(tail, false); + } else { + b->oldtail = tail; + ATOMIC_OR(&tail->refs, DELAYEDREMOVE); + } } MT_lock_unset(&b->theaplock); } _______________________________________________ checkin-list mailing list -- [email protected] To unsubscribe send an email to [email protected]
