This is an automated email from the ASF dual-hosted git repository.

zyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e9ef0aa4a9 PBTree: Prevent Concurrent Read With Stale Page (#11831)
8e9ef0aa4a9 is described below

commit 8e9ef0aa4a98905f585bd6de69f9fb63b6834d69
Author: ZhaoXin <[email protected]>
AuthorDate: Wed Jan 3 20:51:59 2024 +0800

    PBTree: Prevent Concurrent Read With Stale Page (#11831)
---
 .../schemafile/pagemgr/BTreePageManager.java       | 68 ++++++++++++++++++++--
 .../pbtree/schemafile/pagemgr/PageManager.java     | 11 ++--
 2 files changed, 69 insertions(+), 10 deletions(-)

diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
index 9cef58464d6..00bafcaf2ff 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
@@ -392,6 +392,62 @@ public class BTreePageManager extends PageManager {
     }
   }
 
+  /**
+   * Since transplant and replacement may invalidate page instance held by a 
blocked read-thread,
+   * the read-thread need to validate its page object once it obtained the 
lock.
+   *
+   * @param initPage page instance with read lock held
+   * @param parent MTree node to read children, containing latest segment 
address
+   * @param cxt update context if page/segment modified
+   * @return validated page
+   */
+  private ISchemaPage validatePage(ISchemaPage initPage, ICachedMNode parent, 
SchemaPageContext cxt)
+      throws IOException, MetadataException {
+    boolean safeFlag = false;
+    // check like crabbing
+    ISchemaPage crabPage;
+    while (!safeFlag) {
+      SchemaPageContext doubleCheckContext = new SchemaPageContext();
+      if (getPageIndex(getNodeAddress(parent)) != initPage.getPageIndex()) {
+        // transplanted, release the stale and check the new
+        long addrB4Lock = getNodeAddress(parent);
+        int piB4Lock = getPageIndex(addrB4Lock);
+
+        crabPage = getPageInstance(piB4Lock, doubleCheckContext);
+        crabPage.getLock().readLock().lock();
+
+        initPage.decrementAndGetRefCnt();
+        initPage.getLock().readLock().unlock();
+
+        // UNNECESSARY to TRACE lock since the very page will be unlocked at 
end of read
+        cxt.referredPages.remove(initPage.getPageIndex());
+        cxt.referredPages.put(crabPage.getPageIndex(), crabPage);
+        initPage = crabPage;
+        continue;
+      }
+
+      crabPage = getPageInstance(initPage.getPageIndex(), doubleCheckContext);
+      if (crabPage != initPage) {
+        // replaced, the lock and ref count should be the same
+        if (crabPage.getLock() != initPage.getLock()
+            || crabPage.getRefCnt() != initPage.getRefCnt()) {
+          crabPage.decrementAndGetRefCnt();
+          initPage.decrementAndGetRefCnt();
+          initPage.getLock().readLock().unlock();
+          throw new MetadataException(
+              "Page[%d] replacement error: Different ref count or lock 
object.");
+        }
+        // update context is enough, ref and lock is left for main read process
+        cxt.referredPages.put(initPage.getPageIndex(), crabPage);
+        initPage = crabPage;
+      }
+      // same page shall only be referred once
+      crabPage.decrementAndGetRefCnt();
+      safeFlag = true;
+    }
+    return initPage;
+  }
+
   @Override
   public ICachedMNode getChildNode(ICachedMNode parent, String childName)
       throws MetadataException, IOException {
@@ -406,7 +462,9 @@ public class BTreePageManager extends PageManager {
 
     // a single read lock on initial page is sufficient to mutex write, no 
need to trace it
     int initIndex = getPageIndex(getNodeAddress(parent));
-    getPageInstance(initIndex, cxt).getLock().readLock().lock();
+    ISchemaPage initPage = getPageInstance(initIndex, cxt);
+    initPage.getLock().readLock().lock();
+    initPage = validatePage(initPage, parent, cxt);
     try {
       long actualSegAddr = getTargetSegmentAddress(getNodeAddress(parent), 
childName, cxt);
       ICachedMNode child =
@@ -429,7 +487,7 @@ public class BTreePageManager extends PageManager {
       }
       return child;
     } finally {
-      getPageInstance(initIndex, cxt).getLock().readLock().unlock();
+      initPage.getLock().readLock().unlock();
       releaseReferent(cxt);
       threadContexts.remove(Thread.currentThread().getId(), cxt);
     }
@@ -460,8 +518,10 @@ public class BTreePageManager extends PageManager {
     int pageIdx = getPageIndex(getNodeAddress(parent));
 
     short segId = getSegIndex(getNodeAddress(parent));
-    ISchemaPage page = getPageInstance(pageIdx, cxt);
+    ISchemaPage page = getPageInstance(pageIdx, cxt), pageHeldLock;
     page.getLock().readLock().lock();
+    page = validatePage(page, parent, cxt);
+    pageHeldLock = page;
 
     try {
       while (page.getAsSegmentedPage() == null) {
@@ -512,7 +572,7 @@ public class BTreePageManager extends PageManager {
       };
     } finally {
       // safety of iterator should be guaranteed by upper layer
-      getPageInstance(pageIdx, cxt).getLock().readLock().unlock();
+      pageHeldLock.getLock().readLock().unlock();
       releaseReferent(cxt);
     }
   }
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
index f48d25a67f4..5fecad18d6d 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
@@ -236,7 +236,7 @@ public abstract class PageManager implements IPageManager {
   /** Context only tracks write locks, and so it shall be released. */
   protected void releaseLocks(SchemaPageContext cxt) {
     for (int i : cxt.lockTraces) {
-      pageInstCache.get(i).getLock().writeLock().unlock();
+      cxt.referredPages.get(i).getLock().writeLock().unlock();
     }
   }
 
@@ -788,8 +788,7 @@ public abstract class PageManager implements IPageManager {
         cxt.traceLock(targetPage);
 
         // transfer the page from pageIndexBuckets to cxt.buckets thus not be 
accessed by other
-        // WRITE
-        // thread
+        //  WRITE thread
         cxt.indexBuckets.sortIntoBucket(targetPage, size);
         return targetPage.getAsSegmentedPage();
       }
@@ -1020,11 +1019,11 @@ public abstract class PageManager implements 
IPageManager {
       interleavedFlushCnt = 0;
     }
 
-    public void markDirty(ISchemaPage page) {
+    protected void markDirty(ISchemaPage page) {
       markDirty(page, false);
     }
 
-    private void markDirty(ISchemaPage page, boolean forceReplace) {
+    protected void markDirty(ISchemaPage page, boolean forceReplace) {
       if (!page.isDirtyPage()) {
         dirtyCnt++;
       }
@@ -1041,7 +1040,7 @@ public abstract class PageManager implements IPageManager 
{
       }
     }
 
-    private void traceLock(ISchemaPage page) {
+    protected void traceLock(ISchemaPage page) {
       refer(page);
       lockTraces.add(page.getPageIndex());
     }

Reply via email to