This is an automated email from the ASF dual-hosted git repository.
zyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new 8e9ef0aa4a9 PBTree: Prevent Concurrent Read With Stale Page (#11831)
8e9ef0aa4a9 is described below
commit 8e9ef0aa4a98905f585bd6de69f9fb63b6834d69
Author: ZhaoXin <[email protected]>
AuthorDate: Wed Jan 3 20:51:59 2024 +0800
PBTree: Prevent Concurrent Read With Stale Page (#11831)
---
.../schemafile/pagemgr/BTreePageManager.java | 68 ++++++++++++++++++++--
.../pbtree/schemafile/pagemgr/PageManager.java | 11 ++--
2 files changed, 69 insertions(+), 10 deletions(-)
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
index 9cef58464d6..00bafcaf2ff 100644
---
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/BTreePageManager.java
@@ -392,6 +392,62 @@ public class BTreePageManager extends PageManager {
}
}
+ /**
+ * Since transplant and replacement may invalidate page instance held by a
blocked read-thread,
+ * the read-thread need to validate its page object once it obtained the
lock.
+ *
+ * @param initPage page instance with read lock held
+ * @param parent MTree node to read children, containing latest segment
address
+ * @param cxt update context if page/segment modified
+ * @return validated page
+ */
+ private ISchemaPage validatePage(ISchemaPage initPage, ICachedMNode parent,
SchemaPageContext cxt)
+ throws IOException, MetadataException {
+ boolean safeFlag = false;
+ // check like crabbing
+ ISchemaPage crabPage;
+ while (!safeFlag) {
+ SchemaPageContext doubleCheckContext = new SchemaPageContext();
+ if (getPageIndex(getNodeAddress(parent)) != initPage.getPageIndex()) {
+ // transplanted, release the stale and check the new
+ long addrB4Lock = getNodeAddress(parent);
+ int piB4Lock = getPageIndex(addrB4Lock);
+
+ crabPage = getPageInstance(piB4Lock, doubleCheckContext);
+ crabPage.getLock().readLock().lock();
+
+ initPage.decrementAndGetRefCnt();
+ initPage.getLock().readLock().unlock();
+
+ // UNNECESSARY to TRACE lock since the very page will be unlocked at
end of read
+ cxt.referredPages.remove(initPage.getPageIndex());
+ cxt.referredPages.put(crabPage.getPageIndex(), crabPage);
+ initPage = crabPage;
+ continue;
+ }
+
+ crabPage = getPageInstance(initPage.getPageIndex(), doubleCheckContext);
+ if (crabPage != initPage) {
+ // replaced, the lock and ref count should be the same
+ if (crabPage.getLock() != initPage.getLock()
+ || crabPage.getRefCnt() != initPage.getRefCnt()) {
+ crabPage.decrementAndGetRefCnt();
+ initPage.decrementAndGetRefCnt();
+ initPage.getLock().readLock().unlock();
+ throw new MetadataException(
+ "Page[%d] replacement error: Different ref count or lock
object.");
+ }
+ // update context is enough, ref and lock is left for main read process
+ cxt.referredPages.put(initPage.getPageIndex(), crabPage);
+ initPage = crabPage;
+ }
+ // same page shall only be referred once
+ crabPage.decrementAndGetRefCnt();
+ safeFlag = true;
+ }
+ return initPage;
+ }
+
@Override
public ICachedMNode getChildNode(ICachedMNode parent, String childName)
throws MetadataException, IOException {
@@ -406,7 +462,9 @@ public class BTreePageManager extends PageManager {
// a single read lock on initial page is sufficient to mutex write, no
need to trace it
int initIndex = getPageIndex(getNodeAddress(parent));
- getPageInstance(initIndex, cxt).getLock().readLock().lock();
+ ISchemaPage initPage = getPageInstance(initIndex, cxt);
+ initPage.getLock().readLock().lock();
+ initPage = validatePage(initPage, parent, cxt);
try {
long actualSegAddr = getTargetSegmentAddress(getNodeAddress(parent),
childName, cxt);
ICachedMNode child =
@@ -429,7 +487,7 @@ public class BTreePageManager extends PageManager {
}
return child;
} finally {
- getPageInstance(initIndex, cxt).getLock().readLock().unlock();
+ initPage.getLock().readLock().unlock();
releaseReferent(cxt);
threadContexts.remove(Thread.currentThread().getId(), cxt);
}
@@ -460,8 +518,10 @@ public class BTreePageManager extends PageManager {
int pageIdx = getPageIndex(getNodeAddress(parent));
short segId = getSegIndex(getNodeAddress(parent));
- ISchemaPage page = getPageInstance(pageIdx, cxt);
+ ISchemaPage page = getPageInstance(pageIdx, cxt), pageHeldLock;
page.getLock().readLock().lock();
+ page = validatePage(page, parent, cxt);
+ pageHeldLock = page;
try {
while (page.getAsSegmentedPage() == null) {
@@ -512,7 +572,7 @@ public class BTreePageManager extends PageManager {
};
} finally {
// safety of iterator should be guaranteed by upper layer
- getPageInstance(pageIdx, cxt).getLock().readLock().unlock();
+ pageHeldLock.getLock().readLock().unlock();
releaseReferent(cxt);
}
}
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
index f48d25a67f4..5fecad18d6d 100644
---
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/mtree/impl/pbtree/schemafile/pagemgr/PageManager.java
@@ -236,7 +236,7 @@ public abstract class PageManager implements IPageManager {
/** Context only tracks write locks, and so it shall be released. */
protected void releaseLocks(SchemaPageContext cxt) {
for (int i : cxt.lockTraces) {
- pageInstCache.get(i).getLock().writeLock().unlock();
+ cxt.referredPages.get(i).getLock().writeLock().unlock();
}
}
@@ -788,8 +788,7 @@ public abstract class PageManager implements IPageManager {
cxt.traceLock(targetPage);
// transfer the page from pageIndexBuckets to cxt.buckets thus not be
accessed by other
- // WRITE
- // thread
+ // WRITE thread
cxt.indexBuckets.sortIntoBucket(targetPage, size);
return targetPage.getAsSegmentedPage();
}
@@ -1020,11 +1019,11 @@ public abstract class PageManager implements
IPageManager {
interleavedFlushCnt = 0;
}
- public void markDirty(ISchemaPage page) {
+ protected void markDirty(ISchemaPage page) {
markDirty(page, false);
}
- private void markDirty(ISchemaPage page, boolean forceReplace) {
+ protected void markDirty(ISchemaPage page, boolean forceReplace) {
if (!page.isDirtyPage()) {
dirtyCnt++;
}
@@ -1041,7 +1040,7 @@ public abstract class PageManager implements IPageManager
{
}
}
- private void traceLock(ISchemaPage page) {
+ protected void traceLock(ISchemaPage page) {
refer(page);
lockTraces.add(page.getPageIndex());
}