ignite-db - rotate page id on remove
Project: http://git-wip-us.apache.org/repos/asf/ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/fa320c93 Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/fa320c93 Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/fa320c93 Branch: refs/heads/ignite-db-x-gg-11124 Commit: fa320c9310a17146758782c8c26b4868da9c035d Parents: 646ba1d Author: S.Vladykin <[email protected]> Authored: Wed Apr 27 21:10:34 2016 +0300 Committer: S.Vladykin <[email protected]> Committed: Wed Apr 27 21:10:34 2016 +0300 ---------------------------------------------------------------------- .../internal/pagemem/PageIdAllocator.java | 1 - .../ignite/internal/pagemem/PageIdUtils.java | 21 ++++ .../cache/database/tree/BPlusTree.java | 110 +++++++++++++------ .../cache/database/tree/util/PageHandler.java | 3 +- .../processors/database/BPlusTreeSelfTest.java | 4 +- 5 files changed, 101 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ignite/blob/fa320c93/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdAllocator.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdAllocator.java b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdAllocator.java index 3c7f53e..b27e422 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdAllocator.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdAllocator.java @@ -16,7 +16,6 @@ public interface PageIdAllocator { public static final byte FLAG_IDX = 2; /** - * TODO do we need a generic abstraction for flags? * Allocates a page from the space for the given partition ID and the given flags. * * @param partId Partition ID. http://git-wip-us.apache.org/repos/asf/ignite/blob/fa320c93/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdUtils.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdUtils.java b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdUtils.java index 2fed4b3..2c28178 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdUtils.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/pagemem/PageIdUtils.java @@ -197,11 +197,32 @@ public final class PageIdUtils { return pageId(fileId, pageIdx); } + /** + * @param pageId Page ID. + * @return Flag. + */ public static byte flag(long pageId) { return (byte) (( pageId >>> (PART_ID_SIZE + PAGE_IDX_SIZE) ) & FLAG_MASK); } + /** + * @param pageId Page ID. + * @return Partition. + */ public static int partId(long pageId) { return (int) ((pageId >>> PAGE_IDX_SIZE) & PART_ID_MASK); } + + /** + * @param pageId Page ID. + * @return New page ID. + */ + public static long rotatePageId(long pageId) { + assert flag(pageId) == PageIdAllocator.FLAG_IDX; // Possible only for index pages. + + int partId = partId(pageId); + long pageIdx = pageIdx(pageId); + + return pageId(partId + 1, PageIdAllocator.FLAG_IDX, pageIdx); + } } http://git-wip-us.apache.org/repos/asf/ignite/blob/fa320c93/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/BPlusTree.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/BPlusTree.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/BPlusTree.java index 10d2ff5..ea53991 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/BPlusTree.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/BPlusTree.java @@ -31,6 +31,7 @@ import org.apache.ignite.internal.IgniteInterruptedCheckedException; import org.apache.ignite.internal.pagemem.FullPageId; import org.apache.ignite.internal.pagemem.Page; import org.apache.ignite.internal.pagemem.PageIdAllocator; +import org.apache.ignite.internal.pagemem.PageIdUtils; import org.apache.ignite.internal.pagemem.PageMemory; import org.apache.ignite.internal.processors.cache.database.tree.io.BPlusIO; import org.apache.ignite.internal.processors.cache.database.tree.io.BPlusInnerIO; @@ -97,7 +98,7 @@ public abstract class BPlusTree<L, T extends L> { return null; try (Page page = page(pageId)) { - ByteBuffer buf = page.getForRead(); + ByteBuffer buf = page.getForRead(); // No correctness guaranties. try { BPlusIO io = io(buf); @@ -144,7 +145,7 @@ public abstract class BPlusTree<L, T extends L> { return "<Zero>"; try (Page page = page(pageId)) { - ByteBuffer buf = page.getForRead(); + ByteBuffer buf = page.getForRead(); // No correctness guaranties. try { BPlusIO<L> io = io(buf); @@ -162,6 +163,38 @@ public abstract class BPlusTree<L, T extends L> { }; /** */ + private final PageHandler<Get> askNeighbor = new GetPageHandler<Get>() { + @Override public int run0(Page page, ByteBuffer buf, BPlusIO<L> io, Get g, int isBack) { + assert !io.isLeaf(); // Inner page. + + if (isBack == TRUE) { + // Count can be 0 here if it is a routing page, in this case we have a single child. + int idx = io.getCount(buf); + + // We need to do get the rightmost child: io.getRight(cnt - 1), + // here io.getLeft(cnt) is the same, but handles negative index if count is 0. + long res = inner(io).getLeft(buf, idx); + + assert res != 0 : "inner page with no route down: " + page.fullId(); + + g.backId = res; + } + else { + assert isBack == FALSE: isBack; + + // Leftmost child. + long res = inner(io).getLeft(buf, 0); + + assert res != 0 : "inner page with no route down: " + page.fullId(); + + g.fwdId = res; + } + + return Get.FOUND; + } + }; + + /** */ private final PageHandler<Get> search = new GetPageHandler<Get>() { @Override public int run0(Page page, ByteBuffer buf, BPlusIO<L> io, Get g, int lvl) throws IgniteCheckedException { @@ -210,7 +243,13 @@ public abstract class BPlusTree<L, T extends L> { // This is ok from the locking standpoint because we take all locks in the forward direction. long fwdId = io.getForward(buf); - g.fwdId = fwdId == 0 ? 0 : getChild(fwdId, false); + if (fwdId == 0) + g.fwdId = 0; + else { + int res = askNeighbor(fwdId, g, false); + + assert res == Get.FOUND; // We keep lock on current page, our forward can't be removed. + } if (cnt != 0) // It is not a routing page and we are going to the right, can get backId here. g.backId = inner(io).getLeft(buf, cnt - 1); @@ -542,7 +581,7 @@ public abstract class BPlusTree<L, T extends L> { * @return Root level. */ private int getRootLevel(Page meta) { - ByteBuffer buf = meta.getForRead(); + ByteBuffer buf = meta.getForRead(); // Meta can't be removed. try { return BPlusMetaIO.VERSIONS.forPage(buf).getRootLevel(buf); @@ -558,7 +597,7 @@ public abstract class BPlusTree<L, T extends L> { * @return Page ID. */ private long getFirstPageId(Page meta, int lvl) { - ByteBuffer buf = meta.getForRead(); + ByteBuffer buf = meta.getForRead(); // Meta can't be removed. try { BPlusMetaIO io = BPlusMetaIO.VERSIONS.forPage(buf); @@ -590,7 +629,7 @@ public abstract class BPlusTree<L, T extends L> { } try (Page first = page(firstPageId)) { - ByteBuffer buf = first.getForRead(); + ByteBuffer buf = first.getForRead(); // We always merge pages backwards, the first page is never removed. try { cursor.bootstrap(buf, 0); @@ -903,8 +942,13 @@ public abstract class BPlusTree<L, T extends L> { switch (res) { case Remove.GO_DOWN_X: + assert backId != 0; + // We need to get backId here for our page, it must be the last child of our back. - r.backId = getChild(backId, true); + res = askNeighbor(backId, r, true); + + if (res != Remove.FOUND) + return res; // Retry. // Intentional fallthrough. case Remove.GO_DOWN: @@ -1016,7 +1060,7 @@ public abstract class BPlusTree<L, T extends L> { } /** - * TODO may produce wrong results on concurrent access + * !!! For debug only! May produce wrong results on concurrent access. * * @return Size. * @throws IgniteCheckedException If failed. @@ -1033,7 +1077,7 @@ public abstract class BPlusTree<L, T extends L> { while (pageId != 0) { try (Page page = page(pageId)) { - ByteBuffer buf = page.getForRead(); + ByteBuffer buf = page.getForRead(); // No correctness guaranties. try { if (io == null) { @@ -1130,6 +1174,10 @@ public abstract class BPlusTree<L, T extends L> { io.setForward(fwdBuf, io.getForward(buf)); io.setForward(buf, PageIO.getPageId(fwdBuf)); + // Copy remove ID to make sure that if inner remove touched this page, then retry + // will happen even for newly allocated forward page. + io.setRemoveId(fwdBuf, io.getRemoveId(buf)); + return res; } @@ -1271,31 +1319,12 @@ public abstract class BPlusTree<L, T extends L> { /** * @param pageId Inner page ID. - * @param last If {@code true}, then get the last, else get the first child page. - * @return Child page ID. + * @param back Get back or forward page. + * @return Operation result. */ - private long getChild(long pageId, boolean last) throws IgniteCheckedException { + private int askNeighbor(long pageId, Get g, boolean back) throws IgniteCheckedException { try (Page page = page(pageId)) { - assert page != null : "we've locked back page, forward can't be merged"; - - ByteBuffer buf = page.getForRead(); - - try { - BPlusIO<L> io = io(buf); - - // Count can be 0 here if it is a routing page, in this case we have single child. - int idx = last ? io.getCount(buf) : 0; - - // getLeft(cnt) is the same as getRight(cnt - 1) - long res = inner(io).getLeft(buf, idx); - - assert res != 0: "inner page with no route down: " + page.fullId(); - - return res; - } - finally { - page.releaseRead(); - } + return readPage(page, askNeighbor, g, back ? TRUE : FALSE, Get.RETRY); } } @@ -1450,7 +1479,7 @@ public abstract class BPlusTree<L, T extends L> { int rootLvl; long rootId; - ByteBuffer buf = meta.getForRead(); + ByteBuffer buf = meta.getForRead(); // Meta can't be removed. try { BPlusMetaIO io = BPlusMetaIO.VERSIONS.forPage(buf); @@ -2070,6 +2099,12 @@ public abstract class BPlusTree<L, T extends L> { left.io.copyItems(right.buf, left.buf, 0, leftCnt, rightCnt, !emptyBranch); left.io.setForward(left.buf, right.io.getForward(right.buf)); + long rmvId = right.io.getRemoveId(right.buf); + + // Need to have maximum remove ID. + if (rmvId > left.io.getRemoveId(left.buf)) + left.io.setRemoveId(left.buf, rmvId); + // Fix index for the right move: remove last item. int prntIdx = prnt.idx == prntCnt ? prntCnt - 1 : prnt.idx; @@ -2104,6 +2139,9 @@ public abstract class BPlusTree<L, T extends L> { // Mark removed. io.setRemoveId(buf, Long.MAX_VALUE); + // Rotate page ID to avoid concurrency issues with reused pages. + PageIO.setPageId(buf, PageIdUtils.rotatePageId(PageIO.getPageId(buf))); + if (release) writeUnlockAndClose(page); @@ -2588,7 +2626,7 @@ public abstract class BPlusTree<L, T extends L> { if (fwdId != 0) { // Lock next page. page = page(fwdId); - buf = page.getForRead(); + buf = page.getForRead(); // We keep read lock on previous page, thus forward page can't be removed. } else { // Clear. page = null; @@ -2641,6 +2679,10 @@ public abstract class BPlusTree<L, T extends L> { private abstract class GetPageHandler<G extends Get> extends PageHandler<G> { /** {@inheritDoc} */ @Override public final int run(Page page, ByteBuffer buf, G g, int lvl) throws IgniteCheckedException { + // The page was removed. + if (PageIO.getPageId(buf) != page.id()) + return Get.RETRY; + BPlusIO<L> io = io(buf); // In case of intersection with inner replace remove operation http://git-wip-us.apache.org/repos/asf/ignite/blob/fa320c93/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/util/PageHandler.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/util/PageHandler.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/util/PageHandler.java index 4e1ff0d..d16fa9a 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/util/PageHandler.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/database/tree/util/PageHandler.java @@ -62,8 +62,7 @@ public abstract class PageHandler<X> { ByteBuffer buf = page.getForRead(); - if (buf == null) - return dfltRes; + assert buf != null; try { return h.run(page, buf, arg, intArg); http://git-wip-us.apache.org/repos/asf/ignite/blob/fa320c93/modules/indexing/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeSelfTest.java ---------------------------------------------------------------------- diff --git a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeSelfTest.java b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeSelfTest.java index 3d4a62e..a8b5cf3 100644 --- a/modules/indexing/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeSelfTest.java +++ b/modules/indexing/src/test/java/org/apache/ignite/internal/processors/database/BPlusTreeSelfTest.java @@ -492,7 +492,9 @@ public class BPlusTreeSelfTest extends GridCommonAbstractTest { Map<Long,Long> map = new HashMap<>(); - for (int i = 0 ; i < 1_000_000; i++) { + int loops = reuseList == null ? 200_000 : 1000_000; + + for (int i = 0 ; i < loops; i++) { Long x = (long)tree.randomInt(CNT); if (i % 100_000 == 0)
