This is an automated email from the ASF dual-hosted git repository.
stefanegli pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new f8c61da771 OAK-10595 : test case added (#1261)
f8c61da771 is described below
commit f8c61da771b4668bdea388131b9588062a0f03ba
Author: stefan-egli <[email protected]>
AuthorDate: Wed Jan 24 12:49:16 2024 +0100
OAK-10595 : test case added (#1261)
* OAK-10595 : test case added
* OAK-10595 : relevant javadoc fix. plus test method beautify
* OAK-10595 : another, relevant, javadoc fix
* OAK-10595 : test stability improved by explicitly adding for thread 2's
success at earliest possible moment
* OAK-10595 : added missing assert that breakpoint 1 is hit
* OAK-10595 : replace invalidateCache /parent/foo via the the conflicting
update on 4 also doing a change on /parent/foo - which is simpler and nicer
* OAK-10595 : some comments added
* OAK-10595 : two more comments added for clarification
* OAK-10595 : fixed javadoc
---
.../document/DocumentNodeStoreSweepTest.java | 265 +++++++++++++++++++++
1 file changed, 265 insertions(+)
diff --git
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreSweepTest.java
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreSweepTest.java
index 8f8810784b..c9bf8bc284 100644
---
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreSweepTest.java
+++
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreSweepTest.java
@@ -19,21 +19,30 @@ package org.apache.jackrabbit.oak.plugins.document;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
+import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
import org.apache.jackrabbit.guava.common.collect.Iterables;
import org.apache.jackrabbit.guava.common.collect.Maps;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.document.DocumentMK.Builder;
+import
org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MongoFixture;
import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
+import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore;
+import
org.apache.jackrabbit.oak.plugins.document.prefetch.CountingMongoDatabase;
+import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
import org.apache.jackrabbit.oak.plugins.document.util.Utils;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.stats.Clock;
import org.jetbrains.annotations.NotNull;
+import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
@@ -60,6 +69,8 @@ public class DocumentNodeStoreSweepTest {
private DocumentNodeStore ns;
+ private MongoFixture mf;
+
@Before
public void before() throws Exception {
clock = new Clock.Virtual();
@@ -70,12 +81,266 @@ public class DocumentNodeStoreSweepTest {
ns = createDocumentNodeStore(0);
}
+ @After
+ public void after() throws Exception {
+ if (mf != null) {
+ mf.dispose();
+ mf = null;
+ }
+ }
+
@AfterClass
public static void resetClock() {
Revision.resetClockToDefault();
ClusterNodeInfo.resetClockToDefault();
}
+ interface UpdateCallback {
+ /**
+ * @return true to continue going via this UpdateHandler, false to
stop using
+ * this UpdateHandler
+ */
+ boolean handleUpdate(UpdateOp op);
+ }
+
+ /** limited purpose MongoFixture that allows to pause after a specific
update */
+ MongoFixture pausableMongoDocumentStore(final String targetId,
+ final UpdateCallback updateHandler) {
+ return new MongoFixture() {
+ @Override
+ public DocumentStore createDocumentStore(Builder builder) {
+ try {
+ MongoConnection connection = MongoUtils.getConnection();
+ CountingMongoDatabase db = new CountingMongoDatabase(
+ connection.getDatabase());
+ return new MongoDocumentStore(connection.getMongoClient(),
db,
+ builder) {
+ volatile boolean done;
+
+ @Override
+ public <T extends Document> T findAndUpdate(
+ Collection<T> collection, UpdateOp update) {
+ try {
+ return super.findAndUpdate(collection, update);
+ } finally {
+ updateHandler(targetId, updateHandler, update);
+ }
+ }
+
+ private void updateHandler(final String targetId,
+ final UpdateCallback updateHandler, UpdateOp
update) {
+ if (done) {
+ return;
+ }
+ if (update.getId().equals(targetId)) {
+ if (!updateHandler.handleUpdate(update)) {
+ done = true;
+ return;
+ }
+ }
+ }
+
+ @Override
+ public <T extends Document> T createOrUpdate(
+ Collection<T> collection, UpdateOp update) {
+ try {
+ return super.createOrUpdate(collection,
update);
+ } finally {
+ updateHandler(targetId, updateHandler, update);
+ }
+ }
+
+ @Override
+ public <T extends Document> List<T> createOrUpdate(
+ Collection<T> collection, List<UpdateOp>
updateOps) {
+ try {
+ return super.createOrUpdate(collection,
updateOps);
+ } finally {
+ for (UpdateOp update : updateOps) {
+ updateHandler(targetId, updateHandler,
update);
+ }
+ }
+ }
+ };
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+
+ /**
+ * Test to show-case a race-condition between a collision and
+ * MongoDocumentStore's nodesCache: when a document is read into
+ * the nodesCache shortly before a collision is rolled back,
+ * it runs risk of later making uncommitted changes visible.
+ * <p/>
+ * The test case works as follows:
+ * <ul>
+ * <li>consider clusterId 2 and 4 being active in a cluster</li>
+ * <li>a target path /parent/foo (and sibling /parent/bar) having
+ * formerly been created</li>
+ * <li>clusterId 2: now wants to delete /parent/foo and /parent/bar, and
starts
+ * doing so versus mongo by first setting _deleted:true on those two nodes
+ * (using revision r123456789a-0-2)</li>
+ * <li>before clusterId 2 continues, clusterId 4 comes with a conflicting
update
+ * on /parent/bar (using revision r123456789b-0-4). This update notices
+ * the changes from 2 and leaves a corresponding collision marker (on 0:/
+ * with _collisions.r123456789a-0-2=r123456789b-0-4)</li>
+ * <li>before clusterId 4 proceeds, it happens to force a read from
+ * 2:/parent/foo from mongo - this is achieved as a result of
+ * another /parent/foo</li>
+ * <li>the result of the above is clusterId 4 having a state of
2:/parent/foo
+ * in its MongoDocumentStore nodesCache that contains uncommitted
information.
+ * In this test case, that uncommitted information is a deletion. But it
could
+ * be anything else really.</li>
+ * <li>then things continue on both clusterId 2 and 4 (from the previous
+ * test-pause)</li>
+ * <li>then clusterId 2 does another, unrelated change on /parent/bar,
+ * thereby resulting in a newer lastRev (on root and /parent) than the
collision.
+ * Also, it results in a sweepRev that is newer than the collision</li>
+ * <li>when later, clusterId 4 reads /parent/foo, it still finds the
+ * cached 2:/parent/foo document with the uncommitted data - plus it
+ * now has a readRevision/lastRevision that is newer than that - plus
+ * it will resolve that uncommitted data's revision (r123456789a-0-2)
+ * as commitvalue="c", since it is older than the sweepRevision</li>
+ * <li>and thus, clusterId 4 managed to read uncommitted, rolled back data
+ * from an earlier collision</li>
+ * </ul>
+ */
+ @Test
+ @Ignore(value = "OAK-10595")
+ public void cachingUncommittedBeforeCollisionRollback() throws Exception {
+ // two nodes part of the game:
+ // 2 : the main one that starts to do a subtree deletion
+ // 4 : a peer one that gets in between the above and causes a
collision.
+ // as a result 4 manages to read 2's rolled back (uncommitted) state as
+ // committed
+
+ ns.dispose();
+
+ final Semaphore breakpoint1 = new Semaphore(0);
+ final Semaphore breakpoint2 = new Semaphore(0);
+
+ final AtomicReference<Thread> breakInThread = new
AtomicReference<Thread>();
+ UpdateCallback breakOnceInThread = new UpdateCallback() {
+
+ @Override
+ public boolean handleUpdate(UpdateOp update) {
+ final Thread localThread = breakInThread.get();
+ if (localThread == null ||
!localThread.equals(Thread.currentThread())) {
+ return true;
+ }
+ breakpoint1.release();
+ try {
+ breakpoint2.tryAcquire(1, 30, TimeUnit.MINUTES);
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ return false;
+ }
+
+ };
+
+ mf = pausableMongoDocumentStore("2:/parent/foo",
+ breakOnceInThread);
+ DocumentStore store1 = mf.createDocumentStore(4);
+ DocumentStore store2 = mf.createDocumentStore(2);
+ ns = builderProvider.newBuilder().setDocumentStore(store1)
+ // use lenient mode because tests use a virtual clock
+
.setLeaseCheckMode(LeaseCheckMode.LENIENT).setClusterId(4).clock(clock)
+ .setAsyncDelay(0).getNodeStore();
+ DocumentNodeStore ns4 = ns;
+ DocumentNodeStore ns2 =
builderProvider.newBuilder().setDocumentStore(store2)
+ // use lenient mode because tests use a virtual clock
+
.setLeaseCheckMode(LeaseCheckMode.LENIENT).setClusterId(2).clock(clock)
+ .setAsyncDelay(0).getNodeStore();
+
+ {
+ // setup
+ NodeBuilder builder = ns4.getRoot().builder();
+ builder.child("parent").child("foo");
+ builder.child("parent").child("bar");
+ merge(ns4, builder);
+ }
+ ns4.runBackgroundOperations();
+ ns2.runBackgroundOperations();
+
+ final Semaphore successOn2 = new Semaphore(0);
+ Runnable codeOn2 = new Runnable() {
+
+ @Override
+ public void run() {
+ try {
+ // now delete but intercept the _revisions update
+ NodeBuilder builder = ns2.getRoot().builder();
+ assertTrue(builder.child("parent").child("foo").remove());
+ assertTrue(builder.child("parent").child("bar").remove());
+ breakInThread.set(Thread.currentThread());
+ merge(ns2, builder);
+ fail("supposed to fail");
+ } catch (CommitFailedException e) {
+ // supposed to fail
+ successOn2.release();
+ }
+ }
+
+ };
+ ns4.runBackgroundOperations();
+
+ // prepare a regular collision on 4
+ NodeBuilder collisionBuilder4 = ns4.getRoot().builder();
+
collisionBuilder4.child("parent").child("bar").setProperty("collideOnPurpose",
+ "indeed");
+ // do the collision also on /parent/foo
+
collisionBuilder4.child("parent").child("foo").setProperty("someotherchange",
+ "42");
+
+ // start /parent/foo deletion on 2 in a separate thread
+ Thread th2 = new Thread(codeOn2);
+ th2.setDaemon(true);
+ th2.start();
+
+ // wait for the separate thread to update /parent/foo but not commit
yet
+ assertTrue(breakpoint1.tryAcquire(1, 30, TimeUnit.SECONDS));
+
+ // then continue with the regular collision on 4
+ // this will now put the rolled-back collision change into 4's cache.
+ merge(ns4, collisionBuilder4);
+
+ // check at this point though, /parent/foo is still there:
+ assertTrue(ns4.getRoot().getChildNode("parent").hasChildNode("foo"));
+
+ // release things and go ahead
+ breakpoint2.release();
+ assertTrue(successOn2.tryAcquire(5, TimeUnit.SECONDS));
+
+ // some bg ops...
+ ns4.runBackgroundOperations();
+ ns2.runBackgroundOperations();
+ ns4.runBackgroundOperations();
+
+ // at this point /parent/foo still exists on 4
+ // (due to lastRev on 2 not yet being updated)
+ assertTrue(ns4.getRoot().getChildNode("parent").hasChildNode("foo"));
+ {
+ // but with an update on /parent/bar on 2, _lastRev updates,
+ // hence /parent/foo's rolled-back change on 4 now becomes visible
+ NodeBuilder b2 = ns2.getRoot().builder();
+ b2.child("parent").child("bar").setProperty("z", "v");
+ merge(ns2, b2);
+ ns2.runBackgroundOperations();
+ }
+ ns4.runBackgroundOperations();
+
+ // this now fails since
+ // 1) the uncommitted collison rollback deletion is still there
+ // "_deleted":{"r123456789a-0-2":"true",..}
+ // 2) plus it now resolves to commitValue "c" since it is now passed
+ // the sweep revision (since we did another commit/bg just few lines
above)
+ assertTrue("/parent/foo should exist",
ns4.getRoot().getChildNode("parent").hasChildNode("foo"));
+ }
+
@Test
public void simple() throws Exception {
NodeBuilder builder = ns.getRoot().builder();