This is an automated email from the ASF dual-hosted git repository. mreutegg pushed a commit to branch revert-359-OAK-9564 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit d653908abd49efb992f9e14fd3593a1e4b049fee Author: Marcel Reutegger <[email protected]> AuthorDate: Mon Jan 10 11:58:36 2022 +0100 Revert "OAK-9564: Make the renewLease mechanism a bit more tolerant against unexpected updates" --- .../oak/plugins/document/ClusterNodeInfo.java | 60 +++---- .../plugins/document/ClusterNodeInfoDocument.java | 8 - .../oak/plugins/document/RecoveryLock.java | 2 - .../BaseDocumentDiscoveryLiteServiceTest.java | 11 +- .../oak/plugins/document/ClusterNodeInfoTest.java | 197 +-------------------- 5 files changed, 34 insertions(+), 244 deletions(-) diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfo.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfo.java index 9be9695..02679d2 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfo.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfo.java @@ -43,7 +43,6 @@ import java.util.concurrent.TimeUnit; import com.google.common.base.Stopwatch; import org.apache.jackrabbit.oak.commons.StringUtils; -import org.apache.jackrabbit.oak.commons.UUIDUtils; import org.apache.jackrabbit.oak.plugins.document.spi.lease.LeaseFailureHandler; import org.apache.jackrabbit.oak.plugins.document.util.SystemPropertySupplier; import org.apache.jackrabbit.oak.stats.Clock; @@ -177,11 +176,6 @@ public class ClusterNodeInfo { public static final String INVISIBLE = "invisible"; /** - * Runtime UUID (generated unique ID for this instance) - */ - public static final String RUNTIME_ID_KEY = "runtime_id"; - - /** * The unique machine id (the MAC address if available). */ private static final String MACHINE_ID = getHardwareMachineId(); @@ -298,6 +292,20 @@ public class ClusterNodeInfo { private volatile long leaseEndTime; /** + * The value of leaseEnd last updated towards DocumentStore - + * this one is used to compare against (for OAK-3398) when checking + * if any other instance updated the lease or if the lease is unchanged. + * (This is kind of a duplication of the leaseEndTime field, yes - but the semantics + * are that previousLeaseEndTime exactly only serves the purpose of + * keeping the value of what was stored in the previous lease update. + * leaseEndTime on the other hand serves the purpose of *defining the lease end*, + * these are two different concerns, thus justify two different fields. + * the leaseEndTime for example can be manipulated during tests therefore, + * without interfering with previousLeaseEndTime) + */ + private long previousLeaseEndTime; + + /** * The read/write mode. */ private String readWriteMode; @@ -340,22 +348,18 @@ public class ClusterNodeInfo { */ private boolean invisible; - /** - * Unique ID generated at Runtime. - */ - private final String runtimeId; private ClusterNodeInfo(int id, DocumentStore store, String machineId, String instanceId, boolean newEntry, boolean invisible) { this.id = id; this.startTime = getCurrentTime(); this.leaseEndTime = this.startTime +leaseTime; + this.previousLeaseEndTime = this.leaseEndTime; this.store = store; this.machineId = machineId; this.instanceId = instanceId; this.newEntry = newEntry; this.invisible = invisible; - this.runtimeId = UUIDUtils.generateUUID(); } void setLeaseCheckMode(@NotNull LeaseCheckMode mode) { @@ -378,10 +382,6 @@ public class ClusterNodeInfo { return instanceId; } - String getRuntimeId() { - return runtimeId; - } - boolean isInvisible() { return invisible; } @@ -483,7 +483,6 @@ public class ClusterNodeInfo { update.set(STATE, ACTIVE.name()); update.set(OAK_VERSION_KEY, OAK_VERSION); update.set(INVISIBLE, invisible); - update.set(RUNTIME_ID_KEY, clusterNode.runtimeId); ClusterNodeInfoDocument before = null; final boolean success; @@ -504,8 +503,6 @@ public class ClusterNodeInfo { update.notEquals(STATE, ACTIVE.name()); // 2) must not have a recovery lock update.notEquals(REV_RECOVERY_LOCK, ACQUIRED.name()); - // 3) must not be assigned to a different node - update.equals(RUNTIME_ID_KEY, null); success = store.findAndUpdate(Collection.CLUSTER_NODES, update) != null; } @@ -943,8 +940,7 @@ public class ClusterNodeInfo { } UpdateOp update = new UpdateOp("" + id, false); - // Update the field only if the newer value is higher (or doesn't exist) - update.max(LEASE_END_KEY, updatedLeaseEndTime); + update.set(LEASE_END_KEY, updatedLeaseEndTime); if (leaseCheckMode != LeaseCheckMode.DISABLED) { // if leaseCheckDisabled, then we just update the lease without @@ -955,13 +951,16 @@ public class ClusterNodeInfo { // then we can now make an assertion that the lease is unchanged // and the incremental update must only succeed if no-one else // did a recover/inactivation in the meantime - // make three assertions: it must still be active + // make three assertions: the leaseEnd must match .. + update.equals(LEASE_END_KEY, null, previousLeaseEndTime); + // plus it must still be active .. update.equals(STATE, null, ACTIVE.name()); // plus it must not have a recovery lock on it update.notEquals(REV_RECOVERY_LOCK, ACQUIRED.name()); - // and the runtimeId that we create at startup each time - // should be the same - update.equals(RUNTIME_ID_KEY, null, runtimeId); + // @TODO: to make it 100% failure proof we could introduce + // yet another field to clusterNodes: a runtimeId that we + // create (UUID) at startup each time - and against that + // we could also check here - but that goes a bit far IMO } if (LOG.isDebugEnabled()) { @@ -1017,6 +1016,7 @@ public class ClusterNodeInfo { return false; } leaseEndTime = updatedLeaseEndTime; + previousLeaseEndTime = leaseEndTime; // store previousLeaseEndTime for reference for next time String mode = (String) doc.get(READ_WRITE_MODE_KEY); if (mode != null && !mode.equals(readWriteMode)) { readWriteMode = mode; @@ -1038,8 +1038,8 @@ public class ClusterNodeInfo { } } // if we get here, the update failed with an exception, try to read the - // current cluster node info document and update leaseEndTime - // accordingly until leaseEndTime is reached + // current cluster node info document and update leaseEndTime & + // previousLeaseEndTime accordingly until leaseEndTime is reached while (getCurrentTime() < updatedLeaseEndTime) { synchronized (this) { if (leaseCheckFailed) { @@ -1067,9 +1067,10 @@ public class ClusterNodeInfo { "anymore. {}", id, doc); // break here and let the next lease update attempt fail break; - } else if (runtimeId.equals(doc.getRuntimeId())) { - // set lease end time to current value, as they belong - // to this same cluster node + } else if (doc.getLeaseEndTime() == previousLeaseEndTime + || doc.getLeaseEndTime() == updatedLeaseEndTime) { + // set lease end times to current values + previousLeaseEndTime = doc.getLeaseEndTime(); leaseEndTime = doc.getLeaseEndTime(); break; } else { @@ -1139,7 +1140,6 @@ public class ClusterNodeInfo { update.set(LEASE_END_KEY, null); update.set(STATE, null); update.set(INVISIBLE, false); - update.set(RUNTIME_ID_KEY, null); store.createOrUpdate(Collection.CLUSTER_NODES, update); state = NONE; } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoDocument.java index b9aa2af..506a5b2 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoDocument.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoDocument.java @@ -69,14 +69,6 @@ public class ClusterNodeInfoDocument extends Document { return startTime; } - /** - * @return the Runtime ID for this cluster node. - */ - @Nullable - public String getRuntimeId() { - return (String) get(ClusterNodeInfo.RUNTIME_ID_KEY); - } - public boolean isActive(){ return getState() == ClusterNodeState.ACTIVE; } diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/RecoveryLock.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/RecoveryLock.java index 39dcf75..4b92d4f 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/RecoveryLock.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/RecoveryLock.java @@ -25,7 +25,6 @@ import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.DEFAULT import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.LEASE_END_KEY; import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.REV_RECOVERY_BY; import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.REV_RECOVERY_LOCK; -import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.RUNTIME_ID_KEY; import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.RecoverLockState.ACQUIRED; import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.STATE; import static org.apache.jackrabbit.oak.plugins.document.Collection.CLUSTER_NODES; @@ -103,7 +102,6 @@ class RecoveryLock { if (success) { update.set(STATE, null); update.set(LEASE_END_KEY, null); - update.set(RUNTIME_ID_KEY, null); } else { // make sure lease is expired update.set(LEASE_END_KEY, clock.getTime() - 1); diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BaseDocumentDiscoveryLiteServiceTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BaseDocumentDiscoveryLiteServiceTest.java index aef1ff9..3c4a227 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BaseDocumentDiscoveryLiteServiceTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/BaseDocumentDiscoveryLiteServiceTest.java @@ -16,7 +16,6 @@ */ package org.apache.jackrabbit.oak.plugins.document; -import static org.apache.jackrabbit.oak.plugins.document.ClusterNodeInfo.LEASE_END_KEY; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -312,15 +311,7 @@ public abstract class BaseDocumentDiscoveryLiteServiceTest { private boolean setLeaseTime(final int leaseTime, final int leaseUpdateInterval) throws NoSuchFieldException { ns.getClusterInfo().setLeaseTime(leaseTime); ns.getClusterInfo().setLeaseUpdateInterval(leaseUpdateInterval); - long newLeaseTime = System.currentTimeMillis() + (leaseTime / 3) - 10 /* 10ms safety margin */; - PrivateAccessor.setField(ns.getClusterInfo(), "leaseEndTime", newLeaseTime); - - // OAK-9564: Apply the update low level to the nodeStore, as the max operation wouldn't let to apply the - // new lease time if it is lower than the current one. - UpdateOp op = new UpdateOp(String.valueOf(ns.getClusterId()), false); - op.set(LEASE_END_KEY, newLeaseTime); - ns.getDocumentStore().findAndUpdate(Collection.CLUSTER_NODES, op); - + PrivateAccessor.setField(ns.getClusterInfo(), "leaseEndTime", System.currentTimeMillis() + (leaseTime / 3) - 10 /* 10ms safety margin */); boolean renewed = ns.renewClusterIdLease(); return renewed; } diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoTest.java index 84c4edd..0f3ed5f 100644 --- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoTest.java +++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/ClusterNodeInfoTest.java @@ -18,7 +18,6 @@ package org.apache.jackrabbit.oak.plugins.document; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.Callable; @@ -39,14 +38,13 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.lessThan; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -271,110 +269,6 @@ public class ClusterNodeInfoTest { } } - // OAK-9564 - @Test - public void renewLeaseSameRuntimeId() throws Exception { - ClusterNodeInfo info = newClusterNodeInfo(1); - String runtimeId = info.getRuntimeId(); - long leaseEnd = info.getLeaseEndTime(); - waitLeaseUpdateInterval(); - assertTrue(info.renewLease()); - assertTrue(info.getLeaseEndTime() > leaseEnd); - // The Runtime UUID should remain the same - assertEquals(info.getRuntimeId(), runtimeId); - assertFalse(handler.isLeaseFailure()); - } - - // OAK-9564 - @Test - public void renewLeaseDifferentRuntimeId() throws Exception { - ClusterNodeInfo info = newClusterNodeInfo(1); - waitLeaseUpdateInterval(); - long leaseEndTimeBeforeRenew = info.getLeaseEndTime(); - - // Modify the UUID to mock it belongs to a different node - UpdateOp update = new UpdateOp("1", false); - update.set(ClusterNodeInfo.RUNTIME_ID_KEY, "different-uuid"); - store.findAndUpdate(Collection.CLUSTER_NODES, update); - - try { - info.renewLease(); - fail("Should not update lease anymore"); - } catch(DocumentStoreException e) { - // expected - } - - // Lease end time shouldn't be different - assertEquals(leaseEndTimeBeforeRenew, info.getLeaseEndTime()); - } - - // OAK-9564 - @Test - public void renewLeaseTakingLongerThanTimeout() throws Exception { - ClusterNodeInfo info = newClusterNodeInfo(1); - waitLeaseUpdateInterval(); - final long leaseEndTimeBeforeRenew = info.getLeaseEndTime(); - final String runtimeId = info.getRuntimeId(); - - Map<String, Long> unexpectedLeaseEnd = new HashMap<>(); - long unexpectedLeaseEndTime = info.getLeaseEndTime() + 133333; - unexpectedLeaseEnd.put(ClusterNodeInfo.LEASE_END_KEY, unexpectedLeaseEndTime); - - // The update will fail after 30 seconds. Simulating a Mongo timeout. - store.setFailAfterUpdate(1); - store.setDelayMillisOnce(30000); - store.setDelayMillis(10000); - store.setFindShouldAlterReturnDocument(true); - // However, the following find after the update will return an - // unexpected lease time (but still within a valid time). - // This unexpected update could come from a previous but very slow update - // executed in Mongo. So it's still a valid one, but not the new one - // that is expected. - store.setMapAlterReturnDocument(unexpectedLeaseEnd); - - // However, the current behaviour is that as the lease end time doesn't - // match the expected one, the lease will fail and the nodeStore becomes - // unusable. - try { - info.renewLease(); - } catch(DocumentStoreException e) { - // expected - } - - // The new leaseEndTime coming from Mongo is not reflected in the - // ClusterNodeInfo. Meaning it will eventually be treated as 'expired' - // by the DocumentNodeStore, even when in Mongo it was set. - assertThat(leaseEndTimeBeforeRenew, lessThan(info.getLeaseEndTime())); - assertEquals(unexpectedLeaseEndTime, info.getLeaseEndTime()); - // Runtime ID is the same - assertEquals(runtimeId, info.getRuntimeId()); - } - - // OAK-9564: This is a someway artificial test. The idea behind is to try to reproduce - // a case where a renewLease fails because of a timeout. Then the following renewLease - // occurs faster, but during that time the previous update is executed in Mongo. - // That 'older' update shouldn't go through now, reducing the effective lease end time. - @Test - public void renewLeaseShouldNotGoBackInTime() throws Exception { - ClusterNodeInfo info = newClusterNodeInfo(1); - waitLeaseUpdateInterval(); - - long newerLeaseEndTime = clock.getTime() + ClusterNodeInfo.DEFAULT_LEASE_DURATION_MILLIS + - ClusterNodeInfo.DEFAULT_LEASE_UPDATE_INTERVAL_MILLIS; - // simulate a newer renew lease took place - UpdateOp update = new UpdateOp("1", false); - update.set(ClusterNodeInfo.LEASE_END_KEY, newerLeaseEndTime); - store.findAndUpdate(Collection.CLUSTER_NODES, update); - - // now another renew happens, which will try to set a lesser lease end - info.renewLease(); - - ClusterNodeInfoDocument info2 = store.find(Collection.CLUSTER_NODES, "1"); - assertNotNull(info2); - // the lease end time should remain the same - assertEquals(newerLeaseEndTime, info2.getLeaseEndTime()); - } - @Test public void readOnlyClusterNodeInfo() { ClusterNodeInfo info = ClusterNodeInfo.getReadOnlyInstance(store); @@ -524,29 +418,6 @@ public class ClusterNodeInfoTest { info2.dispose(); } - // OAK-9564 - @Test - public void cannotGetClusterWithRuntimeId() { - ClusterNodeInfo info = newClusterNodeInfo(0); - int id = info.getId(); - assertEquals(1, id); - // shut it down - info.dispose(); - - // edit the runtime ID - UpdateOp op = new UpdateOp(String.valueOf(id), false); - op.set(ClusterNodeInfo.RUNTIME_ID_KEY, "some-different-uuid"); - assertNotNull(store.findAndUpdate(Collection.CLUSTER_NODES, op)); - - // shouldn't be able to acquire it again - try { - info = newClusterNodeInfo(id); - fail("Must fail here, and not get cluster node info"); - } catch(DocumentStoreException e) { - // expected - } - } - @Test public void acquireExpiredClusterIdStatic() throws Exception { String instanceId1 = "node1"; @@ -720,14 +591,10 @@ public class ClusterNodeInfoTest { final class TestStore extends DocumentStoreWrapper { - private final AtomicBoolean findShouldAlterReturnDocument = new AtomicBoolean(); - private final AtomicBoolean findAndUpdateShouldAlterReturnDocument = new AtomicBoolean(); - private Map mapAlterReturnDocument; private final AtomicInteger failBeforeUpdate = new AtomicInteger(); private final AtomicInteger failAfterUpdate = new AtomicInteger(); private final AtomicInteger failFind = new AtomicInteger(); private long delayMillis; - private long delayMillisOnce; TestStore() { super(new MemoryDocumentStore()); @@ -741,18 +608,10 @@ public class ClusterNodeInfoTest { public <T extends Document> T findAndUpdate(Collection<T> collection, UpdateOp update) { maybeDelay(); - maybeDelayOnce(); maybeThrow(failBeforeUpdate, "update failed before"); T doc = super.findAndUpdate(collection, update); maybeThrow(failAfterUpdate, "update failed after"); - if (getFindAndUpdateShouldAlterReturnDocument()) { - ClusterNodeInfoDocument cdoc = new ClusterNodeInfoDocument(); - cdoc.data.putAll(getMapAlterReturnDocument()); - cdoc.seal(); - return (T)cdoc; - } else { - return doc; - } + return doc; } @Override @@ -760,16 +619,7 @@ public class ClusterNodeInfoTest { String key) { maybeDelay(); maybeThrow(failFind, "find failed"); - T doc = super.find(collection, key); - if (getFindShouldAlterReturnDocument()) { - ClusterNodeInfoDocument cdoc = new ClusterNodeInfoDocument(); - doc.deepCopy(cdoc); - cdoc.data.putAll(getMapAlterReturnDocument()); - cdoc.seal(); - return (T)cdoc; - } else { - return doc; - } + return super.find(collection, key); } private void maybeDelay() { @@ -780,15 +630,6 @@ public class ClusterNodeInfoTest { } } - private void maybeDelayOnce() { - try { - clock.waitUntil(clock.getTime() + delayMillisOnce); - delayMillisOnce = 0; - } catch (InterruptedException e) { - throw new DocumentStoreException(e); - } - } - private void maybeThrow(AtomicInteger num, String msg) { if (num.get() > 0) { num.decrementAndGet(); @@ -796,30 +637,6 @@ public class ClusterNodeInfoTest { } } - public Map getMapAlterReturnDocument() { - return mapAlterReturnDocument; - } - - public void setMapAlterReturnDocument(Map mapAlterReturnDocument) { - this.mapAlterReturnDocument = mapAlterReturnDocument; - } - - public boolean getFindShouldAlterReturnDocument() { - return findShouldAlterReturnDocument.get(); - } - - public void setFindShouldAlterReturnDocument(boolean findShouldAlterReturnDocument) { - this.findShouldAlterReturnDocument.set(findShouldAlterReturnDocument); - } - - public boolean getFindAndUpdateShouldAlterReturnDocument() { - return findAndUpdateShouldAlterReturnDocument.get(); - } - - public void setFindAndUpdateShouldAlterReturnDocument(boolean findAndUpdateShouldAlterReturnDocument) { - this.findAndUpdateShouldAlterReturnDocument.set(findAndUpdateShouldAlterReturnDocument); - } - public int getFailBeforeUpdate() { return failBeforeUpdate.get(); } @@ -844,14 +661,6 @@ public class ClusterNodeInfoTest { this.delayMillis = delayMillis; } - public long getDelayMillisOnce() { - return delayMillisOnce; - } - - public void setDelayMillisOnce(long delayMillisOnce) { - this.delayMillisOnce = delayMillisOnce; - } - public int getFailFind() { return failFind.get(); }
