| Hi, I'm working on the TarMK failover package and trying to make it more robust and reliable. I noticed that there are still some situations in which the current implementation runs into persistent problems. It's not obvious what the "right" strategy is, so let me paint the picture quickly: If you have a slave and corrupt the local tar files (e.g. by killing the process during a sync/write operation), it can happen that the next sync with the master tries to update the information and: 1) nothing happens because the head segment is unchanged and locally present; 2) a new head segment is received and only the new head segment is stored; 3) during a "compareAgainstBaseState" call an exception is thrown because a segment can't be read locally (see attached stack trace). As soon as you enter this state, you're stuck. The problem is that the FileStore has its own immutable Tracker, which is used for reading segments. The SegmentTracker of the FailoverStore is not used if segments are read from the existing store. Because a rather "generic" IllegalStateException is thrown, handling it is difficult. A minimally invasive approach could be: * add a FileStoreCorruptException (extending java.lang.IllegalStateException) and pass the SegmentId to the exception * catch this exception in the SegmentLoaderHandler, refetch the segment from the master, and persist it. This recovers from the error. Any comments? Regards Martin |
java.lang.IllegalStateException: Segment 4ffeebf5-f745-446f-adac-3fb6f438e23f
not found
at
org.apache.jackrabbit.oak.plugins.segment.file.FileStore.readSegment(FileStore.java:663)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentTracker.getSegment(SegmentTracker.java:119)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentId.getSegment(SegmentId.java:104)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.Segment.readString(Segment.java:313)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.Segment.loadTemplate(Segment.java:386)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.Segment.readTemplate(Segment.java:367)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.Segment.readTemplate(Segment.java:361)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.getTemplate(SegmentNodeState.java:78)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.compareAgainstBaseState(SegmentNodeState.java:405)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.spi.state.ApplyDiff.childNodeChanged(ApplyDiff.java:87)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.compareAgainstBaseState(SegmentNodeState.java:492)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.spi.state.ApplyDiff.childNodeChanged(ApplyDiff.java:87)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.compareAgainstBaseState(SegmentNodeState.java:492)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.spi.state.ApplyDiff.childNodeChanged(ApplyDiff.java:87)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord$2.childNodeChanged(MapRecord.java:403)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:460)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:394)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.compareAgainstBaseState(SegmentNodeState.java:544)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.spi.state.ApplyDiff.childNodeChanged(ApplyDiff.java:87)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord$2.childNodeChanged(MapRecord.java:403)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord$3.childNodeChanged(MapRecord.java:444)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:487)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:436)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:394)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.compareAgainstBaseState(SegmentNodeState.java:544)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.spi.state.ApplyDiff.childNodeChanged(ApplyDiff.java:87)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord$2.childNodeChanged(MapRecord.java:403)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:487)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:394)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.compareAgainstBaseState(SegmentNodeState.java:544)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.spi.state.ApplyDiff.childNodeChanged(ApplyDiff.java:87)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.MapRecord.compare(MapRecord.java:487)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState.compareAgainstBaseState(SegmentNodeState.java:544)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.failover.client.SegmentLoaderHandler.initSync(SegmentLoaderHandler.java:98)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.failover.client.SegmentLoaderHandler.channelActive(SegmentLoaderHandler.java:76)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.failover.client.FailoverClientHandler.setHead(FailoverClientHandler.java:103)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.failover.client.FailoverClientHandler.channelRead0(FailoverClientHandler.java:71)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
org.apache.jackrabbit.oak.plugins.segment.failover.client.FailoverClientHandler.channelRead0(FailoverClientHandler.java:39)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
~[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333)
[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
io.netty.channel.AbstractChannelHandlerContext.access$700(AbstractChannelHandlerContext.java:32)
[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
io.netty.channel.AbstractChannelHandlerContext$8.run(AbstractChannelHandlerContext.java:324)
[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
io.netty.util.concurrent.DefaultEventExecutor.run(DefaultEventExecutor.java:36)
[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:116)
[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at
io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
[oak-run-1.1-SNAPSHOT.jar:1.1-SNAPSHOT]
at java.lang.Thread.run(Thread.java:680) [na:1.6.0_37]
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
index 952ea92..82f2658 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
@@ -660,7 +660,7 @@ public class FileStore implements SegmentStore {
}
}
- throw new IllegalStateException("Segment " + id + " not found");
+ throw new FileStoreCorruptException("Segment " + id + " not found",
id);
}
@Override
@@ -722,4 +722,13 @@ public class FileStore implements SegmentStore {
this.pauseCompaction = pauseCompaction;
return this;
}
+
+ public class FileStoreCorruptException extends IllegalStateException {
+ public final SegmentId id;
+
+ public FileStoreCorruptException(String message, SegmentId id) {
+ super(message);
+ this.id = id;
+ }
+ }
}
diff --git
a/oak-tarmk-failover/src/main/java/org/apache/jackrabbit/oak/plugins/segment/failover/client/SegmentLoaderHandler.java
b/oak-tarmk-failover/src/main/java/org/apache/jackrabbit/oak/plugins/segment/failover/client/SegmentLoaderHandler.java
index b77634e..0f130f7 100644
---
a/oak-tarmk-failover/src/main/java/org/apache/jackrabbit/oak/plugins/segment/failover/client/SegmentLoaderHandler.java
+++
b/oak-tarmk-failover/src/main/java/org/apache/jackrabbit/oak/plugins/segment/failover/client/SegmentLoaderHandler.java
@@ -25,6 +25,8 @@ import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import io.netty.util.concurrent.EventExecutorGroup;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
@@ -37,6 +39,7 @@ import
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState;
import org.apache.jackrabbit.oak.plugins.segment.failover.codec.SegmentReply;
import org.apache.jackrabbit.oak.plugins.segment.failover.store.FailoverStore;
import
org.apache.jackrabbit.oak.plugins.segment.failover.store.RemoteSegmentLoader;
+import org.apache.jackrabbit.oak.plugins.segment.file.FileStore;
import org.apache.jackrabbit.oak.spi.state.ApplyDiff;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -95,15 +98,39 @@ public class SegmentLoaderHandler extends
ChannelInboundHandlerAdapter
SegmentNodeBuilder builder = before.builder();
SegmentNodeState current = new SegmentNodeState(head);
- current.compareAgainstBaseState(before, new ApplyDiff(builder));
-
+ do {
+ try {
+ current.compareAgainstBaseState(before, new
ApplyDiff(builder));
+ break;
+ }
+ catch (FileStore.FileStoreCorruptException e) {
+ // the segment is locally damaged or not present anymore
+ // lets try to read this from the master again
+ Segment s = readSegment(e.id);
+ if (s == null) {
+ log.warn("can't read locally corrupt segment " + e.id
+ " from master");
+ throw e;
+ }
+
+ log.info("did reread locally corrupt segment " + e.id + "
with size " + s.size());
+ ByteArrayOutputStream bout = new
ByteArrayOutputStream(s.size());
+ try {
+ s.writeTo(bout);
+ }
+ catch (IOException f) {
+ log.error("can't wrap segment to output stream", f);
+ throw e;
+ }
+ store.writeSegment(e.id, bout.toByteArray(), 0, s.size());
+ }
+ } while(true);
boolean ok = store.setHead(before, builder.getNodeState());
log.info("#updated state (set head {}) in {}ms.", ok,
System.currentTimeMillis() - t);
} finally {
close();
}
- log.info("returning initSync");
+ log.debug("returning initSync");
}
@Override
signature.asc
Description: Message signed with OpenPGP using GPGMail
