Author: chetanm
Date: Thu May 4 06:42:22 2017
New Revision: 1793741
URL: http://svn.apache.org/viewvc?rev=1793741&view=rev
Log:
OAK-5558 - Consistency checker for Lucene indexes
Implemented support for testing based on CheckIndex utility from Lucene
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java?rev=1793741&r1=1793740&r2=1793741&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
Thu May 4 06:42:22 2017
@@ -19,7 +19,11 @@
package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
import java.io.InputStream;
+import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
@@ -28,22 +32,37 @@ import javax.jcr.PropertyType;
import com.google.common.base.Stopwatch;
import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
+import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Root;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectory;
+import
org.apache.jackrabbit.oak.plugins.index.lucene.writer.MultiplexersLucene;
import org.apache.jackrabbit.oak.plugins.tree.RootFactory;
import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
+import org.apache.jackrabbit.oak.spi.state.ReadOnlyBuilder;
+import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static com.google.common.base.Preconditions.checkNotNull;
+
public class IndexConsistencyChecker {
private final Logger log = LoggerFactory.getLogger(getClass());
private final NodeState rootState;
private final String indexPath;
+ private final File workDirRoot;
public enum Level {
/**
@@ -52,7 +71,7 @@ public class IndexConsistencyChecker {
*/
BLOBS_ONLY,
/**
- * Performs full check via {@link org.apache.lucene.index.CheckIndex}.
This
+ * Performs full check via {@code org.apache.lucene.index.CheckIndex}.
This
* reads whole index and hence can take time
*/
FULL
@@ -72,17 +91,59 @@ public class IndexConsistencyChecker {
public long binaryPropSize;
- public List<String> invalidBlobIds = new ArrayList<>();
+ public List<FileSizeStatus> invalidBlobIds = new ArrayList<>();
+
+ public List<String> missingBlobIds = new ArrayList<>();
+
+ public List<DirectoryStatus> dirStatus = new ArrayList<>();
+ }
+
+    /**
+     * Outcome of the full check for a single index directory.
+     */
+    public static class DirectoryStatus {
+        /** Name of the directory node this status was created for */
+        public final String dirName;
+
+        /** Files listed by the directory which could not be found while copying */
+        public final List<String> missingFiles = new ArrayList<>();
+
+        /** Files whose copied length differs from the length reported by the source */
+        public final List<FileSizeStatus> filesWithSizeMismatch = new ArrayList<>();
+
+        public DirectoryStatus(String dirName) {
+            this.dirName = dirName;
+        }
+
+        /** Remains {@code false} unless the Lucene index check reports the directory as clean */
+        public boolean clean;
+
+        /** Accumulated length of the files whose size was found to be consistent */
+        public long size;
+
+        /** Status returned by {@code CheckIndex}; {@code null} if that check was not run */
+        public CheckIndex.Status status;
+
+        /** Number of documents reported by the reader opened on a clean directory */
+        public long numDocs;
+    }
+
+ /**
+ * Immutable record of a size discrepancy: the name of the affected file or
+ * blob together with the size that was observed and the size that was expected.
+ */
+ public static class FileSizeStatus {
+ /** Name (or path) identifying the entry whose size did not match */
+ public final String name;
+
+ /** Size that was actually observed when reading the entry */
+ public final long actualSize;
+
+ /** Size the entry was expected to have */
+ public final long expectedSize;
+
+ /**
+ * @param name name (or path) identifying the entry
+ * @param actualSize size observed when reading the entry
+ * @param expectedSize size the entry was expected to have
+ */
+ public FileSizeStatus(String name, long actualSize, long expectedSize)
{
+ this.name = name;
+ this.actualSize = actualSize;
+ this.expectedSize = expectedSize;
+ }
- public List<String> msgs = new ArrayList<>();
+ /** Renders the entry as {@code <name> => expected <expectedSize>, actual <actualSize>} */
+ @Override
+ public String toString() {
+ return String.format("%s => expected %d, actual %d", name,
expectedSize, actualSize);
+ }
}
- public IndexConsistencyChecker(NodeState rootState, String indexPath) {
- this.rootState = rootState;
- this.indexPath = indexPath;
+ /**
+ * Creates a checker for a single index.
+ *
+ * @param rootState root node state from which the index node is resolved; must not be null
+ * @param indexPath path of the index to check; must not be null
+ * @param workDirRoot directory under which the working directory used by the
+ * full check is created; must not be null
+ */
+ public IndexConsistencyChecker(NodeState rootState, String indexPath, File
workDirRoot) {
+ this.rootState = checkNotNull(rootState);
+ this.indexPath = checkNotNull(indexPath);
+ this.workDirRoot = checkNotNull(workDirRoot);
}
- public Result check(Level level){
+ public Result check(Level level) throws IOException {
Stopwatch watch = Stopwatch.createStarted();
Result result = new Result();
result.indexPath = indexPath;
@@ -90,10 +151,9 @@ public class IndexConsistencyChecker {
log.debug("[{}] Starting check", indexPath);
- switch (level){
- case BLOBS_ONLY :
- checkBlobs(result);
- break;
+ checkBlobs(result);
+ if (level == Level.FULL && result.clean){
+ checkIndex(result);
}
if (result.clean){
@@ -105,6 +165,82 @@ public class IndexConsistencyChecker {
return result;
}
+ private void checkIndex(Result result) throws IOException {
+ NodeState idx = NodeStateUtils.getNode(rootState, indexPath);
+ IndexDefinition defn = IndexDefinition.newBuilder(rootState, idx,
indexPath).build();
+ File workDir = createWorkDir(workDirRoot,
PathUtils.getName(indexPath));
+
+ for (String dirName : idx.getChildNodeNames()){
+ //TODO Check for SuggestionDirectory Pending
+ if (NodeStateUtils.isHidden(dirName) &&
MultiplexersLucene.isIndexDirName(dirName)){
+ DirectoryStatus dirStatus = new DirectoryStatus(dirName);
+ result.dirStatus.add(dirStatus);
+ log.warn("[{}] Checking directory {}", indexPath, dirName);
+ try {
+ checkIndexDirectory(dirStatus, idx, defn, workDir,
dirName);
+ } catch (IOException e){
+ dirStatus.clean = false;
+ log.warn("[{}][{}] Error occurred while performing
directory check", indexPath, dirName, e);
+ }
+
+ if (!dirStatus.clean){
+ result.clean = false;
+ }
+ }
+ }
+ }
+
+    /**
+     * Copies one index directory to the local file system and verifies it.
+     *
+     * <p>The check proceeds in three steps:
+     * <ol>
+     *   <li>every file listed by the source directory is copied to a fresh working
+     *       directory; files that cannot be found are recorded in
+     *       {@code dirStatus.missingFiles} and files whose copied length differs from the
+     *       source length in {@code dirStatus.filesWithSizeMismatch}</li>
+     *   <li>if all files were copied consistently, {@code CheckIndex} is run on the copy
+     *       and its outcome stored in {@code dirStatus.status} and {@code dirStatus.clean}</li>
+     *   <li>if the index is clean, a {@code DirectoryReader} is opened to record the
+     *       number of documents</li>
+     * </ol>
+     *
+     * @param dirStatus status object to populate
+     * @param idx       node state of the index
+     * @param defn      definition of the index
+     * @param workDir   working directory of the index; a sub directory is created per index directory
+     * @param dirName   name of the index directory node to check
+     * @throws IOException if copying or reading the index fails for a reason other than a missing file
+     */
+    private void checkIndexDirectory(DirectoryStatus dirStatus, NodeState idx, IndexDefinition defn,
+                                     File workDir, String dirName) throws IOException {
+        File idxDir = createWorkDir(workDir, dirName);
+
+        // Both directories are closed even when the check fails half way
+        try (Directory sourceDir = new OakDirectory(new ReadOnlyBuilder(idx), dirName, defn, true);
+             Directory targetDir = FSDirectory.open(idxDir)) {
+
+            boolean clean = true;
+            for (String file : sourceDir.listAll()) {
+                log.debug("[{}][{}] Checking {}", indexPath, dirName, file);
+                try {
+                    sourceDir.copy(targetDir, file, file, IOContext.DEFAULT);
+                } catch (FileNotFoundException ignored) {
+                    dirStatus.missingFiles.add(file);
+                    clean = false;
+                    log.warn("[{}][{}] File {} missing", indexPath, dirName, file);
+                    // There is nothing to compare for a missing file; asking for its length
+                    // would fail and abort the check of the remaining files
+                    continue;
+                }
+
+                long actualLength = targetDir.fileLength(file);
+                long expectedLength = sourceDir.fileLength(file);
+                if (actualLength != expectedLength) {
+                    FileSizeStatus fileStatus = new FileSizeStatus(file, actualLength, expectedLength);
+                    dirStatus.filesWithSizeMismatch.add(fileStatus);
+                    clean = false;
+                    log.warn("[{}][{}] File size mismatch {}", indexPath, dirName, fileStatus);
+                } else {
+                    dirStatus.size += expectedLength;
+                    log.debug("[{}][{}] File {} is consistent", indexPath, dirName, file);
+                }
+            }
+
+            if (clean) {
+                log.debug("[{}][{}] Directory content found to be consistent. Proceeding to IndexCheck", indexPath, dirName);
+                CheckIndex ci = new CheckIndex(targetDir);
+
+                if (log.isDebugEnabled()) {
+                    ci.setInfoStream(new LoggingPrintStream(log), log.isTraceEnabled());
+                }
+
+                dirStatus.status = ci.checkIndex();
+                dirStatus.clean = dirStatus.status.clean;
+                if (!dirStatus.clean) {
+                    log.warn("[{}][{}] IndexCheck found the index to be not clean", indexPath, dirName);
+                }
+            }
+
+            if (dirStatus.clean) {
+                // Only claim success once CheckIndex actually reported a clean index
+                log.debug("[{}][{}] IndexCheck was successful. Proceeding to open DirectoryReader", indexPath, dirName);
+                try (DirectoryReader dirReader = DirectoryReader.open(targetDir)) {
+                    dirStatus.numDocs = dirReader.numDocs();
+                    log.debug("[{}][{}] DirectoryReader can be opened", indexPath, dirName);
+                }
+            }
+        }
+    }
+
+ //~---------------------------------------< Blob Validation >
+
private void checkBlobs(Result result) {
Root root = RootFactory.createReadOnlyRoot(rootState);
Tree idx = root.getTree(indexPath);
@@ -148,8 +284,7 @@ public class IndexConsistencyChecker {
if (cis.getCount() != blob.length()){
String msg = String.format("Invalid blob %s. Length mismatch -
expected ${%d} -> found ${%d}",
blobPath, blob.length(), cis.getCount());
- result.msgs.add(msg);
- result.invalidBlobIds.add(id);
+ result.invalidBlobIds.add(new FileSizeStatus(blobPath,
cis.getCount(), blob.length()));
log.warn("[{}] {}", indexPath, msg);
result.clean = false;
result.blobSizeMismatch = true;
@@ -157,10 +292,45 @@ public class IndexConsistencyChecker {
result.binaryPropSize += cis.getCount();
} catch (Exception e) {
log.warn("[{}] Error occurred reading blob at {}", indexPath,
blobPath, e);
- result.invalidBlobIds.add(id);
+ result.missingBlobIds.add(id);
result.clean = false;
result.missingBlobs = true;
}
}
+ //~-----------------------------------------------< utility >
+
+    /**
+     * Prepares an empty working directory below {@code parent}.
+     *
+     * <p>The directory name is the file-system-safe form of {@code name}. The directory
+     * is created if required and any content it already holds is removed.
+     *
+     * @param parent directory in which the working directory is located
+     * @param name   logical name from which the directory name is derived
+     * @return the empty working directory
+     * @throws IOException if the directory cannot be created or cleaned
+     */
+    private static File createWorkDir(File parent, String name) throws IOException {
+        File workDir = new File(parent, IndexRootDirectory.getFSSafeName(name));
+        FileUtils.forceMkdir(workDir);
+        FileUtils.cleanDirectory(workDir);
+        return workDir;
+    }
+
+    /**
+     * Adapter to pipe info messages from lucene into log messages.
+     *
+     * <p>Text passed to {@link #print(String)} is accumulated and emitted as one
+     * debug-level log entry when {@link #println(String)} is called. The underlying
+     * stream is a null output stream, so only text that passes through these two
+     * overridden methods reaches the log.
+     */
+    private static final class LoggingPrintStream extends PrintStream {
+
+        /** Buffer print calls until a newline is written */
+        private final StringBuffer buffer = new StringBuffer();
+
+        private final Logger log;
+
+        public LoggingPrintStream(Logger log) {
+            super(ByteStreams.nullOutputStream());
+            this.log = log;
+        }
+
+        @Override
+        public void print(String s) {
+            buffer.append(s);
+        }
+
+        @Override
+        public void println(String s) {
+            buffer.append(s);
+            log.debug(buffer.toString());
+            // Start the next message with an empty buffer
+            buffer.setLength(0);
+        }
+    }
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java?rev=1793741&r1=1793740&r2=1793741&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
Thu May 4 06:42:22 2017
@@ -29,9 +29,9 @@ public final class MultiplexersLucene {
/**
* Prefix used to decorate mount names to represent index directory
*/
- private static final String INDEX_DIR_SUFFIX = "-index-data";
+ public static final String INDEX_DIR_SUFFIX = "-index-data";
- private static final String SUGGEST_DIR_SUFFIX = "-suggest-data";
+ public static final String SUGGEST_DIR_SUFFIX = "-suggest-data";
public static String getIndexDirName(Mount mount) {
if (mount.isDefault()){
Modified:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java?rev=1793741&r1=1793740&r2=1793741&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
Thu May 4 06:42:22 2017
@@ -19,27 +19,57 @@
package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
import java.io.InputStream;
+import java.util.List;
import javax.annotation.Nonnull;
import com.google.common.collect.Lists;
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.oak.InitialContent;
+import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.lucene.OakAnalyzer;
+import org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectory;
import
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Level;
import
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Result;
import
org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder;
+import
org.apache.jackrabbit.oak.plugins.index.lucene.writer.MultiplexersLucene;
import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import static
org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
import static
org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
public class IndexConsistencyCheckerTest {
+ private NodeState rootState = InitialContent.INITIAL_CONTENT;
+ private NodeBuilder idx = new IndexDefinitionBuilder().build().builder();
+
+ @Rule
+ public final TemporaryFolder temporaryFolder = new TemporaryFolder(new
File("target"));
+
@Test
public void emptyIndex() throws Exception{
- IndexConsistencyChecker checker = new
IndexConsistencyChecker(EMPTY_NODE, "/foo");
+ IndexConsistencyChecker checker = new
IndexConsistencyChecker(EMPTY_NODE, "/foo", temporaryFolder.getRoot());
Result result = checker.check(Level.BLOBS_ONLY);
assertFalse(result.clean);
assertTrue(result.typeMismatch);
@@ -49,9 +79,7 @@ public class IndexConsistencyCheckerTest
@Test
public void blobsWithError() throws Exception{
FailingBlob failingBlob = new FailingBlob("foo");
- IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
- NodeBuilder idx = defnBuilder.build().builder();
idx.setProperty("foo", failingBlob);
idx.child(":index").setProperty("foo", failingBlob);
idx.child("b").setProperty("foo", Lists.newArrayList(failingBlob,
failingBlob), Type.BINARIES);
@@ -59,27 +87,25 @@ public class IndexConsistencyCheckerTest
NodeBuilder builder = EMPTY_NODE.builder();
builder.setChildNode("a", idx.getNodeState());
- IndexConsistencyChecker checker = new
IndexConsistencyChecker(builder.getNodeState(), "/a");
+ IndexConsistencyChecker checker = new
IndexConsistencyChecker(builder.getNodeState(), "/a",
temporaryFolder.getRoot());
Result result = checker.check(Level.BLOBS_ONLY);
assertFalse(result.clean);
assertTrue(result.missingBlobs);
assertFalse(result.blobSizeMismatch);
- assertEquals(4, result.invalidBlobIds.size());
+ assertEquals(4, result.missingBlobIds.size());
}
@Test
public void blobsWithSizeMismatch() throws Exception{
FailingBlob failingBlob = new FailingBlob("foo", true);
- IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
- NodeBuilder idx = defnBuilder.build().builder();
idx.child(":index").setProperty("foo", failingBlob);
NodeBuilder builder = EMPTY_NODE.builder();
builder.setChildNode("a", idx.getNodeState());
- IndexConsistencyChecker checker = new
IndexConsistencyChecker(builder.getNodeState(), "/a");
+ IndexConsistencyChecker checker = new
IndexConsistencyChecker(builder.getNodeState(), "/a",
temporaryFolder.getRoot());
Result result = checker.check(Level.BLOBS_ONLY);
assertFalse(result.clean);
@@ -88,6 +114,92 @@ public class IndexConsistencyCheckerTest
assertEquals(1, result.invalidBlobIds.size());
}
+    /**
+     * An index with two well-formed index directories must pass both the blob-only
+     * and the full check, and the full check must report a status for each directory.
+     */
+    @Test
+    public void validIndexTest() throws Exception{
+        IndexDefinition defn = IndexDefinition.newBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
+
+        Directory defaultDir = new OakDirectory(idx, ":data", defn, false);
+        createIndex(defaultDir, 10);
+
+        Directory secondDir = new OakDirectory(idx, ":data2"+ MultiplexersLucene.INDEX_DIR_SUFFIX, defn, false);
+        createIndex(secondDir, 10);
+
+        NodeBuilder rootBuilder = rootState.builder();
+        rootBuilder.setChildNode("fooIndex", idx.getNodeState());
+        NodeState stateWithIndex = rootBuilder.getNodeState();
+
+        IndexConsistencyChecker blobChecker =
+                new IndexConsistencyChecker(stateWithIndex, "/fooIndex", temporaryFolder.getRoot());
+        Result blobResult = blobChecker.check(Level.BLOBS_ONLY);
+        assertTrue(blobResult.clean);
+
+        IndexConsistencyChecker fullChecker =
+                new IndexConsistencyChecker(stateWithIndex, "/fooIndex", temporaryFolder.getRoot());
+        Result fullResult = fullChecker.check(Level.FULL);
+        assertTrue(fullResult.clean);
+        assertEquals(2, fullResult.dirStatus.size());
+    }
+
+    /**
+     * Removing the node of a file that the directory still lists must be reported as a
+     * missing file, and the Lucene index check must not be attempted for that directory.
+     */
+    @Test
+    public void missingFile() throws Exception{
+        IndexDefinition defn = IndexDefinition.newBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
+        Directory dir = new OakDirectory(idx, ":data", defn, false);
+        createIndex(dir, 10);
+
+        // Drop the file node after the index has been written
+        idx.getChildNode(":data").getChildNode("segments.gen").remove();
+
+        NodeBuilder builder = rootState.builder();
+        builder.setChildNode("fooIndex", idx.getNodeState());
+        NodeState indexState = builder.getNodeState();
+
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(indexState, "/fooIndex", temporaryFolder.getRoot());
+        Result result = checker.check(Level.FULL);
+        assertFalse(result.clean);
+        assertEquals(1, result.dirStatus.get(0).missingFiles.size());
+        assertNull(result.dirStatus.get(0).status);
+    }
+
+    /**
+     * Corrupting the content of an index file without changing its length must pass the
+     * file level checks (no missing files) but be flagged by the Lucene index check.
+     */
+    @Test
+    public void badFile() throws Exception{
+        IndexDefinition defn = IndexDefinition.newBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
+        Directory dir = new OakDirectory(idx, ":data", defn, false);
+        createIndex(dir, 10);
+
+        NodeBuilder file = idx.getChildNode(":data").getChildNode("_0.cfe");
+        List<Blob> blobs = Lists.newArrayList(file.getProperty("jcr:data").getValue(Type.BINARIES));
+
+        byte[] bytes;
+        try (InputStream in = blobs.get(0).getNewStream()) {
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            IOUtils.copy(in, baos);
+            bytes = baos.toByteArray();
+        }
+        // Flip bit 3 (mask 0x08) of the first byte so the content is corrupt but the length is unchanged
+        bytes[0] = (byte)(bytes[0] ^ (1 << 3));
+        blobs.set(0, new ArrayBasedBlob(bytes));
+        file.setProperty("jcr:data", blobs, Type.BINARIES);
+
+        NodeBuilder builder = rootState.builder();
+        builder.setChildNode("fooIndex", idx.getNodeState());
+        NodeState indexState = builder.getNodeState();
+
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(indexState, "/fooIndex", temporaryFolder.getRoot());
+        Result result = checker.check(Level.FULL);
+        assertFalse(result.clean);
+        assertEquals(0, result.dirStatus.get(0).missingFiles.size());
+        assertFalse(result.dirStatus.get(0).status.clean);
+    }
+
+ private void createIndex(Directory dir, int numOfDocs) throws IOException {
+ IndexWriter w = new IndexWriter(dir, new
IndexWriterConfig(Version.LUCENE_47, new OakAnalyzer(Version.LUCENE_47)));
+ for (int i = 0; i < numOfDocs; i++) {
+ Document d1 = new Document();
+ d1.add(newPathField("/a/b"));
+ w.addDocument(d1);
+ }
+ w.close();
+ dir.close();
+ }
+
private static class FailingBlob extends ArrayBasedBlob {
static int count;
private final String id;