Author: chetanm
Date: Thu May  4 06:42:22 2017
New Revision: 1793741

URL: http://svn.apache.org/viewvc?rev=1793741&view=rev
Log:
OAK-5558 - Consistency checker for Lucene indexes

Implemented support for full index consistency checks based on the CheckIndex utility from Lucene

Modified:
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java?rev=1793741&r1=1793740&r2=1793741&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
 Thu May  4 06:42:22 2017
@@ -19,7 +19,11 @@
 
 package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
 
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.io.InputStream;
+import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -28,22 +32,37 @@ import javax.jcr.PropertyType;
 import com.google.common.base.Stopwatch;
 import com.google.common.io.ByteStreams;
 import com.google.common.io.CountingInputStream;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Root;
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectory;
+import 
org.apache.jackrabbit.oak.plugins.index.lucene.writer.MultiplexersLucene;
 import org.apache.jackrabbit.oak.plugins.tree.RootFactory;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
+import org.apache.jackrabbit.oak.spi.state.ReadOnlyBuilder;
+import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static com.google.common.base.Preconditions.checkNotNull;
+
 public class IndexConsistencyChecker {
     private final Logger log = LoggerFactory.getLogger(getClass());
     private final NodeState rootState;
     private final String indexPath;
+    private final File workDirRoot;
 
     public enum Level {
         /**
@@ -52,7 +71,7 @@ public class IndexConsistencyChecker {
          */
         BLOBS_ONLY,
         /**
-         * Performs full check via {@link org.apache.lucene.index.CheckIndex}. 
This
+         * Performs full check via {@code org.apache.lucene.index.CheckIndex}. 
This
          * reads whole index and hence can take time
          */
         FULL
@@ -72,17 +91,59 @@ public class IndexConsistencyChecker {
 
         public long binaryPropSize;
 
-        public List<String> invalidBlobIds = new ArrayList<>();
+        public List<FileSizeStatus> invalidBlobIds = new ArrayList<>();
+
+        public List<String> missingBlobIds = new ArrayList<>();
+
+        public List<DirectoryStatus> dirStatus = new ArrayList<>();
+    }
+
+    public static class DirectoryStatus {
+        public final String dirName;
+
+        public final List<String> missingFiles = new ArrayList<>();
+
+        public final List<FileSizeStatus> filesWithSizeMismatch = new 
ArrayList<>();
+
+        public boolean clean;
+
+        public long size;
+
+        public CheckIndex.Status status;
+
+        public long numDocs;
+
+        public DirectoryStatus(String dirName) {
+            this.dirName = dirName;
+        }
+    }
+
+    public static class FileSizeStatus {
+        public final String name;
+
+        public final long actualSize;
+
+        public final long expectedSize;
+
+        public FileSizeStatus(String name, long actualSize, long expectedSize) 
{
+            this.name = name;
+            this.actualSize = actualSize;
+            this.expectedSize = expectedSize;
+        }
 
-        public List<String> msgs = new ArrayList<>();
+        @Override
+        public String toString() {
+            return String.format("%s => expected %d, actual %d", name, 
expectedSize, actualSize);
+        }
     }
 
-    public IndexConsistencyChecker(NodeState rootState, String indexPath) {
-        this.rootState = rootState;
-        this.indexPath = indexPath;
+    public IndexConsistencyChecker(NodeState rootState, String indexPath, File 
workDirRoot) {
+        this.rootState = checkNotNull(rootState);
+        this.indexPath = checkNotNull(indexPath);
+        this.workDirRoot = checkNotNull(workDirRoot);
     }
 
-    public Result check(Level level){
+    public Result check(Level level) throws IOException {
         Stopwatch watch = Stopwatch.createStarted();
         Result result = new Result();
         result.indexPath = indexPath;
@@ -90,10 +151,9 @@ public class IndexConsistencyChecker {
 
         log.debug("[{}] Starting check", indexPath);
 
-        switch (level){
-            case BLOBS_ONLY :
-                checkBlobs(result);
-                break;
+        checkBlobs(result);
+        if (level == Level.FULL && result.clean){
+            checkIndex(result);
         }
 
         if (result.clean){
@@ -105,6 +165,82 @@ public class IndexConsistencyChecker {
         return result;
     }
 
+    private void checkIndex(Result result) throws IOException {
+        NodeState idx = NodeStateUtils.getNode(rootState, indexPath);
+        IndexDefinition defn = IndexDefinition.newBuilder(rootState, idx, 
indexPath).build();
+        File workDir = createWorkDir(workDirRoot, 
PathUtils.getName(indexPath));
+
+        for (String dirName : idx.getChildNodeNames()){
+            //TODO Check for SuggestionDirectory Pending
+            if (NodeStateUtils.isHidden(dirName) && 
MultiplexersLucene.isIndexDirName(dirName)){
+                DirectoryStatus dirStatus = new DirectoryStatus(dirName);
+                result.dirStatus.add(dirStatus);
+                log.warn("[{}] Checking directory {}", indexPath, dirName);
+                try {
+                    checkIndexDirectory(dirStatus, idx, defn, workDir, 
dirName);
+                } catch (IOException e){
+                    dirStatus.clean = false;
+                    log.warn("[{}][{}] Error occurred while performing 
directory check", indexPath, dirName, e);
+                }
+
+                if (!dirStatus.clean){
+                    result.clean = false;
+                }
+            }
+        }
+    }
+
+    private void checkIndexDirectory(DirectoryStatus dirStatus, NodeState idx, 
IndexDefinition defn,
+                                     File workDir, String dirName) throws 
IOException {
+        File idxDir = createWorkDir(workDir, dirName);
+        Directory sourceDir = new OakDirectory(new ReadOnlyBuilder(idx), 
dirName, defn, true);
+        Directory targetDir = FSDirectory.open(idxDir);
+
+        boolean clean = true;
+        for (String file : sourceDir.listAll()) {
+            log.debug("[{}][{}] Checking {}", indexPath, dirName, file);
+            try {
+                sourceDir.copy(targetDir, file, file, IOContext.DEFAULT);
+            } catch (FileNotFoundException ignore){
+                dirStatus.missingFiles.add(file);
+                clean = false;
+                log.warn("[{}][{}] File {} missing", indexPath, dirName, file);
+            }
+
+            if (targetDir.fileLength(file) != sourceDir.fileLength(file)){
+                FileSizeStatus fileStatus = new FileSizeStatus(file, 
targetDir.fileLength(file), sourceDir.fileLength(file));
+                dirStatus.filesWithSizeMismatch.add(fileStatus);
+                clean = false;
+                log.warn("[{}][{}] File size mismatch {}", indexPath, dirName, 
fileStatus);
+            } else {
+                dirStatus.size += sourceDir.fileLength(file);
+                log.debug("[{}][{}] File {} is consistent", indexPath, 
dirName, file);
+            }
+        }
+
+        if (clean){
+            log.debug("[{}][{}] Directory content found to be consistent. 
Proceeding to IndexCheck", indexPath, dirName);
+            CheckIndex ci = new CheckIndex(targetDir);
+
+            if (log.isDebugEnabled()) {
+                ci.setInfoStream(new LoggingPrintStream(log), 
log.isTraceEnabled());
+            }
+
+            dirStatus.status = ci.checkIndex();
+            dirStatus.clean = dirStatus.status.clean;
+            log.debug("[{}][{}] IndexCheck was successful. Proceeding to open 
DirectoryReader", indexPath, dirName);
+        }
+
+        if (dirStatus.clean){
+            DirectoryReader dirReader = DirectoryReader.open(targetDir);
+            dirStatus.numDocs = dirReader.numDocs();
+            log.debug("[{}][{}] DirectoryReader can be opened", indexPath, 
dirName);
+            dirReader.close();
+        }
+    }
+
+    //~---------------------------------------< Blob Validation >
+
     private void checkBlobs(Result result) {
         Root root = RootFactory.createReadOnlyRoot(rootState);
         Tree idx = root.getTree(indexPath);
@@ -148,8 +284,7 @@ public class IndexConsistencyChecker {
             if (cis.getCount() != blob.length()){
                 String msg = String.format("Invalid blob %s. Length mismatch - 
expected ${%d} -> found ${%d}",
                         blobPath, blob.length(), cis.getCount());
-                result.msgs.add(msg);
-                result.invalidBlobIds.add(id);
+                result.invalidBlobIds.add(new FileSizeStatus(blobPath, 
cis.getCount(), blob.length()));
                 log.warn("[{}] {}", indexPath, msg);
                 result.clean = false;
                 result.blobSizeMismatch = true;
@@ -157,10 +292,45 @@ public class IndexConsistencyChecker {
             result.binaryPropSize += cis.getCount();
         } catch (Exception e) {
             log.warn("[{}] Error occurred reading blob at {}", indexPath, 
blobPath, e);
-            result.invalidBlobIds.add(id);
+            result.missingBlobIds.add(id);
             result.clean = false;
             result.missingBlobs = true;
         }
     }
 
+    //~-----------------------------------------------< utility >
+
+    private static File createWorkDir(File parent, String name) throws 
IOException {
+        String fsSafeName = IndexRootDirectory.getFSSafeName(name);
+        File dir = new File(parent, fsSafeName);
+        FileUtils.forceMkdir(dir);
+        FileUtils.cleanDirectory(dir);
+        return dir;
+    }
+
+    /**
+     * Adapter to pipe info messages from lucene into log messages.
+     */
+    private static final class LoggingPrintStream extends PrintStream {
+
+        /** Buffer print calls until a newline is written */
+        private final StringBuffer buffer = new StringBuffer();
+
+        private final Logger log;
+
+        public LoggingPrintStream(Logger log) {
+            super(ByteStreams.nullOutputStream());
+            this.log = log;
+        }
+
+        public void print(String s) {
+            buffer.append(s);
+        }
+
+        public void println(String s) {
+            buffer.append(s);
+            log.debug(buffer.toString());
+            buffer.setLength(0);
+        }
+    }
 }

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java?rev=1793741&r1=1793740&r2=1793741&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexersLucene.java
 Thu May  4 06:42:22 2017
@@ -29,9 +29,9 @@ public final class MultiplexersLucene {
     /**
      * Suffix used to decorate mount names to represent index directory
      */
-    private static final String INDEX_DIR_SUFFIX = "-index-data";
+    public static final String INDEX_DIR_SUFFIX = "-index-data";
 
-    private static final String SUGGEST_DIR_SUFFIX = "-suggest-data";
+    public static final String SUGGEST_DIR_SUFFIX = "-suggest-data";
 
     public static String getIndexDirName(Mount mount) {
         if (mount.isDefault()){

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java?rev=1793741&r1=1793740&r2=1793741&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
 Thu May  4 06:42:22 2017
@@ -19,27 +19,57 @@
 
 package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
 
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
 import java.io.InputStream;
+import java.util.List;
 
 import javax.annotation.Nonnull;
 
 import com.google.common.collect.Lists;
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.oak.InitialContent;
+import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.lucene.OakAnalyzer;
+import org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectory;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Level;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Result;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder;
+import 
org.apache.jackrabbit.oak.plugins.index.lucene.writer.MultiplexersLucene;
 import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
 
+import static 
org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
 import static 
org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 
 public class IndexConsistencyCheckerTest {
 
+    private NodeState rootState = InitialContent.INITIAL_CONTENT;
+    private NodeBuilder idx = new IndexDefinitionBuilder().build().builder();
+
+    @Rule
+    public final TemporaryFolder temporaryFolder = new TemporaryFolder(new 
File("target"));
+
     @Test
     public void emptyIndex() throws Exception{
-        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(EMPTY_NODE, "/foo");
+        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(EMPTY_NODE, "/foo", temporaryFolder.getRoot());
         Result result = checker.check(Level.BLOBS_ONLY);
         assertFalse(result.clean);
         assertTrue(result.typeMismatch);
@@ -49,9 +79,7 @@ public class IndexConsistencyCheckerTest
     @Test
     public void blobsWithError() throws Exception{
         FailingBlob failingBlob = new FailingBlob("foo");
-        IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
 
-        NodeBuilder idx = defnBuilder.build().builder();
         idx.setProperty("foo", failingBlob);
         idx.child(":index").setProperty("foo", failingBlob);
         idx.child("b").setProperty("foo", Lists.newArrayList(failingBlob, 
failingBlob), Type.BINARIES);
@@ -59,27 +87,25 @@ public class IndexConsistencyCheckerTest
         NodeBuilder builder = EMPTY_NODE.builder();
         builder.setChildNode("a", idx.getNodeState());
 
-        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(builder.getNodeState(), "/a");
+        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(builder.getNodeState(), "/a", 
temporaryFolder.getRoot());
         Result result = checker.check(Level.BLOBS_ONLY);
 
         assertFalse(result.clean);
         assertTrue(result.missingBlobs);
         assertFalse(result.blobSizeMismatch);
-        assertEquals(4, result.invalidBlobIds.size());
+        assertEquals(4, result.missingBlobIds.size());
     }
 
     @Test
     public void blobsWithSizeMismatch() throws Exception{
         FailingBlob failingBlob = new FailingBlob("foo", true);
-        IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
 
-        NodeBuilder idx = defnBuilder.build().builder();
         idx.child(":index").setProperty("foo", failingBlob);
 
         NodeBuilder builder = EMPTY_NODE.builder();
         builder.setChildNode("a", idx.getNodeState());
 
-        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(builder.getNodeState(), "/a");
+        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(builder.getNodeState(), "/a", 
temporaryFolder.getRoot());
         Result result = checker.check(Level.BLOBS_ONLY);
 
         assertFalse(result.clean);
@@ -88,6 +114,92 @@ public class IndexConsistencyCheckerTest
         assertEquals(1, result.invalidBlobIds.size());
     }
 
+    @Test
+    public void validIndexTest() throws Exception{
+        IndexDefinition defn = IndexDefinition.newBuilder(rootState, 
idx.getNodeState(), "/fooIndex").build();
+        Directory dir = new OakDirectory(idx, ":data", defn, false);
+        createIndex(dir, 10);
+
+        dir = new OakDirectory(idx, ":data2"+ 
MultiplexersLucene.INDEX_DIR_SUFFIX, defn, false);
+        createIndex(dir, 10);
+
+        NodeBuilder builder = rootState.builder();
+        builder.setChildNode("fooIndex", idx.getNodeState());
+        NodeState indexState = builder.getNodeState();
+
+        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(indexState, "/fooIndex", temporaryFolder.getRoot());
+        Result result = checker.check(Level.BLOBS_ONLY);
+        assertTrue(result.clean);
+
+        checker = new IndexConsistencyChecker(indexState, "/fooIndex", 
temporaryFolder.getRoot());
+        result = checker.check(Level.FULL);
+        assertTrue(result.clean);
+        assertEquals(2, result.dirStatus.size());
+    }
+
+    @Test
+    public void missingFile() throws Exception{
+        IndexDefinition defn = IndexDefinition.newBuilder(rootState, 
idx.getNodeState(), "/fooIndex").build();
+        Directory dir = new OakDirectory(idx, ":data", defn, false);
+        createIndex(dir, 10);
+
+        NodeBuilder builder = rootState.builder();
+
+        idx.getChildNode(":data").getChildNode("segments.gen").remove();
+
+        builder.setChildNode("fooIndex", idx.getNodeState());
+        NodeState indexState = builder.getNodeState();
+
+        System.out.println(NodeStateUtils.toString(idx.getNodeState()));
+
+        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(indexState, "/fooIndex", temporaryFolder.getRoot());
+        Result result = checker.check(Level.FULL);
+        assertFalse(result.clean);
+        assertEquals(1, result.dirStatus.get(0).missingFiles.size());
+        assertNull(result.dirStatus.get(0).status);
+    }
+
+    @Test
+    public void badFile() throws Exception{
+        IndexDefinition defn = IndexDefinition.newBuilder(rootState, 
idx.getNodeState(), "/fooIndex").build();
+        Directory dir = new OakDirectory(idx, ":data", defn, false);
+        createIndex(dir, 10);
+
+        NodeBuilder builder = rootState.builder();
+
+        NodeBuilder file = idx.getChildNode(":data").getChildNode("_0.cfe");
+        List<Blob> blobs = 
Lists.newArrayList(file.getProperty("jcr:data").getValue(Type.BINARIES));
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        IOUtils.copy(blobs.get(0).getNewStream(), baos);
+        byte[] bytes = baos.toByteArray();
+        bytes[0] = (byte)(bytes[0] ^ (1 << 3)); //Flip the 3rd bit to make it 
corrupt
+        blobs.set(0, new ArrayBasedBlob(bytes));
+        file.setProperty("jcr:data", blobs, Type.BINARIES);
+
+        builder.setChildNode("fooIndex", idx.getNodeState());
+        NodeState indexState = builder.getNodeState();
+
+        System.out.println(NodeStateUtils.toString(idx.getNodeState()));
+
+        IndexConsistencyChecker checker = new 
IndexConsistencyChecker(indexState, "/fooIndex", temporaryFolder.getRoot());
+        Result result = checker.check(Level.FULL);
+        assertFalse(result.clean);
+        assertEquals(0, result.dirStatus.get(0).missingFiles.size());
+        assertFalse(result.dirStatus.get(0).status.clean);
+    }
+
+    private void createIndex(Directory dir, int numOfDocs) throws IOException {
+        IndexWriter w = new IndexWriter(dir, new 
IndexWriterConfig(Version.LUCENE_47, new OakAnalyzer(Version.LUCENE_47)));
+        for (int i = 0; i < numOfDocs; i++) {
+            Document d1 = new Document();
+            d1.add(newPathField("/a/b"));
+            w.addDocument(d1);
+        }
+        w.close();
+        dir.close();
+    }
+
     private static class FailingBlob extends ArrayBasedBlob {
         static int count;
         private final String id;


Reply via email to