Author: chetanm
Date: Thu May  4 06:42:07 2017
New Revision: 1793740

URL: http://svn.apache.org/viewvc?rev=1793740&view=rev
Log:
OAK-5558 - Consistency checker for Lucene indexes

Implemented a basic check which verifies that all referenced blobs are present 
and valid

Added:
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
   (with props)
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
   (with props)

Added: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java?rev=1793740&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
 Thu May  4 06:42:07 2017
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.jcr.PropertyType;
+
+import com.google.common.base.Stopwatch;
+import com.google.common.io.ByteStreams;
+import com.google.common.io.CountingInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Root;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants;
+import org.apache.jackrabbit.oak.plugins.tree.RootFactory;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Checks the consistency of the index stored at a given path below a root
+ * {@link NodeState}.
+ *
+ * <p>The blob check walks the index node and all of its descendants, reads every
+ * binary property value completely and verifies that the number of bytes read
+ * matches the length reported by the blob. The outcome is reported via a
+ * {@link Result} instance.
+ */
+public class IndexConsistencyChecker {
+    private final Logger log = LoggerFactory.getLogger(getClass());
+    private final NodeState rootState;
+    private final String indexPath;
+
+    public enum Level {
+        /**
+         * Consistency check would only check if all blobs referred by index nodes
+         * are present in BlobStore
+         */
+        BLOBS_ONLY,
+        /**
+         * Performs full check via {@link org.apache.lucene.index.CheckIndex}. This
+         * reads whole index and hence can take time.
+         *
+         * <p>NOTE: the CheckIndex pass is not implemented in this class yet; for
+         * now this level performs the same blob check as {@link #BLOBS_ONLY}.
+         */
+        FULL
+    }
+
+    /**
+     * Outcome of a consistency check. All fields are populated by
+     * {@link IndexConsistencyChecker#check(Level)}.
+     */
+    public static class Result {
+        /** True if no problems were found with the index. */
+        public boolean clean;
+
+        /** True if the node at the index path has no {@code type} property with the Lucene type value. */
+        public boolean typeMismatch;
+
+        /** True if reading at least one blob failed with an exception. */
+        public boolean missingBlobs;
+
+        /** True if for at least one blob the number of bytes read differs from its reported length. */
+        public boolean blobSizeMismatch;
+
+        /** Path of the index which was checked. */
+        public String indexPath;
+
+        /** Total number of bytes read from all blobs which could be read without an exception. */
+        public long binaryPropSize;
+
+        /** Content identities of the blobs which could not be read or had a length mismatch. */
+        public List<String> invalidBlobIds = new ArrayList<>();
+
+        /** Human readable descriptions of the length mismatches found. */
+        public List<String> msgs = new ArrayList<>();
+    }
+
+    /**
+     * @param rootState root state of the repository content containing the index
+     * @param indexPath path of the index node to check
+     */
+    public IndexConsistencyChecker(NodeState rootState, String indexPath) {
+        this.rootState = rootState;
+        this.indexPath = indexPath;
+    }
+
+    /**
+     * Runs the consistency check at the requested level and logs a summary
+     * together with the time taken.
+     *
+     * @param level level of checking to perform
+     * @return result of the check; {@link Result#clean} is true only if no
+     *         problem was detected
+     */
+    public Result check(Level level){
+        Stopwatch watch = Stopwatch.createStarted();
+        Result result = new Result();
+        result.indexPath = indexPath;
+        result.clean = true;
+
+        log.debug("[{}] Starting check", indexPath);
+
+        switch (level){
+            case BLOBS_ONLY :
+                checkBlobs(result);
+                break;
+            case FULL :
+                // The CheckIndex pass is not implemented yet. Still run the blob check
+                // so that a FULL request never reports a clean index without having
+                // inspected anything.
+                log.info("[{}] Full check via CheckIndex is not implemented. Only blobs would be checked", indexPath);
+                checkBlobs(result);
+                break;
+        }
+
+        // The "[{}]" placeholder is required; with a plain "[]" the index path would
+        // be substituted into the "Time taken" placeholder and the time would be lost
+        if (result.clean){
+            log.info("[{}] No problems were detected with this index. Time taken {}", indexPath, watch);
+        } else {
+            log.info("[{}] Problems detected with this index. Time taken {}", indexPath, watch);
+        }
+
+        return result;
+    }
+
+    /**
+     * Verifies that the node at the index path is a Lucene index and, if so,
+     * checks all blobs below it. Otherwise the result is flagged with
+     * {@link Result#typeMismatch}.
+     */
+    private void checkBlobs(Result result) {
+        Root root = RootFactory.createReadOnlyRoot(rootState);
+        Tree idx = root.getTree(indexPath);
+        PropertyState type = idx.getProperty("type");
+        if (type != null && LuceneIndexConstants.TYPE_LUCENE.equals(type.getValue(Type.STRING))){
+            checkBlobs(result, idx);
+        } else {
+            result.clean = false;
+            result.typeMismatch = true;
+        }
+    }
+
+    /**
+     * Checks every binary property value (single and multi valued) of the given
+     * tree and then recurses into all of its children.
+     */
+    private void checkBlobs(Result result, Tree tree) {
+        for (PropertyState ps : tree.getProperties()){
+            if (ps.getType().tag() == PropertyType.BINARY){
+                if (ps.isArray()){
+                    for (int i = 0; i < ps.count(); i++) {
+                        Blob b = ps.getValue(Type.BINARY, i);
+                        checkBlob(ps.getName(), b, tree, result);
+                    }
+                } else {
+                    Blob b = ps.getValue(Type.BINARY);
+                    checkBlob(ps.getName(), b, tree, result);
+                }
+            }
+        }
+
+        for (Tree child : tree.getChildren()){
+            checkBlobs(result, child);
+        }
+    }
+
+    /**
+     * Reads the given blob completely and compares the number of bytes read with
+     * the length reported by the blob. A length mismatch is recorded as
+     * {@link Result#blobSizeMismatch}; any exception while reading is recorded as
+     * {@link Result#missingBlobs}. In both cases the content identity of the blob
+     * is added to {@link Result#invalidBlobIds}.
+     */
+    private void checkBlob(String propName, Blob blob, Tree tree, Result result) {
+        String id = blob.getContentIdentity();
+        String blobPath = String.format("%s/%s/%s", tree.getPath(), propName, id);
+        InputStream is = null;
+        try{
+            is = blob.getNewStream();
+            CountingInputStream cis = new CountingInputStream(is);
+            // Drain the stream; only the number of bytes is of interest
+            IOUtils.copyLarge(cis, ByteStreams.nullOutputStream());
+
+            if (cis.getCount() != blob.length()){
+                String msg = String.format("Invalid blob %s. Length mismatch - expected ${%d} -> found ${%d}",
+                        blobPath, blob.length(), cis.getCount());
+                result.msgs.add(msg);
+                result.invalidBlobIds.add(id);
+                log.warn("[{}] {}", indexPath, msg);
+                result.clean = false;
+                result.blobSizeMismatch = true;
+            }
+            result.binaryPropSize += cis.getCount();
+        } catch (Exception e) {
+            log.warn("[{}] Error occurred reading blob at {}", indexPath, blobPath, e);
+            result.invalidBlobIds.add(id);
+            result.clean = false;
+            result.missingBlobs = true;
+        } finally {
+            // Release the blob stream; closing quietly so that a failure on close
+            // does not alter the outcome already recorded above
+            IOUtils.closeQuietly(is);
+        }
+    }
+
+}

Propchange: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java?rev=1793740&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
 Thu May  4 06:42:07 2017
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
+
+import java.io.InputStream;
+
+import javax.annotation.Nonnull;
+
+import com.google.common.collect.Lists;
+import org.apache.jackrabbit.oak.api.Type;
+import 
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Level;
+import 
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Result;
+import 
org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.junit.Test;
+
+import static 
org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
+import static org.junit.Assert.*;
+
+/**
+ * Tests for the blob level check of {@link IndexConsistencyChecker}.
+ */
+public class IndexConsistencyCheckerTest {
+
+    /**
+     * Checking a path which does not hold a Lucene index must be reported
+     * as a type mismatch.
+     */
+    @Test
+    public void emptyIndex() throws Exception{
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(EMPTY_NODE, "/foo");
+        Result result = checker.check(Level.BLOBS_ONLY);
+        assertFalse(result.clean);
+        assertTrue(result.typeMismatch);
+        // expected value goes first so that a failure message reads correctly
+        assertEquals("/foo", result.indexPath);
+    }
+
+    /**
+     * Blobs which fail on read must be reported as missing, once per
+     * binary value referring to them.
+     */
+    @Test
+    public void blobsWithError() throws Exception{
+        FailingBlob failingBlob = new FailingBlob("foo");
+        IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
+
+        // Four binary values in total: one on the index node, one on a hidden
+        // child and two in a multi valued property on another child
+        NodeBuilder idx = defnBuilder.build().builder();
+        idx.setProperty("foo", failingBlob);
+        idx.child(":index").setProperty("foo", failingBlob);
+        idx.child("b").setProperty("foo", Lists.newArrayList(failingBlob, failingBlob), Type.BINARIES);
+
+        NodeBuilder builder = EMPTY_NODE.builder();
+        builder.setChildNode("a", idx.getNodeState());
+
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(builder.getNodeState(), "/a");
+        Result result = checker.check(Level.BLOBS_ONLY);
+
+        assertFalse(result.clean);
+        assertTrue(result.missingBlobs);
+        assertFalse(result.blobSizeMismatch);
+        assertEquals(4, result.invalidBlobIds.size());
+    }
+
+    /**
+     * A blob which is readable but reports a wrong length must be reported
+     * as a size mismatch and not as missing.
+     */
+    @Test
+    public void blobsWithSizeMismatch() throws Exception{
+        FailingBlob failingBlob = new FailingBlob("foo", true);
+        IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
+
+        NodeBuilder idx = defnBuilder.build().builder();
+        idx.child(":index").setProperty("foo", failingBlob);
+
+        NodeBuilder builder = EMPTY_NODE.builder();
+        builder.setChildNode("a", idx.getNodeState());
+
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(builder.getNodeState(), "/a");
+        Result result = checker.check(Level.BLOBS_ONLY);
+
+        assertFalse(result.clean);
+        assertFalse(result.missingBlobs);
+        assertTrue(result.blobSizeMismatch);
+        assertEquals(1, result.invalidBlobIds.size());
+    }
+
+    /**
+     * Blob which either throws on read or, when {@code corruptLength} is set,
+     * is readable but reports a length one byte larger than its content.
+     */
+    private static class FailingBlob extends ArrayBasedBlob {
+        /** Counter used to hand out a distinct content identity per instance. */
+        static int count;
+        private final String id;
+        private final boolean corruptLength;
+
+        public FailingBlob(String s) {
+           this(s, false);
+        }
+
+        public FailingBlob(String s, boolean corruptLength) {
+            super(s.getBytes());
+            this.id = String.valueOf(++count);
+            this.corruptLength = corruptLength;
+        }
+
+        @Nonnull
+        @Override
+        public InputStream getNewStream() {
+            // The content must be readable for the length mismatch to be noticed
+            if (corruptLength){
+                return super.getNewStream();
+            }
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public String getContentIdentity() {
+            return id;
+        }
+
+        @Override
+        public long length() {
+            return corruptLength ? super.length() + 1 : super.length();
+        }
+    }
+
+
+}
\ No newline at end of file

Propchange: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to