Author: chetanm
Date: Thu May 4 06:42:07 2017
New Revision: 1793740
URL: http://svn.apache.org/viewvc?rev=1793740&view=rev
Log:
OAK-5558 - Consistency checker for Lucene indexes
Implemented basic check which checks for all referred blobs being present and
valid
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
(with props)
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
(with props)
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java?rev=1793740&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
(added)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
Thu May 4 06:42:07 2017
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.jcr.PropertyType;
+
+import com.google.common.base.Stopwatch;
+import com.google.common.io.ByteStreams;
+import com.google.common.io.CountingInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Root;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants;
+import org.apache.jackrabbit.oak.plugins.tree.RootFactory;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Checks the Lucene index stored at a given path of a repository root state
+ * for consistency problems.
+ *
+ * <p>The check implemented so far walks the index node and all of its
+ * descendants, reads every binary property value completely and verifies
+ * that the blob can be read and that the number of bytes read matches the
+ * length reported by the blob. Findings are collected in a {@link Result}.
+ */
+public class IndexConsistencyChecker {
+    private final Logger log = LoggerFactory.getLogger(getClass());
+    private final NodeState rootState;
+    private final String indexPath;
+
+    public enum Level {
+        /**
+         * Consistency check would only check if all blobs referred by index nodes
+         * are present in BlobStore
+         */
+        BLOBS_ONLY,
+        /**
+         * Performs full check via {@link org.apache.lucene.index.CheckIndex}. This
+         * reads whole index and hence can take time.
+         *
+         * <p>The CheckIndex pass is not implemented in this class yet; for now
+         * this level performs the same blob check as {@link #BLOBS_ONLY}.
+         */
+        FULL
+    }
+
+    /**
+     * Outcome of a single {@link IndexConsistencyChecker#check(Level)} run.
+     */
+    public static class Result {
+        /** True if no problems were found with the index. */
+        public boolean clean;
+
+        /**
+         * True if the node at the index path has no {@code type} property or
+         * its value is not {@link LuceneIndexConstants#TYPE_LUCENE}.
+         */
+        public boolean typeMismatch;
+
+        /** True if reading at least one blob failed with an exception. */
+        public boolean missingBlobs;
+
+        /**
+         * True if for at least one blob the number of bytes read differs
+         * from the length reported by the blob.
+         */
+        public boolean blobSizeMismatch;
+
+        /** Path of the index which was checked. */
+        public String indexPath;
+
+        /** Total number of bytes read from all blobs that could be read. */
+        public long binaryPropSize;
+
+        /** Content identities of the blobs found to be unreadable or of wrong size. */
+        public List<String> invalidBlobIds = new ArrayList<>();
+
+        /** Human readable descriptions of the size mismatches detected. */
+        public List<String> msgs = new ArrayList<>();
+    }
+
+    /**
+     * @param rootState root node state of the repository containing the index
+     * @param indexPath absolute path of the index definition node to check
+     */
+    public IndexConsistencyChecker(NodeState rootState, String indexPath) {
+        this.rootState = rootState;
+        this.indexPath = indexPath;
+    }
+
+    /**
+     * Runs the consistency check at the requested level.
+     *
+     * @param level how thorough the check should be
+     * @return the findings; {@link Result#clean} is true only if no problem
+     *         was detected
+     */
+    public Result check(Level level){
+        Stopwatch watch = Stopwatch.createStarted();
+        Result result = new Result();
+        result.indexPath = indexPath;
+        result.clean = true;
+
+        log.debug("[{}] Starting check", indexPath);
+
+        switch (level){
+            case BLOBS_ONLY :
+            case FULL :
+                // FULL must never report a clean index without having looked
+                // at anything. Until the CheckIndex based validation exists it
+                // runs the blob check, which a full check needs anyway.
+                checkBlobs(result);
+                break;
+        }
+
+        // Both placeholders are required: with "[]" the index path would be
+        // printed as the time taken and the stopwatch would be dropped.
+        if (result.clean){
+            log.info("[{}] No problems were detected with this index. Time taken {}", indexPath, watch);
+        } else {
+            log.info("[{}] Problems detected with this index. Time taken {}", indexPath, watch);
+        }
+
+        return result;
+    }
+
+    /**
+     * Verifies that the node at the index path is a Lucene index and, if so,
+     * checks all blobs below it. Otherwise the result is flagged with
+     * {@link Result#typeMismatch}.
+     */
+    private void checkBlobs(Result result) {
+        Root root = RootFactory.createReadOnlyRoot(rootState);
+        Tree idx = root.getTree(indexPath);
+        PropertyState type = idx.getProperty("type");
+        if (type != null && LuceneIndexConstants.TYPE_LUCENE.equals(type.getValue(Type.STRING))){
+            checkBlobs(result, idx);
+        } else {
+            result.clean = false;
+            result.typeMismatch = true;
+        }
+    }
+
+    /**
+     * Checks every binary property value of the given tree and then recurses
+     * into all of its children.
+     */
+    private void checkBlobs(Result result, Tree tree) {
+        for (PropertyState ps : tree.getProperties()){
+            if (ps.getType().tag() == PropertyType.BINARY){
+                if (ps.isArray()){
+                    for (int i = 0; i < ps.count(); i++) {
+                        Blob b = ps.getValue(Type.BINARY, i);
+                        checkBlob(ps.getName(), b, tree, result);
+                    }
+                } else {
+                    Blob b = ps.getValue(Type.BINARY);
+                    checkBlob(ps.getName(), b, tree, result);
+                }
+            }
+        }
+
+        for (Tree child : tree.getChildren()){
+            checkBlobs(result, child);
+        }
+    }
+
+    /**
+     * Reads the blob completely and records a problem in the result if the
+     * read fails or the number of bytes read differs from the blob length.
+     *
+     * @param propName name of the property holding the blob
+     * @param blob the blob to verify
+     * @param tree tree owning the property; used for reporting the location
+     * @param result collector for the findings
+     */
+    private void checkBlob(String propName, Blob blob, Tree tree, Result result) {
+        String id = blob.getContentIdentity();
+        String blobPath = String.format("%s/%s/%s", tree.getPath(), propName, id);
+        // try-with-resources so that the blob stream is released even when
+        // reading fails half way through
+        try (CountingInputStream cis = new CountingInputStream(blob.getNewStream())) {
+            IOUtils.copyLarge(cis, ByteStreams.nullOutputStream());
+
+            if (cis.getCount() != blob.length()){
+                String msg = String.format("Invalid blob %s. Length mismatch - expected ${%d} -> found ${%d}",
+                        blobPath, blob.length(), cis.getCount());
+                result.msgs.add(msg);
+                result.invalidBlobIds.add(id);
+                log.warn("[{}] {}", indexPath, msg);
+                result.clean = false;
+                result.blobSizeMismatch = true;
+            }
+            result.binaryPropSize += cis.getCount();
+        } catch (Exception e) {
+            // Any failure to obtain or read the stream is treated as the
+            // blob being unavailable
+            log.warn("[{}] Error occurred reading blob at {}", indexPath, blobPath, e);
+            result.invalidBlobIds.add(id);
+            result.clean = false;
+            result.missingBlobs = true;
+        }
+    }
+
+}
Propchange:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyChecker.java
------------------------------------------------------------------------------
svn:eol-style = native
Added:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java?rev=1793740&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
(added)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
Thu May 4 06:42:07 2017
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
+
+import java.io.InputStream;
+
+import javax.annotation.Nonnull;
+
+import com.google.common.collect.Lists;
+import org.apache.jackrabbit.oak.api.Type;
+import
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Level;
+import
org.apache.jackrabbit.oak.plugins.index.lucene.directory.IndexConsistencyChecker.Result;
+import
org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.junit.Test;
+
+import static
org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
+import static org.junit.Assert.*;
+
+/**
+ * Tests for the blob level check of {@link IndexConsistencyChecker}.
+ */
+public class IndexConsistencyCheckerTest {
+
+    /**
+     * Checking a path below an empty root must be reported as a type
+     * mismatch and not as a clean index.
+     */
+    @Test
+    public void emptyIndex() throws Exception{
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(EMPTY_NODE, "/foo");
+        Result result = checker.check(Level.BLOBS_ONLY);
+        assertFalse(result.clean);
+        assertTrue(result.typeMismatch);
+        // expected value first so that a failure message reads correctly
+        assertEquals("/foo", result.indexPath);
+    }
+
+    /**
+     * Blobs whose stream cannot be opened are reported as missing. The blob
+     * is referenced four times: on the index node, on a hidden child and
+     * twice in a multi valued property of another child.
+     */
+    @Test
+    public void blobsWithError() throws Exception{
+        FailingBlob failingBlob = new FailingBlob("foo");
+        IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
+
+        NodeBuilder idx = defnBuilder.build().builder();
+        idx.setProperty("foo", failingBlob);
+        idx.child(":index").setProperty("foo", failingBlob);
+        idx.child("b").setProperty("foo", Lists.newArrayList(failingBlob, failingBlob), Type.BINARIES);
+
+        NodeBuilder builder = EMPTY_NODE.builder();
+        builder.setChildNode("a", idx.getNodeState());
+
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(builder.getNodeState(), "/a");
+        Result result = checker.check(Level.BLOBS_ONLY);
+
+        assertFalse(result.clean);
+        assertTrue(result.missingBlobs);
+        assertFalse(result.blobSizeMismatch);
+        assertEquals(4, result.invalidBlobIds.size());
+    }
+
+    /**
+     * A blob which is readable but reports a length different from the
+     * number of bytes it delivers is reported as a size mismatch.
+     */
+    @Test
+    public void blobsWithSizeMismatch() throws Exception{
+        FailingBlob failingBlob = new FailingBlob("foo", true);
+        IndexDefinitionBuilder defnBuilder = new IndexDefinitionBuilder();
+
+        NodeBuilder idx = defnBuilder.build().builder();
+        idx.child(":index").setProperty("foo", failingBlob);
+
+        NodeBuilder builder = EMPTY_NODE.builder();
+        builder.setChildNode("a", idx.getNodeState());
+
+        IndexConsistencyChecker checker = new IndexConsistencyChecker(builder.getNodeState(), "/a");
+        Result result = checker.check(Level.BLOBS_ONLY);
+
+        assertFalse(result.clean);
+        assertFalse(result.missingBlobs);
+        assertTrue(result.blobSizeMismatch);
+        assertEquals(1, result.invalidBlobIds.size());
+    }
+
+    /**
+     * Blob used to simulate the two failure modes detected by the checker:
+     * either opening the stream fails, or (when {@code corruptLength} is
+     * set) the stream is readable but {@link #length()} reports one byte
+     * more than the content actually has.
+     */
+    private static class FailingBlob extends ArrayBasedBlob {
+        /** Counter used to hand out a distinct content identity per instance. */
+        static int count;
+        private final String id;
+        private final boolean corruptLength;
+
+        /** Creates a blob whose stream cannot be opened. */
+        public FailingBlob(String s) {
+            this(s, false);
+        }
+
+        /**
+         * @param s content of the blob
+         * @param corruptLength if true the blob is readable but reports a
+         *                      wrong length; if false opening the stream fails
+         */
+        public FailingBlob(String s, boolean corruptLength) {
+            super(s.getBytes());
+            this.id = String.valueOf(++count);
+            this.corruptLength = corruptLength;
+        }
+
+        @Nonnull
+        @Override
+        public InputStream getNewStream() {
+            if (corruptLength){
+                return super.getNewStream();
+            }
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public String getContentIdentity() {
+            return id;
+        }
+
+        @Override
+        public long length() {
+            return corruptLength ? super.length() + 1 : super.length();
+        }
+    }
+
+
+}
\ No newline at end of file
Propchange:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexConsistencyCheckerTest.java
------------------------------------------------------------------------------
svn:eol-style = native