Author: chetanm
Date: Thu Sep 15 07:15:09 2016
New Revision: 1760836

URL: http://svn.apache.org/viewvc?rev=1760836&view=rev
Log:
OAK-4412 - Lucene hybrid index

Avoid costly text extraction logic for synchronous indexing mode. Binary 
properties are indexed only via the async indexer

Modified:
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/hybrid/HybridIndexTest.java

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1760836&r1=1760835&r2=1760836&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
 Thu Sep 15 07:15:09 2016
@@ -928,6 +928,10 @@ public class LuceneIndexEditor implement
     }
 
     private String parseStringValue(Blob v, Metadata metadata, String path, 
String propertyName) {
+        if (!context.isAsyncIndexing()){
+            //Skip text extraction for sync indexing
+            return null;
+        }
         String text = context.getExtractedTextCache().get(path, propertyName, 
v, context.isReindex());
         if (text == null){
             text = parseStringValue0(v, metadata, path);

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1760836&r1=1760835&r2=1760836&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
 Thu Sep 15 07:15:09 2016
@@ -84,6 +84,8 @@ public class LuceneIndexEditorContext {
     private final IndexAugmentorFactory augmentorFactory;
 
     private final NodeState root;
+
+    private final boolean asyncIndexing;
     /**
      * The media types supported by the parser used.
      */
@@ -98,7 +100,8 @@ public class LuceneIndexEditorContext {
                              IndexUpdateCallback updateCallback,
                              LuceneIndexWriterFactory indexWriterFactory,
                              ExtractedTextCache extractedTextCache,
-                             IndexAugmentorFactory augmentorFactory) {
+                             IndexAugmentorFactory augmentorFactory,
+                             boolean asyncIndexing) {
         configureUniqueId(definition);
         this.root = root;
         this.definitionBuilder = definition;
@@ -108,6 +111,7 @@ public class LuceneIndexEditorContext {
         this.updateCallback = updateCallback;
         this.extractedTextCache = extractedTextCache;
         this.augmentorFactory = augmentorFactory;
+        this.asyncIndexing = asyncIndexing;
         if (this.definition.isOfOldFormat()){
             IndexDefinition.updateDefinition(definition);
         }
@@ -180,6 +184,10 @@ public class LuceneIndexEditorContext {
         return indexedNodes;
     }
 
+    public boolean isAsyncIndexing() {
+        return asyncIndexing;
+    }
+
     public long getIndexedNodes() {
         return indexedNodes;
     }

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java?rev=1760836&r1=1760835&r2=1760836&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
 Thu Sep 15 07:15:09 2016
@@ -97,6 +97,7 @@ public class LuceneIndexEditorProvider i
             IndexingContext indexingContext = 
((ContextAwareCallback)callback).getIndexingContext();
             LuceneIndexWriterFactory writerFactory = indexWriterFactory;
             IndexDefinition indexDefinition = null;
+            boolean asyncIndexing = true;
             if (!indexingContext.isAsync() && 
IndexDefinition.supportsSyncIndexing(definition)) {
 
                 //Would not participate in reindexing. Only interested in
@@ -120,10 +121,12 @@ public class LuceneIndexEditorProvider i
                 //some stuff gets written to NodeBuilder. That logic should be 
refactored
                 //to be moved to LuceneIndexWriter
                 definition = new ReadOnlyBuilder(definition.getNodeState());
+
+                asyncIndexing = false;
             }
 
             LuceneIndexEditorContext context = new 
LuceneIndexEditorContext(root, definition, indexDefinition, callback,
-                    writerFactory, extractedTextCache, augmentorFactory);
+                    writerFactory, extractedTextCache, augmentorFactory, 
asyncIndexing);
             return new LuceneIndexEditor(context);
         }
         return null;

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1760836&r1=1760835&r2=1760836&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
 Thu Sep 15 07:15:09 2016
@@ -242,14 +242,7 @@ public class LucenePropertyIndexTest ext
     }
 
     private Tree createFulltextIndex(Tree index, String name) throws 
CommitFailedException {
-        Tree def = index.addChild(INDEX_DEFINITIONS_NAME).addChild(name);
-        def.setProperty(JcrConstants.JCR_PRIMARYTYPE,
-                INDEX_DEFINITIONS_NODE_TYPE, Type.NAME);
-        def.setProperty(TYPE_PROPERTY_NAME, LuceneIndexConstants.TYPE_LUCENE);
-        def.setProperty(REINDEX_PROPERTY_NAME, true);
-        
def.setProperty(createProperty(LuceneIndexConstants.INCLUDE_PROPERTY_TYPES,
-                of(PropertyType.TYPENAME_STRING, 
PropertyType.TYPENAME_BINARY), STRINGS));
-        return index.getChild(INDEX_DEFINITIONS_NAME).getChild(name);
+        return TestUtil.createFulltextIndex(index, name);
     }
 
     @Test
@@ -2369,12 +2362,7 @@ public class LucenePropertyIndexTest ext
     }
 
     private Tree createFileNode(Tree tree, String name, Blob content, String 
mimeType){
-        Tree fileNode = tree.addChild(name);
-        fileNode.setProperty(JcrConstants.JCR_PRIMARYTYPE, 
JcrConstants.NT_FILE, Type.NAME);
-        Tree jcrContent = fileNode.addChild(JCR_CONTENT);
-        jcrContent.setProperty(JcrConstants.JCR_DATA, content);
-        jcrContent.setProperty(JcrConstants.JCR_MIMETYPE, mimeType);
-        return jcrContent;
+        return TestUtil.createFileNode(tree, name, content, mimeType);
     }
 
     private Tree usc(Tree parent, String childName){

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java?rev=1760836&r1=1760835&r2=1760836&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
 Thu Sep 15 07:15:09 2016
@@ -25,11 +25,14 @@ import java.util.concurrent.atomic.Atomi
 
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
+import javax.jcr.PropertyType;
 import javax.jcr.Repository;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.jackrabbit.JcrConstants;
 import org.apache.jackrabbit.api.JackrabbitRepository;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.api.Root;
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
@@ -49,6 +52,14 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
 
 import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.collect.ImmutableSet.of;
+import static org.apache.jackrabbit.JcrConstants.JCR_CONTENT;
+import static org.apache.jackrabbit.oak.api.Type.STRINGS;
+import static 
org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
+import static 
org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NODE_TYPE;
+import static 
org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME;
+import static 
org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
+import static 
org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
 
 public class TestUtil {
     private static final AtomicInteger COUNTER = new AtomicInteger();
@@ -183,6 +194,26 @@ public class TestUtil {
         return builder;
     }
 
+    public static Tree createFileNode(Tree tree, String name, Blob content, 
String mimeType){
+        Tree fileNode = tree.addChild(name);
+        fileNode.setProperty(JcrConstants.JCR_PRIMARYTYPE, 
JcrConstants.NT_FILE, Type.NAME);
+        Tree jcrContent = fileNode.addChild(JCR_CONTENT);
+        jcrContent.setProperty(JcrConstants.JCR_DATA, content);
+        jcrContent.setProperty(JcrConstants.JCR_MIMETYPE, mimeType);
+        return jcrContent;
+    }
+
+    public static Tree createFulltextIndex(Tree index, String name) throws 
CommitFailedException {
+        Tree def = index.addChild(INDEX_DEFINITIONS_NAME).addChild(name);
+        def.setProperty(JcrConstants.JCR_PRIMARYTYPE,
+                INDEX_DEFINITIONS_NODE_TYPE, Type.NAME);
+        def.setProperty(TYPE_PROPERTY_NAME, LuceneIndexConstants.TYPE_LUCENE);
+        def.setProperty(REINDEX_PROPERTY_NAME, true);
+        
def.setProperty(createProperty(LuceneIndexConstants.INCLUDE_PROPERTY_TYPES,
+                of(PropertyType.TYPENAME_STRING, 
PropertyType.TYPENAME_BINARY), STRINGS));
+        return index.getChild(INDEX_DEFINITIONS_NAME).getChild(name);
+    }
+
     public static void shutdown(Repository repository) {
         if (repository instanceof JackrabbitRepository) {
             ((JackrabbitRepository) repository).shutdown();

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/hybrid/HybridIndexTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/hybrid/HybridIndexTest.java?rev=1760836&r1=1760835&r2=1760836&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/hybrid/HybridIndexTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/hybrid/HybridIndexTest.java
 Thu Sep 15 07:15:09 2016
@@ -21,11 +21,13 @@ package org.apache.jackrabbit.oak.plugin
 
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.Collections;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 
+import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 
 import com.google.common.base.Predicate;
@@ -41,10 +43,12 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.index.lucene.IndexTracker;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
 import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider;
+import org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.reader.DefaultIndexReaderFactory;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReaderFactory;
 import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
 import 
org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
 import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
 import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
 import org.apache.jackrabbit.oak.query.AbstractQueryTest;
@@ -66,6 +70,7 @@ import static org.apache.jackrabbit.oak.
 import static 
org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndexTest.createIndex;
 import static 
org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
 import static 
org.apache.jackrabbit.oak.spi.mount.Mounts.defaultMountInfoProvider;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 
 public class HybridIndexTest extends AbstractQueryTest {
@@ -161,6 +166,31 @@ public class HybridIndexTest extends Abs
         assertQuery("select [jcr:path] from [nt:base] where [foo] = 'bar'", 
of("/a", "/b", "/c"));
     }
 
+    @Test
+    public void noTextExtractionForSyncCommit() throws Exception{
+        String idxName = "hybridtest";
+        Tree idx = TestUtil.createFulltextIndex(root.getTree("/"), idxName);
+        idx.setProperty(createProperty(IndexConstants.ASYNC_PROPERTY_NAME, 
ImmutableSet.of("sync" , "async"), STRINGS));
+        root.commit();
+
+        runAsyncIndex();
+
+        AccessRecordingBlob testBlob =
+                new AccessRecordingBlob("<?xml version=\"1.0\" 
encoding=\"UTF-8\"?><msg>sky is blue</msg>".getBytes());
+
+        Tree test = root.getTree("/").addChild("test");
+        TestUtil.createFileNode(test, "msg", testBlob, "application/xml");
+        root.commit();
+
+        assertEquals(0, testBlob.accessCount);
+        assertQuery("select * from [nt:base] where CONTAINS(*, 'sky')", 
Collections.<String>emptyList());
+
+        runAsyncIndex();
+        assertEquals(1, testBlob.accessCount);
+        assertQuery("select * from [nt:base] where CONTAINS(*, 'sky')", 
of("/test/msg/jcr:content"));
+
+    }
+
     private void runAsyncIndex() {
         Runnable async = WhiteboardUtils.getService(wb, Runnable.class, new 
Predicate<Runnable>() {
             @Override
@@ -180,4 +210,18 @@ public class HybridIndexTest extends Abs
         }
         return base;
     }
+
+    private static class AccessRecordingBlob extends ArrayBasedBlob {
+        int accessCount = 0;
+        public AccessRecordingBlob(byte[] value) {
+            super(value);
+        }
+
+        @Nonnull
+        @Override
+        public InputStream getNewStream() {
+            accessCount++;
+            return super.getNewStream();
+        }
+    }
 }


Reply via email to