Author: chetanm
Date: Thu May 21 10:40:56 2015
New Revision: 1680814

URL: http://svn.apache.org/r1680814
Log:
OAK-2895 - Avoid accessing binary content if the mimeType is excluded from indexing

Merging 1680806

Added:
    
jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/
      - copied from r1680806, 
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/
    
jackrabbit/oak/branches/1.2/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/
      - copied from r1680806, 
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/
    
jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java
      - copied unchanged from r1680806, 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java
Modified:
    jackrabbit/oak/branches/1.2/   (props changed)
    
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
    
jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
    
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java

Propchange: jackrabbit/oak/branches/1.2/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu May 21 10:40:56 2015
@@ -1,3 +1,3 @@
 /jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414,1673436,1673644,1673662-1673664,1673669,1673695,1674046,1674065,1674075,1674107,1674228,1674880,1675054-1675055,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677939,1677991,1678173,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679235,1680182,1680222,1680232,1680236,1680461,1680805
+/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414,1673436,1673644,1673662-1673664,1673669,1673695,1674046,1674065,1674075,1674107,1674228,1674880,1675054-1675055,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677939,1677991,1678173,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679235,1680182,1680222,1680232,1680236,1680461,1680805-1680806
 /jackrabbit/trunk:1345480

Modified: 
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1680814&r1=1680813&r2=1680814&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
 Thu May 21 10:40:56 2015
@@ -46,6 +46,7 @@ import org.apache.jackrabbit.oak.api.Pro
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.commons.io.LazyInputStream;
 import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
 import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
 import org.apache.jackrabbit.oak.plugins.index.PathFilter;
@@ -55,6 +56,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.spi.commit.Editor;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.util.BlobByteSource;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoubleDocValuesField;
 import org.apache.lucene.document.DoubleField;
@@ -803,7 +805,7 @@ public class LuceneIndexEditor implement
         long start = System.currentTimeMillis();
         long size = 0;
         try {
-            CountingInputStream stream = new 
CountingInputStream(v.getNewStream());
+            CountingInputStream stream = new CountingInputStream(new 
LazyInputStream(new BlobByteSource(v)));
             try {
                 context.getParser().parse(stream, handler, metadata, new 
ParseContext());
             } finally {

Modified: 
jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml?rev=1680814&r1=1680813&r2=1680814&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
 Thu May 21 10:40:56 2015
@@ -21,7 +21,7 @@
 
 <properties>
   <detectors>
-    <detector class="org.apache.tika.detect.DefaultDetector"/>
+    <detector class="org.apache.tika.detect.TypeDetector"/>
   </detectors>
   <parsers>
     <parser class="org.apache.tika.parser.DefaultParser"/>

Modified: 
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1680814&r1=1680813&r2=1680814&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
 Thu May 21 10:40:56 2015
@@ -19,6 +19,7 @@
 
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
+import java.io.InputStream;
 import java.text.ParseException;
 import java.util.Calendar;
 import java.util.Collections;
@@ -26,16 +27,20 @@ import java.util.List;
 import java.util.Random;
 import java.util.Set;
 
+import javax.annotation.Nonnull;
 import javax.jcr.PropertyType;
 
+import com.google.common.base.Charsets;
 import com.google.common.collect.ComparisonChain;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.io.CountingInputStream;
 import org.apache.commons.io.IOUtils;
 import org.apache.jackrabbit.JcrConstants;
 import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.api.ContentRepository;
 import org.apache.jackrabbit.oak.api.PropertyValue;
@@ -45,6 +50,7 @@ import org.apache.jackrabbit.oak.api.Typ
 import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
 import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
 import 
org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
 import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
 import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
 import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry;
@@ -79,6 +85,7 @@ import static org.apache.jackrabbit.oak.
 import static 
org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
 import static org.hamcrest.CoreMatchers.not;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
 import static org.junit.matchers.JUnitMatchers.containsString;
 
@@ -1208,6 +1215,22 @@ public class LucenePropertyIndexTest ext
     }
 
     @Test
+    public void excludedBlobContentNotAccessed() throws Exception{
+        Tree idx = createFulltextIndex(root.getTree("/"), "test");
+        TestUtil.useV2(idx);
+
+        AccessStateProvidingBlob testBlob =
+                new AccessStateProvidingBlob("<?xml version=\"1.0\" 
encoding=\"UTF-8\"?><msg>sky is blue</msg>");
+
+        Tree test = root.getTree("/").addChild("test");
+        createFileNode(test, "zip", testBlob, "application/zip");
+        root.commit();
+
+        assertFalse(testBlob.isStreamAccessed());
+        assertEquals(0, testBlob.readByteCount());
+    }
+
+    @Test
     public void maxFieldLengthCheck() throws Exception{
         Tree idx = createFulltextIndex(root.getTree("/"), "test");
         TestUtil.useV2(idx);
@@ -1312,8 +1335,12 @@ public class LucenePropertyIndexTest ext
     }
 
     private Tree createFileNode(Tree tree, String name, String content, String 
mimeType){
+        return createFileNode(tree, name, new 
ArrayBasedBlob(content.getBytes()), mimeType);
+    }
+
+    private Tree createFileNode(Tree tree, String name, Blob content, String 
mimeType){
         Tree jcrContent = tree.addChild(name).addChild(JCR_CONTENT);
-        jcrContent.setProperty(JcrConstants.JCR_DATA, content.getBytes());
+        jcrContent.setProperty(JcrConstants.JCR_DATA, content);
         jcrContent.setProperty(JcrConstants.JCR_MIMETYPE, mimeType);
         return jcrContent;
     }
@@ -1483,4 +1510,38 @@ public class LucenePropertyIndexTest ext
                     '}';
         }
     }
+
+    private static class AccessStateProvidingBlob extends ArrayBasedBlob {
+        private CountingInputStream stream;
+
+        public AccessStateProvidingBlob(byte[] value) {
+            super(value);
+        }
+
+        public AccessStateProvidingBlob(String content) {
+            this(content.getBytes(Charsets.UTF_8));
+        }
+
+        @Nonnull
+        @Override
+        public InputStream getNewStream() {
+            stream = new CountingInputStream(super.getNewStream());
+            return stream;
+        }
+
+        public boolean isStreamAccessed() {
+            return stream != null;
+        }
+
+        public void resetState(){
+            stream = null;
+        }
+
+        public long readByteCount(){
+            if (stream == null){
+                return 0;
+            }
+            return stream.getCount();
+        }
+    }
 }


Reply via email to