Author: chetanm
Date: Thu May 21 10:40:56 2015
New Revision: 1680814
URL: http://svn.apache.org/r1680814
Log:
OAK-2895 - Avoid accessing binary content if the mimeType is excluded from
indexing
Merging 1680806
Added:
jackrabbit/oak/branches/1.2/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/
- copied from r1680806,
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/
jackrabbit/oak/branches/1.2/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/
- copied from r1680806,
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/
jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java
- copied unchanged from r1680806,
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java
Modified:
jackrabbit/oak/branches/1.2/ (props changed)
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
Propchange: jackrabbit/oak/branches/1.2/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu May 21 10:40:56 2015
@@ -1,3 +1,3 @@
/jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414,1673436,1673644,1673662-1673664,1673669,1673695,1674046,1674065,1674075,1674107,1674228,1674880,1675054-1675055,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677939,1677991,1678173,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679235,1680182,1680222,1680232,1680236,1680461,1680805
+/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414,1673436,1673644,1673662-1673664,1673669,1673695,1674046,1674065,1674075,1674107,1674228,1674880,1675054-1675055,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677939,1677991,1678173,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679235,1680182,1680222,1680232,1680236,1680461,1680805-1680806
/jackrabbit/trunk:1345480
Modified:
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1680814&r1=1680813&r2=1680814&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Thu May 21 10:40:56 2015
@@ -46,6 +46,7 @@ import org.apache.jackrabbit.oak.api.Pro
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.commons.io.LazyInputStream;
import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
import org.apache.jackrabbit.oak.plugins.index.PathFilter;
@@ -55,6 +56,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.spi.commit.Editor;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.util.BlobByteSource;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoubleField;
@@ -803,7 +805,7 @@ public class LuceneIndexEditor implement
long start = System.currentTimeMillis();
long size = 0;
try {
- CountingInputStream stream = new CountingInputStream(v.getNewStream());
+ CountingInputStream stream = new CountingInputStream(new LazyInputStream(new BlobByteSource(v)));
try {
context.getParser().parse(stream, handler, metadata, new ParseContext());
} finally {
Modified:
jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml?rev=1680814&r1=1680813&r2=1680814&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml Thu May 21 10:40:56 2015
@@ -21,7 +21,7 @@
<properties>
<detectors>
- <detector class="org.apache.tika.detect.DefaultDetector"/>
+ <detector class="org.apache.tika.detect.TypeDetector"/>
</detectors>
<parsers>
<parser class="org.apache.tika.parser.DefaultParser"/>
Modified:
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1680814&r1=1680813&r2=1680814&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Thu May 21 10:40:56 2015
@@ -19,6 +19,7 @@
package org.apache.jackrabbit.oak.plugins.index.lucene;
+import java.io.InputStream;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Collections;
@@ -26,16 +27,20 @@ import java.util.List;
import java.util.Random;
import java.util.Set;
+import javax.annotation.Nonnull;
import javax.jcr.PropertyType;
+import com.google.common.base.Charsets;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.common.io.CountingInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.api.ContentRepository;
import org.apache.jackrabbit.oak.api.PropertyValue;
@@ -45,6 +50,7 @@ import org.apache.jackrabbit.oak.api.Typ
import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry;
@@ -79,6 +85,7 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.containsString;
@@ -1208,6 +1215,22 @@ public class LucenePropertyIndexTest ext
}
@Test
+ public void excludedBlobContentNotAccessed() throws Exception{
+ Tree idx = createFulltextIndex(root.getTree("/"), "test");
+ TestUtil.useV2(idx);
+
+ AccessStateProvidingBlob testBlob =
+ new AccessStateProvidingBlob("<?xml version=\"1.0\" encoding=\"UTF-8\"?><msg>sky is blue</msg>");
+
+ Tree test = root.getTree("/").addChild("test");
+ createFileNode(test, "zip", testBlob, "application/zip");
+ root.commit();
+
+ assertFalse(testBlob.isStreamAccessed());
+ assertEquals(0, testBlob.readByteCount());
+ }
+
+ @Test
public void maxFieldLengthCheck() throws Exception{
Tree idx = createFulltextIndex(root.getTree("/"), "test");
TestUtil.useV2(idx);
@@ -1312,8 +1335,12 @@ public class LucenePropertyIndexTest ext
}
private Tree createFileNode(Tree tree, String name, String content, String mimeType){
+ return createFileNode(tree, name, new ArrayBasedBlob(content.getBytes()), mimeType);
+ }
+
+ private Tree createFileNode(Tree tree, String name, Blob content, String mimeType){
Tree jcrContent = tree.addChild(name).addChild(JCR_CONTENT);
- jcrContent.setProperty(JcrConstants.JCR_DATA, content.getBytes());
+ jcrContent.setProperty(JcrConstants.JCR_DATA, content);
jcrContent.setProperty(JcrConstants.JCR_MIMETYPE, mimeType);
return jcrContent;
}
@@ -1483,4 +1510,38 @@ public class LucenePropertyIndexTest ext
'}';
}
}
+
+ private static class AccessStateProvidingBlob extends ArrayBasedBlob {
+ private CountingInputStream stream;
+
+ public AccessStateProvidingBlob(byte[] value) {
+ super(value);
+ }
+
+ public AccessStateProvidingBlob(String content) {
+ this(content.getBytes(Charsets.UTF_8));
+ }
+
+ @Nonnull
+ @Override
+ public InputStream getNewStream() {
+ stream = new CountingInputStream(super.getNewStream());
+ return stream;
+ }
+
+ public boolean isStreamAccessed() {
+ return stream != null;
+ }
+
+ public void resetState(){
+ stream = null;
+ }
+
+ public long readByteCount(){
+ if (stream == null){
+ return 0;
+ }
+ return stream.getCount();
+ }
+ }
}