Author: chetanm
Date: Mon Oct 13 10:24:46 2014
New Revision: 1631334
URL: http://svn.apache.org/r1631334
Log:
OAK-2005 - Use separate Lucene index for performing property related queries
Making the indexing side type aware:
- Dates are stored as long values with second precision.
The precision level will be made configurable later.
- If the index is not fulltext, property names are
checked against the include list and no filtering on type
is performed.
Added:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
(with props)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
Mon Oct 13 10:24:46 2014
@@ -16,6 +16,12 @@
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.primitives.Ints;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.util.ISO8601;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
@@ -41,6 +47,13 @@ public final class FieldFactory {
private static final FieldType OAK_TYPE_NOT_STORED = new FieldType();
+ private static final int[] TYPABLE_TAGS = {
+ Type.DATE.tag(),
+ Type.BOOLEAN.tag(),
+ Type.DOUBLE.tag(),
+ Type.LONG.tag(),
+ };
+
static {
OAK_TYPE.setIndexed(true);
OAK_TYPE.setOmitNorms(true);
@@ -55,6 +68,12 @@ public final class FieldFactory {
OAK_TYPE_NOT_STORED.setIndexOptions(DOCS_AND_FREQS_AND_POSITIONS);
OAK_TYPE_NOT_STORED.setTokenized(true);
OAK_TYPE_NOT_STORED.freeze();
+
+ Arrays.sort(TYPABLE_TAGS);
+ }
+
+ public static boolean canCreateTypedField(Type<?> type) {
+ return Ints.contains(TYPABLE_TAGS, type.tag());
}
private final static class OakTextField extends Field {
@@ -86,4 +105,19 @@ public final class FieldFactory {
return new TextField(FULLTEXT, value, NO);
}
+ /**
+ * Date values are saved with second resolution
+ * @param date jcr date string in ISO 8601 format, may be null
+ * @return date value in seconds since the epoch, or null if date is null
+ */
+ public static Long dateToLong(String date){
+ if( date == null){
+ return null;
+ }
+ //TODO Should we change the precision to 5 min resolution?
+ //TODO make it configurable as part of property definition
+ long millis = ISO8601.parse(date).getTimeInMillis();
+ return TimeUnit.MILLISECONDS.toSeconds(millis);
+ }
+
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
Mon Oct 13 10:24:46 2014
@@ -28,6 +28,7 @@ import com.google.common.collect.Immutab
import com.google.common.collect.Sets;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -109,6 +110,15 @@ public class IndexDefinition {
return storageEnabled;
}
+ public boolean skipTokenization(String propertyName) {
+ //If fulltext is not enabled then we never tokenize
+ //irrespective of property name
+ if (!isFullTextEnabled()) {
+ return true;
+ }
+ return LuceneIndexHelper.skipTokenization(propertyName);
+ }
+
//~------------------------------------------< Internal >
private static boolean getOptionalValue(NodeBuilder definition, String
propName, boolean defaultVal){
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
Mon Oct 13 10:24:46 2014
@@ -23,7 +23,6 @@ import static org.apache.jackrabbit.oak.
import static
org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
import static
org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField;
import static
org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
-import static
org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.skipTokenization;
import java.io.IOException;
import java.io.InputStream;
@@ -42,7 +41,10 @@ import org.apache.jackrabbit.oak.spi.sta
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.tika.metadata.Metadata;
@@ -191,24 +193,42 @@ public class LuceneIndexEditor implement
}
private Document makeDocument(String path, NodeState state, boolean
isUpdate) throws CommitFailedException {
+ //TODO Possibly we can add support for compound properties like foo/bar
+ //i.e. support for relative path restrictions
+
List<Field> fields = new ArrayList<Field>();
boolean dirty = false;
for (PropertyState property : state.getProperties()) {
String pname = property.getName();
if (isVisible(pname)
- && (context.getPropertyTypes() & (1 << property.getType()
- .tag())) != 0 && context.includeProperty(pname)) {
+ && context.includeProperty(pname)) {
+
+ //In case of fulltext we also check if given type is enabled
for indexing
+ //TODO Use context.includePropertyType; however, that causes
issues. Need
+ //to make filtering based on type consistent on both the indexing
side and
+ //the query side
+ if(context.isFullTextEnabled()
+ && (context.getPropertyTypes() & (1 <<
property.getType()
+ .tag())) == 0){
+ continue;
+ }
+
if (Type.BINARY.tag() == property.getType().tag()) {
this.context.indexUpdate();
fields.addAll(newBinary(property, state));
dirty = true;
+ } else if(!context.isFullTextEnabled()
+ &&
FieldFactory.canCreateTypedField(property.getType())){
+ dirty = addTypedFields(fields, property);
} else {
for (String value : property.getValue(Type.STRINGS)) {
this.context.indexUpdate();
fields.add(newPropertyField(pname, value,
- !skipTokenization(pname),
+ !context.skipTokenization(pname),
context.isStored(pname)));
- fields.add(newFulltextField(value));
+ if (context.isFullTextEnabled()) {
+ fields.add(newFulltextField(value));
+ }
dirty = true;
}
}
@@ -222,7 +242,9 @@ public class LuceneIndexEditor implement
Document document = new Document();
document.add(newPathField(path));
String name = getName(path);
- if (name != null) {
+
+ //TODO Possibly index nodeName without tokenization for node name
based queries
+ if (context.isFullTextEnabled()) {
document.add(newFulltextField(name));
}
for (Field f : fields) {
@@ -231,6 +253,34 @@ public class LuceneIndexEditor implement
return document;
}
+ private boolean addTypedFields(List<Field> fields, PropertyState property)
throws CommitFailedException {
+ int tag = property.getType().tag();
+ String name = property.getName();
+ boolean fieldAdded = false;
+ for (int i = 0; i < property.count(); i++) {
+ Field f = null;
+ if (tag == Type.LONG.tag()) {
+ //TODO Distinguish fields which need to be used for search and
for sort
+ //If a field is only used for Sort then it can be stored with
less precision
+ f = new LongField(name, property.getValue(Type.LONG, i),
Field.Store.NO);
+ } else if (tag == Type.DATE.tag()) {
+ String date = property.getValue(Type.DATE, i);
+ f = new LongField(name, FieldFactory.dateToLong(date),
Field.Store.NO);
+ } else if (tag == Type.DOUBLE.tag()) {
+ f = new DoubleField(name, property.getValue(Type.DOUBLE, i),
Field.Store.NO);
+ } else if (tag == Type.BOOLEAN.tag()) {
+ f = new StringField(name, property.getValue(Type.BOOLEAN,
i).toString(), Field.Store.NO);
+ }
+
+ if (f != null) {
+ this.context.indexUpdate();
+ fields.add(f);
+ fieldAdded = true;
+ }
+ }
+ return fieldAdded;
+ }
+
private static boolean isVisible(String name) {
return name.charAt(0) != ':';
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
Mon Oct 13 10:24:46 2014
@@ -54,6 +54,7 @@ public class LuceneIndexEditorContext {
try {
IndexWriterConfig config = new IndexWriterConfig(VERSION,
analyzer);
config.setMergeScheduler(new SerialMergeScheduler());
+ //TODO Use default codec for index where full text index is not
stored
config.setCodec(new OakCodec());
return config;
} finally {
@@ -107,14 +108,14 @@ public class LuceneIndexEditorContext {
this.updateCallback = updateCallback;
}
- int getPropertyTypes() {
- return definition.getPropertyTypes();
- }
-
boolean includeProperty(String name) {
return definition.includeProperty(name);
}
+ boolean includePropertyType(int type){
+ return definition.includePropertyType(type);
+ }
+
Parser getParser() {
return parser;
}
@@ -162,4 +163,15 @@ public class LuceneIndexEditorContext {
return definition.isStored(name);
}
+ public boolean isFullTextEnabled() {
+ return definition.isFullTextEnabled();
+ }
+
+ public boolean skipTokenization(String propertyName){
+ return definition.skipTokenization(propertyName);
+ }
+
+ public int getPropertyTypes() {
+ return definition.getPropertyTypes();
+ }
}
Added:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java?rev=1631334&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
(added)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
Mon Oct 13 10:24:46 2014
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+
+import org.apache.jackrabbit.oak.plugins.index.IndexUpdateProvider;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EditorHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.test.ISO8601;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.junit.After;
+import org.junit.Test;
+
+import static com.google.common.collect.ImmutableSet.of;
+import static javax.jcr.PropertyType.TYPENAME_STRING;
+import static org.apache.jackrabbit.oak.api.Type.STRINGS;
+import static
org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH;
+import static
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_NAMES;
+import static
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
+import static
org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneIndexDefinition;
+import static
org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
+import static
org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent.INITIAL_CONTENT;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.fail;
+
+public class LuceneIndexEditorTest {
+ private static final Analyzer analyzer = LuceneIndexConstants.ANALYZER;
+
+ private static final EditorHook HOOK = new EditorHook(
+ new IndexUpdateProvider(
+ new LuceneIndexEditorProvider().with(analyzer)));
+
+ private NodeState root = INITIAL_CONTENT;
+
+ private NodeBuilder builder = root.builder();
+
+ private IndexTracker tracker = new IndexTracker();
+
+ private IndexNode indexNode;
+
+ @Test
+ public void testLuceneWithFullText() throws Exception {
+ NodeBuilder index = builder.child(INDEX_DEFINITIONS_NAME);
+ newLuceneIndexDefinition(index, "lucene",
+ of(TYPENAME_STRING));
+
+ NodeState before = builder.getNodeState();
+ builder.child("test").setProperty("foo", "fox is jumping");
+ builder.child("test").setProperty("price", 100);
+ NodeState after = builder.getNodeState();
+
+ NodeState indexed = HOOK.processCommit(before, after,
CommitInfo.EMPTY);
+ tracker.update(indexed);
+
+ assertEquals("/test", query("foo:fox"));
+ assertNull("Non string properties not indexed by default",
+ getPath(NumericRangeQuery.newLongRange("price", 100L, 100L,
true, true)));
+ }
+
+ @Test
+ public void testLuceneWithNonFullText() throws Exception {
+ NodeBuilder index = builder.child(INDEX_DEFINITIONS_NAME);
+ NodeBuilder nb = newLuceneIndexDefinition(index, "lucene",
+ of(TYPENAME_STRING));
+ nb.setProperty(LuceneIndexConstants.FULL_TEXT_ENABLED, false);
+ nb.setProperty(createProperty(INCLUDE_PROPERTY_NAMES, of("foo",
"price", "weight", "bool", "creationTime"), STRINGS));
+
+ NodeState before = builder.getNodeState();
+ builder.child("test").setProperty("foo", "fox is jumping");
+ builder.child("test").setProperty("bar", "kite is flying");
+ builder.child("test").setProperty("price", 100);
+ builder.child("test").setProperty("weight", 10.0);
+ builder.child("test").setProperty("bool", true);
+ builder.child("test").setProperty("truth", true);
+ builder.child("test").setProperty("creationTime",
createDate("05/06/2014"));
+ NodeState after = builder.getNodeState();
+
+ NodeState indexed = HOOK.processCommit(before, after,
CommitInfo.EMPTY);
+ tracker.update(indexed);
+
+ assertNull("Fulltext search should not work", query("foo:fox"));
+ assertEquals("/test", getPath(new TermQuery(new Term("foo", "fox is
jumping"))));
+ assertNull("bar must NOT be indexed", getPath(new TermQuery(new
Term("bar", "kite is flying"))));
+
+ //Double
+ assertEquals("/test",
getPath(NumericRangeQuery.newDoubleRange("weight", 8D, 12D, true, true)));
+
+ //Long
+ assertEquals("/test", getPath(NumericRangeQuery.newLongRange("price",
100L, 100L, true, true)));
+
+ //Boolean
+ assertEquals("/test", getPath(new TermQuery(new Term("bool",
"true"))));
+ assertNull("truth must NOT be indexed", getPath(new TermQuery(new
Term("truth", "true"))));
+
+ //Date
+ assertEquals("/test",
getPath(NumericRangeQuery.newLongRange("creationTime",
+ dateToTime("05/05/2014"), dateToTime("05/07/2014"), true,
true)));
+ }
+
+ @After
+ public void releaseIndexNode(){
+ if(indexNode != null){
+ indexNode.release();
+ }
+ }
+
+ private String query(String query) throws IOException, ParseException {
+ QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
+ return getPath(queryParser.parse(query));
+ }
+
+ private String getPath(Query query) throws IOException {
+ TopDocs td = getSearcher().search(query, 100);
+ if (td.totalHits > 0){
+ if(td.totalHits > 1){
+ fail("More than 1 result found for query " + query);
+ }
+ return
getSearcher().getIndexReader().document(td.scoreDocs[0].doc).get(PATH);
+ }
+ return null;
+ }
+
+ private IndexSearcher getSearcher(){
+ if(indexNode == null){
+ indexNode = tracker.acquireIndexNode("/oak:index/lucene");
+ }
+ return indexNode.getSearcher();
+ }
+
+ private static Calendar createDate(String dt) throws
java.text.ParseException {
+ SimpleDateFormat sdf = new SimpleDateFormat("dd/mm/yyyy");
+ Calendar cal = Calendar.getInstance();
+ cal.setTime(sdf.parse(dt));
+ return cal;
+ }
+
+ private static long dateToTime(String dt) throws java.text.ParseException {
+ return FieldFactory.dateToLong(ISO8601.format(createDate(dt)));
+ }
+}
Propchange:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
------------------------------------------------------------------------------
svn:eol-style = native