Author: chetanm
Date: Mon Oct 13 10:24:46 2014
New Revision: 1631334

URL: http://svn.apache.org/r1631334
Log:
OAK-2005 - Use separate Lucene index for performing property related queries

Making indexing side type aware.
- Dates are stored as a long with second precision.
   The precision level will be made configurable later.

- If the index is not fulltext, property names are
  checked against the include list and no filtering
  on type is performed.

Added:
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
   (with props)
Modified:
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
 Mon Oct 13 10:24:46 2014
@@ -16,6 +16,12 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.primitives.Ints;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.util.ISO8601;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
@@ -41,6 +47,13 @@ public final class FieldFactory {
 
     private static final FieldType OAK_TYPE_NOT_STORED = new FieldType();
 
+    private static final int[] TYPABLE_TAGS = {
+            Type.DATE.tag(),
+            Type.BOOLEAN.tag(),
+            Type.DOUBLE.tag(),
+            Type.LONG.tag(),
+    };
+
     static {
         OAK_TYPE.setIndexed(true);
         OAK_TYPE.setOmitNorms(true);
@@ -55,6 +68,12 @@ public final class FieldFactory {
         OAK_TYPE_NOT_STORED.setIndexOptions(DOCS_AND_FREQS_AND_POSITIONS);
         OAK_TYPE_NOT_STORED.setTokenized(true);
         OAK_TYPE_NOT_STORED.freeze();
+
+        Arrays.sort(TYPABLE_TAGS);
+    }
+
+    public static boolean canCreateTypedField(Type<?> type) {
+        return Ints.contains(TYPABLE_TAGS, type.tag());
     }
 
     private final static class OakTextField extends Field {
@@ -86,4 +105,19 @@ public final class FieldFactory {
         return new TextField(FULLTEXT, value, NO);
     }
 
+    /**
+     * Converts a JCR date string to a long with second resolution:
+     * the string is parsed with ISO8601.parse and the resulting
+     * millisecond value is converted to whole seconds.
+     *
+     * NOTE(review): presumably ISO8601.parse returns null for a
+     * malformed string, which would surface here as a
+     * NullPointerException - confirm against the ISO8601 API.
+     *
+     * @param date JCR date string; may be null
+     * @return date value in seconds, or null if date is null
+     */
+    public static Long dateToLong(String date){
+        if( date == null){
+            return null;
+        }
+        //TODO Should we change the precision to 5 min resolution
+        //TODO make it configurable as part of property definition
+        long millis = ISO8601.parse(date).getTimeInMillis();
+        return TimeUnit.MILLISECONDS.toSeconds(millis);
+    }
+
 }

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
 Mon Oct 13 10:24:46 2014
@@ -28,6 +28,7 @@ import com.google.common.collect.Immutab
 import com.google.common.collect.Sets;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -109,6 +110,15 @@ public class IndexDefinition {
         return storageEnabled;
     }
 
+    public boolean skipTokenization(String propertyName) {
+        //If fulltext is not enabled then we never tokenize
+        //irrespective of property name
+        if (!isFullTextEnabled()) {
+            return true;
+        }
+        return LuceneIndexHelper.skipTokenization(propertyName);
+    }
+
     //~------------------------------------------< Internal >
 
     private static boolean getOptionalValue(NodeBuilder definition, String 
propName, boolean defaultVal){

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
 Mon Oct 13 10:24:46 2014
@@ -23,7 +23,6 @@ import static org.apache.jackrabbit.oak.
 import static 
org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
 import static 
org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField;
 import static 
org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
-import static 
org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.skipTokenization;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -42,7 +41,10 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleField;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.tika.metadata.Metadata;
@@ -191,24 +193,42 @@ public class LuceneIndexEditor implement
     }
 
     private Document makeDocument(String path, NodeState state, boolean 
isUpdate) throws CommitFailedException {
+        //TODO Possibly we can add support for compound properties like foo/bar
+        //i.e. support for relative path restrictions
+
         List<Field> fields = new ArrayList<Field>();
         boolean dirty = false;
         for (PropertyState property : state.getProperties()) {
             String pname = property.getName();
             if (isVisible(pname)
-                    && (context.getPropertyTypes() & (1 << property.getType()
-                            .tag())) != 0 && context.includeProperty(pname)) {
+                    && context.includeProperty(pname)) {
+
+                //In case of fulltext we also check if the given type is enabled for indexing
+                //TODO Use context.includePropertyType; however, that causes an issue. Need
+                //to make filtering based on type consistent on both the indexing side and
+                //the query side
+                if(context.isFullTextEnabled()
+                        && (context.getPropertyTypes() & (1 << 
property.getType()
+                        .tag())) == 0){
+                    continue;
+                }
+
                 if (Type.BINARY.tag() == property.getType().tag()) {
                     this.context.indexUpdate();
                     fields.addAll(newBinary(property, state));
                     dirty = true;
+                } else if(!context.isFullTextEnabled()
+                        && 
FieldFactory.canCreateTypedField(property.getType())){
+                    dirty = addTypedFields(fields, property);
                 } else {
                     for (String value : property.getValue(Type.STRINGS)) {
                         this.context.indexUpdate();
                         fields.add(newPropertyField(pname, value,
-                                !skipTokenization(pname),
+                                !context.skipTokenization(pname),
                                 context.isStored(pname)));
-                        fields.add(newFulltextField(value));
+                        if (context.isFullTextEnabled()) {
+                            fields.add(newFulltextField(value));
+                        }
                         dirty = true;
                     }
                 }
@@ -222,7 +242,9 @@ public class LuceneIndexEditor implement
         Document document = new Document();
         document.add(newPathField(path));
         String name = getName(path);
-        if (name != null) {
+
+        //TODO Possibly index nodeName without tokenization for node name 
based queries
+        if (context.isFullTextEnabled()) {
             document.add(newFulltextField(name));
         }
         for (Field f : fields) {
@@ -231,6 +253,34 @@ public class LuceneIndexEditor implement
         return document;
     }
 
+    private boolean addTypedFields(List<Field> fields, PropertyState property) 
throws CommitFailedException {
+        int tag = property.getType().tag();
+        String name = property.getName();
+        boolean fieldAdded = false;
+        for (int i = 0; i < property.count(); i++) {
+            Field f = null;
+            if (tag == Type.LONG.tag()) {
+                //TODO Distinguish fields which need to be used for search and 
for sort
+                //If a field is only used for Sort then it can be stored with 
less precision
+                f = new LongField(name, property.getValue(Type.LONG, i), 
Field.Store.NO);
+            } else if (tag == Type.DATE.tag()) {
+                String date = property.getValue(Type.DATE, i);
+                f = new LongField(name, FieldFactory.dateToLong(date), 
Field.Store.NO);
+            } else if (tag == Type.DOUBLE.tag()) {
+                f = new DoubleField(name, property.getValue(Type.DOUBLE, i), 
Field.Store.NO);
+            } else if (tag == Type.BOOLEAN.tag()) {
+                f = new StringField(name, property.getValue(Type.BOOLEAN, 
i).toString(), Field.Store.NO);
+            }
+
+            if (f != null) {
+                this.context.indexUpdate();
+                fields.add(f);
+                fieldAdded = true;
+            }
+        }
+        return fieldAdded;
+    }
+
     private static boolean isVisible(String name) {
         return name.charAt(0) != ':';
     }

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1631334&r1=1631333&r2=1631334&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
 Mon Oct 13 10:24:46 2014
@@ -54,6 +54,7 @@ public class LuceneIndexEditorContext {
         try {
             IndexWriterConfig config = new IndexWriterConfig(VERSION, 
analyzer);
             config.setMergeScheduler(new SerialMergeScheduler());
+            //TODO Use default codec for index where full text index is not 
stored
             config.setCodec(new OakCodec());
             return config;
         } finally {
@@ -107,14 +108,14 @@ public class LuceneIndexEditorContext {
         this.updateCallback = updateCallback;
     }
 
-    int getPropertyTypes() {
-        return definition.getPropertyTypes();
-    }
-
     boolean includeProperty(String name) {
         return definition.includeProperty(name);
     }
 
+    boolean includePropertyType(int type){
+        return definition.includePropertyType(type);
+    }
+
     Parser getParser() {
         return parser;
     }
@@ -162,4 +163,15 @@ public class LuceneIndexEditorContext {
         return definition.isStored(name);
     }
 
+    public boolean isFullTextEnabled() {
+        return definition.isFullTextEnabled();
+    }
+
+    public boolean skipTokenization(String propertyName){
+        return definition.skipTokenization(propertyName);
+    }
+
+    public int getPropertyTypes() {
+        return definition.getPropertyTypes();
+    }
 }

Added: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java?rev=1631334&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
 Mon Oct 13 10:24:46 2014
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+
+import org.apache.jackrabbit.oak.plugins.index.IndexUpdateProvider;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EditorHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.test.ISO8601;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.junit.After;
+import org.junit.Test;
+
+import static com.google.common.collect.ImmutableSet.of;
+import static javax.jcr.PropertyType.TYPENAME_STRING;
+import static org.apache.jackrabbit.oak.api.Type.STRINGS;
+import static 
org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH;
+import static 
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_NAMES;
+import static 
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
+import static 
org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneIndexDefinition;
+import static 
org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
+import static 
org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent.INITIAL_CONTENT;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.fail;
+
+public class LuceneIndexEditorTest {
+    private static final Analyzer analyzer = LuceneIndexConstants.ANALYZER;
+
+    private static final EditorHook HOOK = new EditorHook(
+            new IndexUpdateProvider(
+                    new LuceneIndexEditorProvider().with(analyzer)));
+
+    private NodeState root = INITIAL_CONTENT;
+
+    private NodeBuilder builder = root.builder();
+
+    private IndexTracker tracker = new IndexTracker();
+
+    private IndexNode indexNode;
+
+    @Test
+    public void testLuceneWithFullText() throws Exception {
+        NodeBuilder index = builder.child(INDEX_DEFINITIONS_NAME);
+        newLuceneIndexDefinition(index, "lucene",
+                of(TYPENAME_STRING));
+
+        NodeState before = builder.getNodeState();
+        builder.child("test").setProperty("foo", "fox is jumping");
+        builder.child("test").setProperty("price", 100);
+        NodeState after = builder.getNodeState();
+
+        NodeState indexed = HOOK.processCommit(before, after, 
CommitInfo.EMPTY);
+        tracker.update(indexed);
+
+        assertEquals("/test", query("foo:fox"));
+        assertNull("Non string properties not indexed by default",
+                getPath(NumericRangeQuery.newLongRange("price", 100L, 100L, 
true, true)));
+    }
+
+    @Test
+    public void testLuceneWithNonFullText() throws Exception {
+        NodeBuilder index = builder.child(INDEX_DEFINITIONS_NAME);
+        NodeBuilder nb = newLuceneIndexDefinition(index, "lucene",
+                of(TYPENAME_STRING));
+        nb.setProperty(LuceneIndexConstants.FULL_TEXT_ENABLED, false);
+        nb.setProperty(createProperty(INCLUDE_PROPERTY_NAMES, of("foo", 
"price", "weight", "bool", "creationTime"), STRINGS));
+
+        NodeState before = builder.getNodeState();
+        builder.child("test").setProperty("foo", "fox is jumping");
+        builder.child("test").setProperty("bar", "kite is flying");
+        builder.child("test").setProperty("price", 100);
+        builder.child("test").setProperty("weight", 10.0);
+        builder.child("test").setProperty("bool", true);
+        builder.child("test").setProperty("truth", true);
+        builder.child("test").setProperty("creationTime", 
createDate("05/06/2014"));
+        NodeState after = builder.getNodeState();
+
+        NodeState indexed = HOOK.processCommit(before, after, 
CommitInfo.EMPTY);
+        tracker.update(indexed);
+
+        assertNull("Fulltext search should not work", query("foo:fox"));
+        assertEquals("/test", getPath(new TermQuery(new Term("foo", "fox is 
jumping"))));
+        assertNull("bar must NOT be indexed", getPath(new TermQuery(new 
Term("bar", "kite is flying"))));
+
+        //Double
+        assertEquals("/test", 
getPath(NumericRangeQuery.newDoubleRange("weight", 8D, 12D, true, true)));
+
+        //Long
+        assertEquals("/test", getPath(NumericRangeQuery.newLongRange("price", 
100L, 100L, true, true)));
+
+        //Boolean
+        assertEquals("/test", getPath(new TermQuery(new Term("bool", 
"true"))));
+        assertNull("truth must NOT be indexed", getPath(new TermQuery(new 
Term("truth", "true"))));
+
+        //Date
+        assertEquals("/test", 
getPath(NumericRangeQuery.newLongRange("creationTime",
+                dateToTime("05/05/2014"), dateToTime("05/07/2014"), true, 
true)));
+    }
+
+    @After
+    public void releaseIndexNode(){
+        if(indexNode != null){
+            indexNode.release();
+        }
+    }
+
+    private String query(String query) throws IOException, ParseException {
+        QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
+        return getPath(queryParser.parse(query));
+    }
+
+    private String getPath(Query query) throws IOException {
+        TopDocs td = getSearcher().search(query, 100);
+        if (td.totalHits > 0){
+            if(td.totalHits > 1){
+                fail("More than 1 result found for query " + query);
+            }
+            return 
getSearcher().getIndexReader().document(td.scoreDocs[0].doc).get(PATH);
+        }
+        return null;
+    }
+
+    private IndexSearcher getSearcher(){
+        if(indexNode == null){
+            indexNode = tracker.acquireIndexNode("/oak:index/lucene");
+        }
+        return indexNode.getSearcher();
+    }
+
+    /**
+     * Parses a day/month/year string such as "05/06/2014" (5 June 2014)
+     * into a Calendar obtained from Calendar.getInstance().
+     *
+     * @param dt date in dd/MM/yyyy form
+     * @return calendar set to the parsed date
+     * @throws java.text.ParseException if dt does not match the pattern
+     */
+    private static Calendar createDate(String dt) throws java.text.ParseException {
+        //Month is upper-case MM; lower-case mm means minute-of-hour and
+        //would leave the month at its default of January
+        SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy");
+        Calendar cal = Calendar.getInstance();
+        cal.setTime(sdf.parse(dt));
+        return cal;
+    }
+
+    private static long dateToTime(String dt) throws java.text.ParseException {
+        return FieldFactory.dateToLong(ISO8601.format(createDate(dt)));
+    }
+}

Propchange: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to