Author: alexparvulescu Date: Mon Mar 3 21:35:17 2014 New Revision: 1573761
URL: http://svn.apache.org/r1573761 Log: OAK-1487 LuceneIndex support for relative properties - impl and tests, and some cleanup Modified: jackrabbit/oak/trunk/oak-lucene/pom.xml jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TermFactory.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexAggregationTest.java Modified: jackrabbit/oak/trunk/oak-lucene/pom.xml URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/pom.xml?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/pom.xml (original) +++ jackrabbit/oak/trunk/oak-lucene/pom.xml Mon Mar 3 21:35:17 2014 @@ -50,13 +50,10 @@ org.apache.jackrabbit.core.query.DerefTest#testRewrite <!-- OAK-321 --> org.apache.jackrabbit.core.query.DerefTest#testDerefToVersionNode <!-- OAK-321 --> org.apache.jackrabbit.core.query.DerefTest#testMultipleDeref <!-- OAK-321 --> - org.apache.jackrabbit.core.query.FulltextQueryTest#testContainsPropScopeSQL <!-- OAK-902 --> - org.apache.jackrabbit.core.query.FulltextQueryTest#testContainsPropScopeXPath <!-- OAK-902 --> org.apache.jackrabbit.core.query.XPathAxisTest#testIndex0Descendant <!-- OAK-322 --> org.apache.jackrabbit.core.query.XPathAxisTest#testIndex1Descendant <!-- OAK-322 --> 
org.apache.jackrabbit.core.query.XPathAxisTest#testIndex2Descendant <!-- OAK-322 --> org.apache.jackrabbit.core.query.XPathAxisTest#testIndex3Descendant <!-- OAK-322 --> - org.apache.jackrabbit.core.query.SQL2QueryResultTest#testSQL2SelectColums <!-- OAK-874 --> org.apache.jackrabbit.core.query.SimpleQueryTest#testGeneralComparison <!-- OAK-327 --> org.apache.jackrabbit.core.query.FnNameQueryTest#testLikeWithPrefix <!-- OAK-328 --> org.apache.jackrabbit.core.query.UpperLowerCaseQueryTest#testInvalidQuery <!-- OAK-329 --> @@ -89,7 +86,7 @@ org.apache.jackrabbit.core.query.SimpleQueryTest#testLikePatternEscaped <!-- OAK-327 --> org.apache.jackrabbit.core.query.SimpleQueryTest#testNegativeNumber <!-- OAK-327 --> org.apache.jackrabbit.core.query.SQL2OrderByTest#testOrderByScore - org.apache.jackrabbit.oak.jcr.query.QueryFulltextTest#testFulltextRelativeProperty <!-- OAK-828 --> + org.apache.jackrabbit.core.query.FulltextQueryTest#testMultipleOrExpressions <!-- TODO --> </known.issues> </properties> Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java Mon Mar 3 21:35:17 2014 @@ -17,8 +17,10 @@ package org.apache.jackrabbit.oak.plugins.index.lucene; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInfo.IndexOptions; import static 
org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH; import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.FULLTEXT; @@ -32,6 +34,26 @@ import static org.apache.lucene.document public final class FieldFactory { /** + * StringField#TYPE_NOT_STORED but tokenized + */ + private static final FieldType OAK_TYPE = new FieldType(); + + static { + OAK_TYPE.setIndexed(true); + OAK_TYPE.setOmitNorms(true); + OAK_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + OAK_TYPE.setTokenized(true); + OAK_TYPE.freeze(); + } + + private final static class OakTextField extends Field { + + public OakTextField(String name, String value) { + super(name, value, OAK_TYPE); + } + } + + /** * Private constructor. */ private FieldFactory() { @@ -41,9 +63,11 @@ public final class FieldFactory { return new StringField(PATH, path, YES); } - public static Field newPropertyField(String name, String value) { - // TODO do we need norms info on the indexed fields ? TextField:StringField - // return new TextField(name, value, NO); + public static Field newPropertyField(String name, String value, + boolean tokenized) { + if (tokenized) { + return new OakTextField(name, value); + } return new StringField(name, value, NO); } Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Mon Mar 3 21:35:17 2014 @@ -16,6 +16,34 @@ */ package org.apache.jackrabbit.oak.plugins.index.lucene; +import static 
org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES; +import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE; +import static org.apache.jackrabbit.oak.api.Type.STRING; +import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot; +import static org.apache.jackrabbit.oak.commons.PathUtils.getAncestorPath; +import static org.apache.jackrabbit.oak.commons.PathUtils.getDepth; +import static org.apache.jackrabbit.oak.commons.PathUtils.getName; +import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; +import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME; +import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME; +import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH; +import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH_SELECTOR; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_FILE; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_NAME; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_OAK; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION; +import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm; +import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm; +import static org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.skipTokenization; +import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH; +import static 
org.apache.jackrabbit.oak.spi.query.Cursors.newPathCursor; +import static org.apache.lucene.search.BooleanClause.Occur.MUST; +import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT; +import static org.apache.lucene.search.BooleanClause.Occur.SHOULD; + import java.io.File; import java.io.IOException; import java.io.StringReader; @@ -26,9 +54,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; -import org.apache.jackrabbit.JcrConstants; -import org.apache.jackrabbit.oak.api.Type; -import org.apache.jackrabbit.oak.commons.PathUtils; + import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator; import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd; import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression; @@ -36,7 +62,6 @@ import org.apache.jackrabbit.oak.query.f import org.apache.jackrabbit.oak.query.fulltext.FullTextTerm; import org.apache.jackrabbit.oak.query.fulltext.FullTextVisitor; import org.apache.jackrabbit.oak.spi.query.Cursor; -import org.apache.jackrabbit.oak.spi.query.Cursors; import org.apache.jackrabbit.oak.spi.query.Filter; import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction; import org.apache.jackrabbit.oak.spi.query.QueryIndex; @@ -72,31 +97,11 @@ import org.apache.lucene.search.Wildcard import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES; -import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE; -import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME; -import static 
org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME; -import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH; -import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH_SELECTOR; -import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME; -import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_FILE; -import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_NAME; -import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_OAK; -import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH; -import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE; -import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm; -import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm; -import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH; -import static org.apache.lucene.search.BooleanClause.Occur.MUST; -import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT; -import static org.apache.lucene.search.BooleanClause.Occur.SHOULD; - /** * Provides a QueryIndex that does lookups against a Lucene-based index * @@ -222,8 +227,8 @@ public class LuceneIndex implements Full relPaths.add(""); } else if (p.startsWith("../") || p.startsWith("./")) { throw new IllegalArgumentException("Relative parent is not supported:" + p); - } else if (PathUtils.getDepth(p) > 1) { - String parent = PathUtils.getParentPath(p); + } else if (getDepth(p) > 1) { + String parent = getParentPath(p); relPaths.add(parent); } else { relPaths.add(""); @@ -339,7 +344,7 @@ public class LuceneIndex implements Full boolean nonFullTextConstraints = parent.isEmpty(); Directory directory = newDirectory(root); if (directory == null) { - return 
Cursors.newPathCursor(Collections.<String> emptySet()); + return newPathCursor(Collections.<String> emptySet()); } long s = System.currentTimeMillis(); try { @@ -351,10 +356,9 @@ public class LuceneIndex implements Full Query query = getQuery(filter, reader, nonFullTextConstraints, analyzer); -// TODO OAK-828 -// HashSet<String> seenPaths = new HashSet<String>(); -// int parentDepth = PathUtils.getDepth(parent); - + // TODO OAK-828 + HashSet<String> seenPaths = new HashSet<String>(); + int parentDepth = getDepth(parent); if (query != null) { // OAK-925 // TODO how to best avoid loading all entries in memory? @@ -368,22 +372,21 @@ public class LuceneIndex implements Full if ("".equals(path)) { path = "/"; } - -// TODO OAK-828 -// if (!parent.isEmpty()) { -// // ensure the path ends with the given -// // relative path -// if (!path.endsWith("/" + parent)) { -// continue; -// } -// // get the base path -// path = PathUtils.getAncestorPath(path, parentDepth); -// // avoid duplicate entries -// if (seenPaths.contains(path)) { -// continue; -// } -// seenPaths.add(path); -// } + if (!parent.isEmpty()) { + // TODO OAK-828 this breaks node aggregation + // get the base path + // ensure the path ends with the given + // relative path + // if (!path.endsWith("/" + parent)) { + // continue; + // } + path = getAncestorPath(path, parentDepth); + // avoid duplicate entries + if (seenPaths.contains(path)) { + continue; + } + seenPaths.add(path); + } paths.add(path); } @@ -391,7 +394,7 @@ public class LuceneIndex implements Full } LOG.debug("query via {} took {} ms.", this, System.currentTimeMillis() - s); - return Cursors.newPathCursor(paths); + return newPathCursor(paths); } finally { reader.close(); } @@ -400,7 +403,7 @@ public class LuceneIndex implements Full } } catch (IOException e) { LOG.warn("query via {} failed.", this, e); - return Cursors.newPathCursor(Collections.<String> emptySet()); + return newPathCursor(Collections.<String> emptySet()); } } @@ -428,7 +431,7 @@ 
public class LuceneIndex implements Full } PropertyRestriction pr = filter.getPropertyRestriction(NATIVE_QUERY_FUNCTION); if (pr != null) { - QueryParser queryParser = new QueryParser(Version.LUCENE_46, "", new OakAnalyzer(Version.LUCENE_46)); + QueryParser queryParser = new QueryParser(VERSION, "", analyzer); String query = String.valueOf(pr.first.getValue(pr.first.getType())); try { qs.add(queryParser.parse(query)); @@ -437,7 +440,7 @@ public class LuceneIndex implements Full } } else if (nonFullTextConstraints) { - addNonFullTextConstraints(qs, filter, reader); + addNonFullTextConstraints(qs, filter, reader, analyzer); } if (qs.size() == 0) { return new MatchAllDocsQuery(); @@ -453,7 +456,7 @@ public class LuceneIndex implements Full } private static void addNonFullTextConstraints(List<Query> qs, - Filter filter, IndexReader reader) { + Filter filter, IndexReader reader, Analyzer analyzer) { if (!filter.matchesAllTypes()) { addNodeTypeConstraints(qs, filter); } @@ -479,13 +482,13 @@ public class LuceneIndex implements Full qs.add(new TermQuery(newPathTerm(path))); break; case PARENT: - if (PathUtils.denotesRoot(path)) { + if (denotesRoot(path)) { // there's no parent of the root node // we add a path that can not possibly occur because there // is no way to say "match no documents" in Lucene qs.add(new TermQuery(new Term(FieldNames.PATH, "///"))); } else { - qs.add(new TermQuery(newPathTerm(PathUtils.getParentPath(path)))); + qs.add(new TermQuery(newPathTerm(getParentPath(path)))); } break; case NO_RESTRICTION: @@ -508,8 +511,13 @@ public class LuceneIndex implements Full if ("rep:excerpt".equals(name)) { continue; } - // TODO OAK-985 - if (JcrConstants.JCR_PRIMARYTYPE.equals(name)) { + if (JCR_PRIMARYTYPE.equals(name)) { + continue; + } + + if (skipTokenization(name)) { + qs.add(new TermQuery(new Term(name, pr.first + .getValue(STRING)))); continue; } @@ -519,11 +527,11 @@ public class LuceneIndex implements Full // TODO what to do with escaped tokens? 
if (pr.first != null) { - first = pr.first.getValue(Type.STRING); + first = pr.first.getValue(STRING); first = first.replace("\\", ""); } if (pr.last != null) { - last = pr.last.getValue(Type.STRING); + last = pr.last.getValue(STRING); last = last.replace("\\", ""); } @@ -561,17 +569,31 @@ public class LuceneIndex implements Full if ("*".equals(name)) { addReferenceConstraint(first, qs, reader); } else { - qs.add(new TermQuery(new Term(name, first))); + for (String t : tokenize(first, analyzer)) { + qs.add(new TermQuery(new Term(name, t))); + } } } continue; } + first = tokenizeAndPoll(first, analyzer); + last = tokenizeAndPoll(last, analyzer); qs.add(TermRangeQuery.newStringRange(name, first, last, pr.firstIncluding, pr.lastIncluding)); } } + private static String tokenizeAndPoll(String token, Analyzer analyzer){ + if (token != null) { + List<String> tokens = tokenize(token, analyzer); + if (!tokens.isEmpty()) { + token = tokens.get(0); + } + } + return token; + } + private static void addReferenceConstraint(String uuid, List<Query> qs, IndexReader reader) { if (reader == null) { @@ -640,10 +662,9 @@ public class LuceneIndex implements Full public boolean visit(FullTextTerm term) { String p = term.getPropertyName(); if (p != null && p.indexOf('/') >= 0) { - // do not add constraints on child nodes properties - p = "*"; + p = getName(p); } - Query q = tokenToQuery(term.getText(), analyzer, reader); + Query q = tokenToQuery(term.getText(), p, analyzer, reader); if (q == null) { return false; } @@ -664,12 +685,11 @@ public class LuceneIndex implements Full return result.get(); } - static Query tokenToQuery(String text, Analyzer analyzer, IndexReader reader) { + static Query tokenToQuery(String text, String fieldName, Analyzer analyzer, IndexReader reader) { if (analyzer == null) { return null; } - List<String> tokens = new ArrayList<String>(); - tokens = tokenize(text, analyzer); + List<String> tokens = tokenize(text, analyzer); if (tokens.isEmpty()) { // TODO what 
should be returned in the case there are no tokens? @@ -678,9 +698,9 @@ public class LuceneIndex implements Full if (tokens.size() == 1) { String token = tokens.iterator().next(); if (hasFulltextToken(token)) { - return new WildcardQuery(newFulltextTerm(token)); + return new WildcardQuery(newFulltextTerm(token, fieldName)); } else { - return new TermQuery(newFulltextTerm(token)); + return new TermQuery(newFulltextTerm(token, fieldName)); } } else { if (hasFulltextToken(tokens)) { @@ -692,14 +712,14 @@ public class LuceneIndex implements Full mpq.add(terms); } } else { - mpq.add(newFulltextTerm(token)); + mpq.add(newFulltextTerm(token, fieldName)); } } return mpq; } else { PhraseQuery pq = new PhraseQuery(); for (String t : tokens) { - pq.add(newFulltextTerm(t)); + pq.add(newFulltextTerm(t, fieldName)); } return pq; } Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Mon Mar 3 21:35:17 2014 @@ -23,6 +23,7 @@ import static org.apache.jackrabbit.oak. 
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField; import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField; import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm; +import static org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.skipTokenization; import java.io.IOException; import java.io.InputStream; @@ -201,7 +202,8 @@ public class LuceneIndexEditor implement } else { for (String value : property.getValue(Type.STRINGS)) { this.context.indexUpdate(); - document.add(newPropertyField(pname, value)); + document.add(newPropertyField(pname, value, + !skipTokenization(pname))); document.add(newFulltextField(value)); dirty = true; } Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TermFactory.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TermFactory.java?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TermFactory.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TermFactory.java Mon Mar 3 21:35:17 2014 @@ -45,6 +45,13 @@ public final class TermFactory { return new Term(FieldNames.PATH, path); } + public static Term newFulltextTerm(String ft, String field) { + if (field == null || "*".equals(field)) { + return newFulltextTerm(ft); + } + return new Term(field, ft); + } + public static Term newFulltextTerm(String ft) { return new Term(FieldNames.FULLTEXT, ft); } Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/LuceneIndexHelper.java Mon Mar 3 21:35:17 2014 @@ -16,35 +16,51 @@ */ package org.apache.jackrabbit.oak.plugins.index.lucene.util; +import static com.google.common.collect.ImmutableSet.of; +import static com.google.common.collect.Sets.newHashSet; +import static javax.jcr.PropertyType.TYPENAME_BINARY; +import static javax.jcr.PropertyType.TYPENAME_STRING; import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE; +import static org.apache.jackrabbit.JcrConstants.JCR_UUID; import static org.apache.jackrabbit.oak.api.Type.NAME; import static org.apache.jackrabbit.oak.api.Type.STRINGS; import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.ASYNC_PROPERTY_NAME; import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NODE_TYPE; import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME; import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME; -import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_TYPES; import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.EXCLUDE_PROPERTY_NAMES; +import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_TYPES; import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_FILE; import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_NAME; 
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH; import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE; import static org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty; +import static org.apache.jackrabbit.oak.spi.security.user.UserConstants.GROUP_PROPERTY_NAMES; +import static org.apache.jackrabbit.oak.spi.security.user.UserConstants.USER_PROPERTY_NAMES; import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import javax.jcr.PropertyType; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; -import com.google.common.collect.ImmutableSet; - public class LuceneIndexHelper { - public static final Set<String> JR_PROPERTY_INCLUDES = ImmutableSet.of( - PropertyType.TYPENAME_STRING, PropertyType.TYPENAME_BINARY); + public static final Set<String> JR_PROPERTY_INCLUDES = of(TYPENAME_STRING, + TYPENAME_BINARY); + + /** + * Nodes that represent content that should not be tokenized (like UUIDs, + * etc) + * + */ + private final static Set<String> NOT_TOKENIZED = newHashSet(JCR_UUID); + + static { + NOT_TOKENIZED.addAll(USER_PROPERTY_NAMES); + NOT_TOKENIZED.addAll(GROUP_PROPERTY_NAMES); + } private LuceneIndexHelper() { } @@ -114,4 +130,12 @@ public class LuceneIndexHelper { } return index; } + + /** + * Nodes that represent UUIDs and should not be tokenized + * + */ + public static boolean skipTokenization(String name) { + return NOT_TOKENIZED.contains(name); + } } Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java Mon Mar 3 21:35:17 2014 @@ -32,7 +32,7 @@ import org.apache.jackrabbit.core.query. * Tests the fulltext index. */ public class QueryFulltextTest extends AbstractQueryTest { - + public void testFulltext() throws Exception { Session session = superuser; QueryManager qm = session.getWorkspace().getQueryManager(); @@ -43,35 +43,15 @@ public class QueryFulltextTest extends A Node n3 = testRootNode.addNode("node3"); n3.setProperty("text", "hello hallo"); session.save(); - - String sql2 = "select [jcr:path] as [path] from [nt:base] " + - "where contains([text], 'hello OR hallo') order by [jcr:path]"; - - Query q; - - q = qm.createQuery("explain " + sql2, Query.JCR_SQL2); -// TODO the plan should actually be: -// assertEquals("[nt:base] as [nt:base] /* " + -// "+((text:hallo text:hello)~1) +text:{* TO *} " + -// "ft:(text:\"hallo\" OR text:\"hello\") " + -// "where contains([nt:base].[text], cast('hello OR hallo' as string)) */", -// getResult(q.execute(), "plan")); - assertEquals("[nt:base] as [nt:base] /* " + - "aggregate :fulltext:hallo :fulltext:hello " + - "ft:(text:\"hallo\" OR text:\"hello\") " + - "where contains([nt:base].[text], cast('hello OR hallo' as string)) */", - getResult(q.execute(), "plan")); - // lowercase "or" mean search for the term "or" - sql2 = "select [jcr:path] as [path] from [nt:base] " + + String sql2 = "select [jcr:path] as [path] from [nt:base] " + "where contains([text], 'hello or hallo') order by [jcr:path]"; - q = qm.createQuery(sql2, Query.JCR_SQL2); - assertEquals("", - getResult(q.execute(), "path")); + Query q = qm.createQuery(sql2, Query.JCR_SQL2); + assertEquals("", getResult(q.execute(), "path")); } - + public void testFulltextRelativeProperty() throws Exception { Session session = superuser; QueryManager qm = 
session.getWorkspace().getQueryManager(); @@ -89,75 +69,28 @@ public class QueryFulltextTest extends A "where ISCHILDNODE([/testroot])" + " AND CONTAINS(text, 'hallo')"; - q = qm.createQuery("explain " + sql2, Query.JCR_SQL2); - // TODO the plan should actually be: -// assertEquals("[nt:base] as [nt:base] /* " + -// "+text:hallo +:path:/testroot/* +text:{* TO *} " + -// "ft:(text:\"hallo\") " + -// "where (ischildnode([nt:base], [/testroot])) " + -// "and (contains([nt:base].[text], cast('hallo' as string))) */", -// getResult(q.execute(), "plan")); - assertEquals("[nt:base] as [nt:base] /* " + - "aggregate +:fulltext:hallo* +:path:/testroot/* +text:{* TO *}" + - "ft:(text:\"hallo\") " + - "where (ischildnode([nt:base], [/testroot])) " + - "and (contains([nt:base].[text], cast('hallo' as string))) */", - getResult(q.execute(), "plan")); - q = qm.createQuery(sql2, Query.JCR_SQL2); assertEquals("/testroot/node2, /testroot/node3", getResult(q.execute(), "path")); - - sql2 = "select [jcr:path] as [path] from [nt:base] " + - "where contains([node1/text], 'hello') order by [jcr:path]"; - -// q = qm.createQuery("explain " + sql2, Query.JCR_SQL2); -// assertEquals("[nt:base] as [nt:base] /* " + -// "+text:hallo +:path:/testroot/* +text:{* TO *} " + -// "ft:(text:\"hallo\") " + -// "where (ischildnode([nt:base], [/testroot])) " + -// "and (contains([nt:base].[text], cast('hallo' as string))) */", -// getResult(q.execute(), "plan")); + sql2 = "select [jcr:path] as [path] from [nt:base] " + + "where contains([node1/text], 'hello') order by [jcr:path]"; q = qm.createQuery(sql2, Query.JCR_SQL2); assertEquals("/testroot", getResult(q.execute(), "path")); - - sql2 = "select [jcr:path] as [path] from [nt:base] " + - "where contains([node2/text], 'hello OR hallo') order by [jcr:path]"; - q = qm.createQuery("explain " + sql2, Query.JCR_SQL2); - // TODO the plan should actually be: -// assertEquals("[nt:base] as [nt:base] /* " + -// "(text:hallo text:hello)~1 " + -// 
"ft:(node2/text:\"hallo\" OR node2/text:\"hello\") " + -// "parent:node2 " + -// "where contains([nt:base].[node2/text], cast('hello OR hallo' as string)) */", -// getResult(q.execute(), "plan")); - assertEquals("[nt:base] as [nt:base] /* " + - "aggregate :fulltext:hallo* :fulltext:hello* " + - "ft:(node2/text:\"hallo\" OR node2/text:\"hello\") " + - "parent:node2 " + - "where contains([nt:base].[node2/text], cast('hello OR hallo' as string)) */", - getResult(q.execute(), "plan")); + + sql2 = "select [jcr:path] as [path] from [nt:base] " + + "where contains([node2/text], 'hello OR hallo') order by [jcr:path]"; q = qm.createQuery(sql2, Query.JCR_SQL2); - assertEquals("/testroot", - getResult(q.execute(), "path")); - - sql2 = "select [jcr:path] as [path] from [nt:base] " + - "where contains([node1/text], 'hello') " + - "and contains([node2/text], 'hallo') " + - "order by [jcr:path]"; - q = qm.createQuery("explain " + sql2, Query.JCR_SQL2); + assertEquals("/testroot", getResult(q.execute(), "path")); + // TODO OAK-890 - assertEquals("[nt:base] as [nt:base] /* " + - "aggregate Not yet implemented " + - "where (contains([nt:base].[node1/text], cast('hello' as string))) " + - "and (contains([nt:base].[node2/text], cast('hallo' as string))) */", - getResult(q.execute(), "plan")); - q = qm.createQuery(sql2, Query.JCR_SQL2); - // assertEquals("/testroot", - // getResult(q.execute(), "path")); - + // sql2 = "select [jcr:path] as [path] from [nt:base] " + // + "where contains([node1/text], 'hello') " + // + "and contains([node2/text], 'hallo') " + // + "order by [jcr:path]"; + // q = qm.createQuery(sql2, Query.JCR_SQL2); + // assertEquals("/testroot", getResult(q.execute(), "path")); } - + static String getResult(QueryResult result, String propertyName) throws RepositoryException { StringBuilder buff = new StringBuilder(); RowIterator it = result.getRows(); Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexAggregationTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexAggregationTest.java?rev=1573761&r1=1573760&r2=1573761&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexAggregationTest.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexAggregationTest.java Mon Mar 3 21:35:17 2014 @@ -279,6 +279,35 @@ public class LuceneIndexAggregationTest } + + @Test + public void testChildNodeProperty2() throws Exception { + + Tree file = root.getTree("/").addChild("myFile"); + file.setProperty(JCR_PRIMARYTYPE, NT_FILE, Type.NAME); + Tree resource = file.addChild(JCR_CONTENT); + resource.setProperty(JCR_PRIMARYTYPE, "nt:resource", Type.NAME); + resource.setProperty(binaryProperty(JCR_DATA, + "the quick brown fox jumps over the lazy dog.")); + resource.setProperty("jcr:title", "title"); + resource.setProperty("jcr:description", "description"); + + Tree file2 = root.getTree("/").addChild("myFile2"); + file2.setProperty(JCR_PRIMARYTYPE, NT_FILE, Type.NAME); + Tree resource2 = file2.addChild(JCR_CONTENT); + resource2.setProperty(JCR_PRIMARYTYPE, "nt:resource", Type.NAME); + resource2.setProperty(binaryProperty(JCR_DATA, + "the quick brown fox jumps over the lazy dog.")); + resource2.setProperty("jcr:title", "other"); + resource.setProperty("jcr:description", "title"); + + root.commit(); + + String matchChildSimple = "//*[( jcr:contains(jcr:content/@jcr:title, 'title') )]"; + assertQuery(matchChildSimple, "xpath", ImmutableList.of("/myFile")); + + } + @Test public void testPreventDoubleAggregation() throws Exception { Tree file = root.getTree("/").addChild("myFile");
