Author: alexparvulescu Date: Wed Sep 18 15:10:04 2013 New Revision: 1524443
URL: http://svn.apache.org/r1524443 Log: OAK-1022 Add a custom Oak Lucene analyzer - added custom analyzer OakAnalyzer based on lucene's StandardAnalyzer plus an oak filter - copied over lucene's CompoundWordTokenFilterBase, added the option to skip the current token - added filter that splits alphanumeric tokens on '_' and '.' - tests Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (with props) jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java (with props) jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java (with props) Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1524443&r1=1524442&r2=1524443&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Wed Sep 18 15:10:04 2013 @@ -615,7 +615,7 @@ public class LuceneIndex implements Full public boolean visit(FullTextTerm term) { String p = 
term.getPropertyName(); if (p != null && p.indexOf('/') >= 0) { - //do not add constraints on child nodes properties + // do not add constraints on child nodes properties p = "*"; } Query q = tokenToQuery(term.getText(), analyzer); @@ -686,7 +686,7 @@ public class LuceneIndex implements Full * @param analyzer * @return */ - private static List<String> tokenize(String text, Analyzer analyzer) { + static List<String> tokenize(String text, Analyzer analyzer) { List<String> tokens = new ArrayList<String>(); TokenStream stream = null; try { Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1524443&r1=1524442&r2=1524443&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Wed Sep 18 15:10:04 2013 @@ -17,8 +17,6 @@ package org.apache.jackrabbit.oak.plugins.index.lucene; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.ClassicAnalyzer; -import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; public interface LuceneIndexConstants { @@ -29,7 +27,7 @@ public interface LuceneIndexConstants { Version VERSION = Version.LUCENE_43; - Analyzer ANALYZER = new ClassicAnalyzer(VERSION, CharArraySet.EMPTY_SET); + Analyzer ANALYZER = new OakAnalyzer(VERSION); /** * include only certain property types in the index Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1524443&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (added) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Wed Sep 18 15:10:04 2013 @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.lucene; + +import java.io.IOException; +import java.io.Reader; + +import org.apache.jackrabbit.oak.plugins.index.lucene.util.OakWordTokenFilter; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; + +public class OakAnalyzer extends Analyzer { + + /** Default maximum allowed token length */ + public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; + + private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; + + private final Version matchVersion; + + /** + * Creates a new {@link OakAnalyzer} + * + * @param matchVersion + * Lucene version to match See + * {@link <a href="#version">above</a>} + */ + public OakAnalyzer(Version matchVersion) { + this.matchVersion = matchVersion; + } + + /** + * Set maximum allowed token length. If a token is seen that exceeds this + * length then it is discarded. This setting only takes effect the next time + * tokenStream or tokenStream is called. 
+ */ + public void setMaxTokenLength(int length) { + maxTokenLength = length; + } + + /** + * @see #setMaxTokenLength + */ + public int getMaxTokenLength() { + return maxTokenLength; + } + + @Override + protected TokenStreamComponents createComponents(final String fieldName, + final Reader reader) { + final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); + src.setMaxTokenLength(maxTokenLength); + TokenStream tok = new StandardFilter(matchVersion, src); + tok = new LowerCaseFilter(matchVersion, tok); + tok = new OakWordTokenFilter(matchVersion, tok); + return new TokenStreamComponents(src, tok) { + @Override + protected void setReader(final Reader reader) throws IOException { + src.setMaxTokenLength(OakAnalyzer.this.maxTokenLength); + super.setReader(reader); + } + }; + } +} Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java ------------------------------------------------------------------------------ svn:keywords = Author Date Id Revision Rev URL Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java?rev=1524443&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java (added) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java Wed Sep 18 15:10:04 2013 @@ -0,0 +1,178 
@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.lucene.util; + +import java.io.IOException; +import java.util.LinkedList; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Version; + +/** + * Base class for decomposition token filters. + * <p> + * + * <a name="version"></a> + * You must specify the required {@link Version} compatibility when creating + * CompoundWordTokenFilterBase: + * <ul> + * <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 + * supplementary characters in strings and char arrays provided as compound word + * dictionaries. 
+ * </ul> + */ +public abstract class CompoundWordTokenFilterBase extends TokenFilter { + /** + * The default for minimal word length that gets decomposed + */ + public static final int DEFAULT_MIN_WORD_SIZE = 5; + + /** + * The default for minimal length of subwords that get propagated to the output of this filter + */ + public static final int DEFAULT_MIN_SUBWORD_SIZE = 2; + + /** + * The default for maximal length of subwords that get propagated to the output of this filter + */ + public static final int DEFAULT_MAX_SUBWORD_SIZE = 15; + + protected final CharArraySet dictionary; + protected final LinkedList<CompoundToken> tokens; + protected final int minWordSize; + protected final int minSubwordSize; + protected final int maxSubwordSize; + protected final boolean onlyLongestMatch; + + protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + + private AttributeSource.State current; + + protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, boolean onlyLongestMatch) { + this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch); + } + + protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary) { + this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false); + } + + protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) { + super(input); + + this.tokens=new LinkedList<CompoundToken>(); + if (minWordSize < 0) { + throw new IllegalArgumentException("minWordSize cannot be negative"); + } + 
this.minWordSize=minWordSize; + if (minSubwordSize < 0) { + throw new IllegalArgumentException("minSubwordSize cannot be negative"); + } + this.minSubwordSize=minSubwordSize; + if (maxSubwordSize < 0) { + throw new IllegalArgumentException("maxSubwordSize cannot be negative"); + } + this.maxSubwordSize=maxSubwordSize; + this.onlyLongestMatch=onlyLongestMatch; + this.dictionary = dictionary; + } + + @Override + public final boolean incrementToken() throws IOException { + if (!tokens.isEmpty()) { + assert current != null; + CompoundToken token = tokens.removeFirst(); + restoreState(current); // keep all other attributes untouched + termAtt.setEmpty().append(token.txt); + offsetAtt.setOffset(token.startOffset, token.endOffset); + posIncAtt.setPositionIncrement(0); + return true; + } + + current = null; // not really needed, but for safety + if (input.incrementToken()) { + // Only words longer than minWordSize get processed + if (termAtt.length() >= this.minWordSize) { + decompose(); + // only capture the state if we really need it for producing new tokens + if (!tokens.isEmpty()) { + current = captureState(); + + // DIFF TO ORIGINAL LUCENE CompoundWordTokenFilterBase + // update the attributes for the current subtoken + CompoundToken token = tokens.removeFirst(); + termAtt.setEmpty().append(token.txt); + offsetAtt.setOffset(token.startOffset, token.endOffset); + // -- END DIFF + } + } + // return original token: + return true; + } else { + return false; + } + } + + /** Decomposes the current {@link #termAtt} and places {@link CompoundToken} instances in the {@link #tokens} list. + * The original token may not be placed in the list, as it is automatically passed through this filter. 
+ */ + protected abstract void decompose(); + + @Override + public void reset() throws IOException { + super.reset(); + tokens.clear(); + current = null; + } + + /** + * Helper class to hold decompounded token information + */ + protected class CompoundToken { + public final CharSequence txt; + public final int startOffset, endOffset; + + /** Construct the compound token based on a slice of the current {@link CompoundWordTokenFilterBase#termAtt}. */ + public CompoundToken(int offset, int length) { + this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length); + + // offsets of the original word + int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset(); + int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset(); + + if (endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) { + // if length by start + end offsets doesn't match the term text then assume + // this is a synonym and don't adjust the offsets. + this.startOffset = startOff; + this.endOffset = endOff; + } else { + final int newStart = startOff + offset; + this.startOffset = newStart; + this.endOffset = newStart + length; + } + } + + } +} + Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CompoundWordTokenFilterBase.java ------------------------------------------------------------------------------ svn:keywords = Author Date Id Revision Rev URL Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java?rev=1524443&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java (added) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java Wed Sep 18 15:10:04 2013 @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.lucene.util; + +import java.util.Arrays; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.util.Version; + +public class OakWordTokenFilter extends CompoundWordTokenFilterBase { + + private static final String ALPHANUM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM]; + + private static final char[] SEPARATORS = new char[] { '_', '.' 
}; + + private final char[] separators; + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + + public OakWordTokenFilter(Version version, TokenStream in, char[] separators) { + super(version, in, null); + this.separators = separators; + Arrays.sort(this.separators); + } + + public OakWordTokenFilter(Version version, TokenStream in) { + this(version, in, SEPARATORS); + } + + @Override + protected void decompose() { + if (ALPHANUM_TYPE.equals(typeAtt.type())) { + final int len = termAtt.length(); + char[] buffer = termAtt.buffer(); + int tokenLen = 0; + boolean foundOne = false; + for (int i = 0; i < len; i++) { + if (Arrays.binarySearch(separators, buffer[i]) >= 0) { + foundOne = true; + if (tokenLen > 0) { + CompoundToken ct = new CompoundToken(i - tokenLen, + tokenLen); + tokens.add(ct); + } + tokenLen = 0; + } else { + tokenLen++; + } + } + // if there's no split, don't return anything, let the parent + // tokenizer return the full token + if (foundOne && tokenLen > 0) { + CompoundToken ct = new CompoundToken(len - tokenLen, tokenLen); + tokens.add(ct); + } + } + } +} \ No newline at end of file Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/OakWordTokenFilter.java ------------------------------------------------------------------------------ svn:keywords = Author Date Id Revision Rev URL Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1524443&r1=1524442&r2=1524443&view=diff 
============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Wed Sep 18 15:10:04 2013 @@ -149,14 +149,27 @@ public class LuceneIndexQueryTest extend @Test public void containsPath() throws Exception { - String h = "/p1/p2/p3"; Tree test = root.getTree("/").addChild("test"); - test.addChild("a").setProperty("name", h); + test.addChild("a").setProperty("name", "/parent/child/node"); root.commit(); StringBuffer stmt = new StringBuffer(); - stmt.append("//*[jcr:contains(., '/p1/p2*')]"); + stmt.append("//*[jcr:contains(., '/parent/child')]"); + assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a")); + + } + + @Test + public void containsPathNum() throws Exception { + + Tree test = root.getTree("/").addChild("test"); + Tree a = test.addChild("a"); + a.setProperty("name", "/segment1/segment2/segment3"); + root.commit(); + + StringBuffer stmt = new StringBuffer(); + stmt.append("//*[jcr:contains(., '/segment1/segment2')]"); assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a")); } @@ -174,4 +187,17 @@ public class LuceneIndexQueryTest extend } + @Test + public void containsPathStrictNum() throws Exception { + root.getTree("/").addChild("matchOnPath1234"); + root.getTree("/").addChild("match_on_path1234"); + root.commit(); + + StringBuffer stmt = new StringBuffer(); + stmt.append("//*[jcr:contains(., 'match')]"); + assertQuery(stmt.toString(), "xpath", + ImmutableList.of("/match_on_path1234")); + + } + } Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java?rev=1524443&r1=1524442&r2=1524443&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java Wed Sep 18 15:10:04 2013 @@ -44,6 +44,7 @@ import org.apache.jackrabbit.oak.spi.sta import org.apache.lucene.analysis.Analyzer; import org.junit.Test; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; public class LuceneIndexTest { @@ -51,7 +52,8 @@ public class LuceneIndexTest { private static final Analyzer analyzer = LuceneIndexConstants.ANALYZER; private static final EditorHook HOOK = new EditorHook( - new IndexUpdateProvider(new LuceneIndexEditorProvider().with(analyzer))); + new IndexUpdateProvider( + new LuceneIndexEditorProvider().with(analyzer))); private NodeState root = new InitialContent().initialize(EMPTY_NODE); @@ -112,14 +114,15 @@ public class LuceneIndexTest { @Test public void testLucene3() throws Exception { NodeBuilder index = builder.child(INDEX_DEFINITIONS_NAME); - newLuceneIndexDefinition( - index, "lucene", ImmutableSet.of(PropertyType.TYPENAME_STRING)); + newLuceneIndexDefinition(index, "lucene", + ImmutableSet.of(PropertyType.TYPENAME_STRING)); NodeState before = builder.getNodeState(); builder.setProperty("foo", "bar"); builder.child("a").setProperty("foo", "bar"); builder.child("a").child("b").setProperty("foo", "bar", Type.NAME); - builder.child("a").child("b").child("c").setProperty("foo", "bar", Type.NAME); + builder.child("a").child("b").child("c") + .setProperty("foo", "bar", Type.NAME); NodeState after = builder.getNodeState(); @@ -143,7 +146,24 @@ public class LuceneIndexTest { 
NodeState types = system.getChildNode(JCR_NODE_TYPES); NodeState type = types.getChildNode(nodeTypeName); SelectorImpl selector = new SelectorImpl(type, nodeTypeName); - return new FilterImpl(selector, "SELECT * FROM [" + nodeTypeName + "]", null); + return new FilterImpl(selector, "SELECT * FROM [" + nodeTypeName + "]", + null); + } + + @Test + public void testTokens() { + assertEquals(ImmutableList.of("parent", "child"), + LuceneIndex.tokenize("/parent/child", analyzer)); + assertEquals(ImmutableList.of("p1234", "p5678"), + LuceneIndex.tokenize("/p1234/p5678", analyzer)); + assertEquals(ImmutableList.of("first", "second"), + LuceneIndex.tokenize("first_second", analyzer)); + assertEquals(ImmutableList.of("first1", "second2"), + LuceneIndex.tokenize("first1_second2", analyzer)); + assertEquals(ImmutableList.of("first", "second"), + LuceneIndex.tokenize("first. second", analyzer)); + assertEquals(ImmutableList.of("first", "second"), + LuceneIndex.tokenize("first.second", analyzer)); } }
