Author: alexparvulescu Date: Tue Mar 25 21:38:29 2014 New Revision: 1581537
URL: http://svn.apache.org/r1581537 Log: OAK-1614 Oak Analyzer can't tokenize chinese phrases Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1581537&r1=1581536&r2=1581537&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Tue Mar 25 21:38:29 2014 @@ -25,7 +25,7 @@ public interface LuceneIndexConstants { String INDEX_DATA_CHILD_NAME = ":data"; - Version VERSION = Version.LUCENE_46; + Version VERSION = Version.LUCENE_47; Analyzer ANALYZER = new OakAnalyzer(VERSION); Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1581537&r1=1581536&r2=1581537&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (original) +++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Tue Mar 25 21:38:29 2014 @@ -21,8 +21,8 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; +import org.apache.lucene.analysis.standard.ClassicTokenizer; import org.apache.lucene.util.Version; public class OakAnalyzer extends Analyzer { @@ -43,13 +43,12 @@ public class OakAnalyzer extends Analyze @Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { - WhitespaceTokenizer src = new WhitespaceTokenizer(matchVersion, reader); + ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader); TokenStream tok = new LowerCaseFilter(matchVersion, src); tok = new WordDelimiterFilter(tok, WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE | WordDelimiterFilter.GENERATE_NUMBER_PARTS, null); - return new TokenStreamComponents(src, tok); } } Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java?rev=1581537&r1=1581536&r2=1581537&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/TextExtractionQueryTest.java Tue Mar 25 21:38:29 2014 @@ -77,6 +77,7 @@ public class TextExtractionQueryTest ext } } + @SuppressWarnings("deprecation") private void assertContainsQuery(String statement, 
boolean match) throws InvalidQueryException, RepositoryException { StringBuffer stmt = new StringBuffer(); Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1581537&r1=1581536&r2=1581537&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Tue Mar 25 21:38:29 2014 @@ -284,4 +284,14 @@ public class LuceneIndexQueryTest extend assertFalse(strings.hasNext()); } + @Test + public void testTokenizeCN() throws Exception { + Tree t = root.getTree("/").addChild("containsCN"); + Tree one = t.addChild("one"); + one.setProperty("t", "美女衬衫"); + root.commit(); + assertQuery("//*[jcr:contains(., '美女')]", "xpath", + ImmutableList.of(one.getPath())); + } + }
