Author: catholicon
Date: Tue Nov 17 16:25:59 2015
New Revision: 1714827

URL: http://svn.apache.org/viewvc?rev=1714827&view=rev
Log:
OAK-3648 Use StandardTokenizer instead of ClassicTokenizer in OakAnalyzer.
Also added a test case that verifies correct analyzer behavior for strings
containing surrogate pairs.

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1714827&r1=1714826&r2=1714827&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Tue Nov 17 16:25:59 2015
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.util.Version;
 

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1714827&r1=1714826&r2=1714827&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Tue Nov 17 16:25:59 2015
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
-import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 
 public class OakAnalyzer extends Analyzer {
@@ -43,7 +43,7 @@ public class OakAnalyzer extends Analyze
     @Override
     protected TokenStreamComponents createComponents(final String fieldName,
             final Reader reader) {
-        ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
+        StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
         TokenStream tok = new LowerCaseFilter(matchVersion, src);
         tok = new WordDelimiterFilter(tok,
                 WordDelimiterFilter.GENERATE_WORD_PARTS

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java?rev=1714827&r1=1714826&r2=1714827&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryFulltextTest.java Tue Nov 17 16:25:59 2015
@@ -83,6 +83,31 @@ public class QueryFulltextTest extends A
 
     }
 
+    public void testSurrogateFulltext() throws Exception {
+        String surrogateString = "壱\n" +
+                "複数の文字\n" +
+                "カナポ\n" +
+                "ハンカクポ\n" +
+                "表十ソ\n" +
+                "\uD842\uDF9F\n" +
+                "Mixあポピ表\uD842\uDF9F" +
+                "";
+        String [] searchStrs = new String[]{"\uD842\uDF9F", "Mix"};
+        Session session = superuser;
+        QueryManager qm = session.getWorkspace().getQueryManager();
+        Node n1 = testRootNode.addNode("node");
+        n1.setProperty("text", surrogateString);
+        session.save();
+
+        for (String searchTerm : searchStrs) {
+            String sql2 = "select [jcr:path] as [path] from [nt:base] " +
+                    "where contains([text], '" + searchTerm + "') order by [jcr:path]";
+            Query q = qm.createQuery(sql2, Query.JCR_SQL2);
+            log.println("Testing" + searchTerm);
+            assertEquals("Lookup failed for " + searchTerm, "/testroot/node", getResult(q.execute(), "path"));
+        }
+    }
+
     public void testFulltextRelativeProperty() throws Exception {
         Session session = superuser;
         QueryManager qm = session.getWorkspace().getQueryManager();


Reply via email to