Author: thomasm
Date: Thu Jul 21 13:01:12 2016
New Revision: 1753682

URL: http://svn.apache.org/viewvc?rev=1753682&view=rev
Log:
OAK-4575 Oak 1.0.x fulltext search with ideographic space (U+3000) as separator

Modified:
    jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
    jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
    jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java

Modified: jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java?rev=1753682&r1=1753681&r2=1753682&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java (original)
+++ jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java Thu Jul 21 13:01:12 2016
@@ -50,6 +50,9 @@ public class FullTextSearchImpl extends
      */
     public static final boolean JACKRABBIT_2_SINGLE_QUOTED_PHRASE = true;
 
+    private static final boolean REPLACE_IDEOGRAPHIC_SPACE = 
+            Boolean.parseBoolean(System.getProperty("oak.queryReplaceIdeographicSpace", "true"));
+
     private final String selectorName;
     private final String relativePath;
     private final String propertyName;
@@ -84,6 +87,12 @@ public class FullTextSearchImpl extends
     public StaticOperandImpl getFullTextSearchExpression() {
         return fullTextSearchExpression;
     }
+    
+    private String getFullTextSearchCurrentString() {
+        String text = fullTextSearchExpression.currentValue().getValue(Type.STRING);
+        text = replaceIdeographicSpace(text);
+        return text;
+    }
 
     @Override
     boolean accept(AstVisitor v) {
@@ -129,7 +138,7 @@ public class FullTextSearchImpl extends
         if (!s.equals(selector)) {
             return null;
         }
-        PropertyValue v = fullTextSearchExpression.currentValue();
+        String text = getFullTextSearchCurrentString();
         try {
             String p = propertyName;
             if (relativePath != null) {
@@ -139,7 +148,7 @@ public class FullTextSearchImpl extends
                 p = PathUtils.concat(relativePath, p);
             }
             String p2 = normalizePropertyName(p);
-            return FullTextParser.parse(p2, v.getValue(Type.STRING));
+            return FullTextParser.parse(p2, text);
         } catch (ParseException e) {
             throw new IllegalArgumentException("Invalid expression: " + fullTextSearchExpression, e);
         }
@@ -249,7 +258,7 @@ public class FullTextSearchImpl extends
                 f.restrictProperty(p, Operator.NOT_EQUAL, null);
             }
         }
-        f.restrictFulltextCondition(fullTextSearchExpression.currentValue().getValue(Type.STRING));
+        f.restrictFulltextCondition(getFullTextSearchCurrentString());
     }
 
     @Override
@@ -259,4 +268,18 @@ public class FullTextSearchImpl extends
         }
     }
 
+    /**
+     * Replace the ideographic space character (U+3000) with a simple space.
+     * See OAK-4575 for details.
+     * 
+     * @param text the original text
+     * @return the text, with U+3000 replaced
+     */
+    public static String replaceIdeographicSpace(String text) {
+        if (text == null || !REPLACE_IDEOGRAPHIC_SPACE) {
+            return text;
+        }
+        return text.replace('\u3000', ' ');
+    }
+
 }

Modified: jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?rev=1753682&r1=1753681&r2=1753682&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (original)
+++ jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java Thu Jul 21 13:01:12 2016
@@ -18,8 +18,6 @@ package org.apache.jackrabbit.oak.query.
 
 import java.util.BitSet;
 import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
 import java.util.Set;
 
 import com.google.common.base.Splitter;
@@ -38,7 +36,6 @@ import org.apache.jackrabbit.oak.query.a
 import org.apache.jackrabbit.oak.query.ast.OrImpl;
 import org.apache.jackrabbit.oak.spi.query.PropertyValues;
 
-import static com.google.common.collect.Maps.newHashMap;
 import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
 
 /**
@@ -118,7 +115,9 @@ public class SimpleExcerptProvider {
             FullTextSearchImpl f = (FullTextSearchImpl) c;
             if (f.getFullTextSearchExpression() instanceof LiteralImpl) {
                 LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression();
-                tokens.add(l.getLiteralValue().getValue(Type.STRING));
+                String token = l.getLiteralValue().getValue(Type.STRING);
+                token = FullTextSearchImpl.replaceIdeographicSpace(token);
+                tokens.add(token);
             }
         }
         if (c instanceof AndImpl) {

Modified: jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1753682&r1=1753681&r2=1753682&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original)
+++ jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Thu Jul 21 13:01:12 2016
@@ -17,12 +17,16 @@
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
 
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Iterator;
 
 import org.apache.jackrabbit.oak.Oak;
 import org.apache.jackrabbit.oak.api.ContentRepository;
+import org.apache.jackrabbit.oak.api.Result;
+import org.apache.jackrabbit.oak.api.ResultRow;
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
@@ -30,14 +34,14 @@ import org.apache.jackrabbit.oak.query.A
 import org.apache.jackrabbit.oak.spi.commit.Observer;
 import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
 import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
-import org.junit.Ignore;
 import org.junit.Test;
 
 import static com.google.common.collect.ImmutableList.of;
 import static java.util.Arrays.asList;
-import static junit.framework.Assert.assertEquals;
+import static org.apache.jackrabbit.oak.api.QueryEngine.NO_BINDINGS;
 import static org.apache.jackrabbit.oak.api.Type.STRINGS;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil.useV2;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
@@ -354,6 +358,26 @@ public class LuceneIndexQueryTest extend
     }
     
     @Test
+    public void ideographicSpace() throws Exception {
+        Tree t = root.getTree("/").addChild("ideographicSpace");
+        Tree one = t.addChild("one");
+        one.setProperty("a", "エア");
+        one.setProperty("b", "添付文書");
+        root.commit();
+        String explain = explainXpath("//*[jcr:contains(., 'エア 添付文書')]");
+        System.out.println(explain);
+        assertQuery("//*[jcr:contains(., 'エア 添付文書')]", "xpath",
+                ImmutableList.of(one.getPath()));
+    }    
+    
+    private String explainXpath(String query) throws ParseException {
+        String explain = "explain " + query;
+        Result result = executeQuery(explain, "xpath", NO_BINDINGS);
+        ResultRow row = Iterables.getOnlyElement(result.getRows());
+        return row.getValue("plan").getValue(Type.STRING);
+    }
+    
+    @Test
     public void testMultiValuedPropUpdate() throws Exception {
         Tree test = root.getTree("/").addChild("test");
         String child = "child";


Reply via email to