Author: thomasm
Date: Thu Jul 21 13:01:12 2016
New Revision: 1753682
URL: http://svn.apache.org/viewvc?rev=1753682&view=rev
Log:
OAK-4575 Oak 1.0.x fulltext search with ideographic space (U+3000) as separator
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java?rev=1753682&r1=1753681&r2=1753682&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
Thu Jul 21 13:01:12 2016
@@ -50,6 +50,9 @@ public class FullTextSearchImpl extends
*/
public static final boolean JACKRABBIT_2_SINGLE_QUOTED_PHRASE = true;
+ private static final boolean REPLACE_IDEOGRAPHIC_SPACE =
+ Boolean.parseBoolean(System.getProperty("oak.queryReplaceIdeographicSpace", "true"));
+
private final String selectorName;
private final String relativePath;
private final String propertyName;
@@ -84,6 +87,12 @@ public class FullTextSearchImpl extends
public StaticOperandImpl getFullTextSearchExpression() {
return fullTextSearchExpression;
}
+
+ private String getFullTextSearchCurrentString() {
+ String text = fullTextSearchExpression.currentValue().getValue(Type.STRING);
+ text = replaceIdeographicSpace(text);
+ return text;
+ }
@Override
boolean accept(AstVisitor v) {
@@ -129,7 +138,7 @@ public class FullTextSearchImpl extends
if (!s.equals(selector)) {
return null;
}
- PropertyValue v = fullTextSearchExpression.currentValue();
+ String text = getFullTextSearchCurrentString();
try {
String p = propertyName;
if (relativePath != null) {
@@ -139,7 +148,7 @@ public class FullTextSearchImpl extends
p = PathUtils.concat(relativePath, p);
}
String p2 = normalizePropertyName(p);
- return FullTextParser.parse(p2, v.getValue(Type.STRING));
+ return FullTextParser.parse(p2, text);
} catch (ParseException e) {
throw new IllegalArgumentException("Invalid expression: " + fullTextSearchExpression, e);
}
@@ -249,7 +258,7 @@ public class FullTextSearchImpl extends
f.restrictProperty(p, Operator.NOT_EQUAL, null);
}
}
- f.restrictFulltextCondition(fullTextSearchExpression.currentValue().getValue(Type.STRING));
+ f.restrictFulltextCondition(getFullTextSearchCurrentString());
}
@Override
@@ -259,4 +268,18 @@ public class FullTextSearchImpl extends
}
}
+ /**
+ * Replace the ideographic space character (U+3000) with a simple space.
+ * See OAK-4575 for details.
+ *
+ * @param text the original text
+ * @return the text, with U+3000 replaced
+ */
+ public static String replaceIdeographicSpace(String text) {
+ if (text == null || !REPLACE_IDEOGRAPHIC_SPACE) {
+ return text;
+ }
+ return text.replace('\u3000', ' ');
+ }
+
}
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?rev=1753682&r1=1753681&r2=1753682&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
Thu Jul 21 13:01:12 2016
@@ -18,8 +18,6 @@ package org.apache.jackrabbit.oak.query.
import java.util.BitSet;
import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
import java.util.Set;
import com.google.common.base.Splitter;
@@ -38,7 +36,6 @@ import org.apache.jackrabbit.oak.query.a
import org.apache.jackrabbit.oak.query.ast.OrImpl;
import org.apache.jackrabbit.oak.spi.query.PropertyValues;
-import static com.google.common.collect.Maps.newHashMap;
import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
/**
@@ -118,7 +115,9 @@ public class SimpleExcerptProvider {
FullTextSearchImpl f = (FullTextSearchImpl) c;
if (f.getFullTextSearchExpression() instanceof LiteralImpl) {
LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression();
- tokens.add(l.getLiteralValue().getValue(Type.STRING));
+ String token = l.getLiteralValue().getValue(Type.STRING);
+ token = FullTextSearchImpl.replaceIdeographicSpace(token);
+ tokens.add(token);
}
}
if (c instanceof AndImpl) {
Modified:
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1753682&r1=1753681&r2=1753682&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
Thu Jul 21 13:01:12 2016
@@ -17,12 +17,16 @@
package org.apache.jackrabbit.oak.plugins.index.lucene;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.jackrabbit.oak.Oak;
import org.apache.jackrabbit.oak.api.ContentRepository;
+import org.apache.jackrabbit.oak.api.Result;
+import org.apache.jackrabbit.oak.api.ResultRow;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
@@ -30,14 +34,14 @@ import org.apache.jackrabbit.oak.query.A
import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
-import org.junit.Ignore;
import org.junit.Test;
import static com.google.common.collect.ImmutableList.of;
import static java.util.Arrays.asList;
-import static junit.framework.Assert.assertEquals;
+import static org.apache.jackrabbit.oak.api.QueryEngine.NO_BINDINGS;
import static org.apache.jackrabbit.oak.api.Type.STRINGS;
import static org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil.useV2;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -354,6 +358,26 @@ public class LuceneIndexQueryTest extend
}
@Test
+ public void ideographicSpace() throws Exception {
+ Tree t = root.getTree("/").addChild("ideographicSpace");
+ Tree one = t.addChild("one");
+ one.setProperty("a", "エア");
+ one.setProperty("b", "添付文書");
+ root.commit();
+ String explain = explainXpath("//*[jcr:contains(., 'エア　添付文書')]");
+ System.out.println(explain);
+ assertQuery("//*[jcr:contains(., 'エア　添付文書')]", "xpath",
+ ImmutableList.of(one.getPath()));
+ }
+
+ private String explainXpath(String query) throws ParseException {
+ String explain = "explain " + query;
+ Result result = executeQuery(explain, "xpath", NO_BINDINGS);
+ ResultRow row = Iterables.getOnlyElement(result.getRows());
+ return row.getValue("plan").getValue(Type.STRING);
+ }
+
+ @Test
public void testMultiValuedPropUpdate() throws Exception {
Tree test = root.getTree("/").addChild("test");
String child = "child";