Author: thomasm
Date: Wed Jul 10 10:36:52 2013
New Revision: 1501713
URL: http://svn.apache.org/r1501713
Log:
OAK-890 Query: advanced fulltext search conditions (refactoring: move to a new
package, and support boost)
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextAnd.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextExpression.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextOr.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextParser.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/LikePattern.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
- copied, changed from r1499285,
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java
Removed:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/ComparisonImpl.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/LikePatternTest.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java?rev=1501713&r1=1501712&r2=1501713&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
Wed Jul 10 10:36:52 2013
@@ -21,6 +21,7 @@ import org.apache.jackrabbit.oak.api.Pro
import org.apache.jackrabbit.oak.api.ResultRow;
import org.apache.jackrabbit.oak.query.ast.ColumnImpl;
import org.apache.jackrabbit.oak.query.ast.SelectorImpl;
+import org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider;
import org.apache.jackrabbit.oak.spi.query.PropertyValues;
/**
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/ComparisonImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/ComparisonImpl.java?rev=1501713&r1=1501712&r2=1501713&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/ComparisonImpl.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/ComparisonImpl.java
Wed Jul 10 10:36:52 2013
@@ -26,6 +26,7 @@ import javax.jcr.PropertyType;
import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.query.fulltext.LikePattern;
import org.apache.jackrabbit.oak.query.index.FilterImpl;
import org.apache.jackrabbit.oak.spi.query.PropertyValues;
@@ -209,171 +210,4 @@ public class ComparisonImpl extends Cons
}
}
- /**
- * A pattern matcher.
- */
- public static class LikePattern {
-
- // TODO LIKE: optimize condition to '=' when no patterns are used, or
'between x and x+1'
- // TODO LIKE: what to do for invalid patterns (patterns ending with a
backslash)
-
- private static final int MATCH = 0, ONE = 1, ANY = 2;
-
- private String patternString;
- private boolean invalidPattern;
- private char[] patternChars;
- private int[] patternTypes;
- private int patternLength;
- private String lowerBounds, upperBound;
-
- public LikePattern(String pattern) {
- initPattern(pattern);
- initBounds();
- }
-
- public boolean matches(String value) {
- return !invalidPattern && compareAt(value, 0, 0, value.length(),
patternChars, patternTypes);
- }
-
- private static boolean compare(char[] pattern, String s, int pi, int
si) {
- return pattern[pi] == s.charAt(si);
- }
-
- private boolean compareAt(String s, int pi, int si, int sLen, char[]
pattern, int[] types) {
- for (; pi < patternLength; pi++) {
- int type = types[pi];
- switch (type) {
- case MATCH:
- if (si >= sLen || !compare(pattern, s, pi, si++)) {
- return false;
- }
- break;
- case ONE:
- if (si++ >= sLen) {
- return false;
- }
- break;
- case ANY:
- if (++pi >= patternLength) {
- return true;
- }
- while (si < sLen) {
- if (compare(pattern, s, pi, si) && compareAt(s, pi,
si, sLen, pattern, types)) {
- return true;
- }
- si++;
- }
- return false;
- default:
- throw new IllegalArgumentException("Internal error: " +
type);
- }
- }
- return si == sLen;
- }
-
- private void initPattern(String p) {
- patternLength = 0;
- if (p == null) {
- patternTypes = null;
- patternChars = null;
- return;
- }
- int len = p.length();
- patternChars = new char[len];
- patternTypes = new int[len];
- boolean lastAny = false;
- for (int i = 0; i < len; i++) {
- char c = p.charAt(i);
- int type;
- if (c == '\\') {
- if (i >= len - 1) {
- invalidPattern = true;
- return;
- }
- c = p.charAt(++i);
- type = MATCH;
- lastAny = false;
- } else if (c == '%') {
- if (lastAny) {
- continue;
- }
- type = ANY;
- lastAny = true;
- } else if (c == '_') {
- type = ONE;
- } else {
- type = MATCH;
- lastAny = false;
- }
- patternTypes[patternLength] = type;
- patternChars[patternLength++] = c;
- }
- for (int i = 0; i < patternLength - 1; i++) {
- if (patternTypes[i] == ANY && patternTypes[i + 1] == ONE) {
- patternTypes[i] = ONE;
- patternTypes[i + 1] = ANY;
- }
- }
- patternString = new String(patternChars, 0, patternLength);
- }
-
- @Override
- public String toString() {
- return patternString;
- }
-
- /**
- * Get the lower bound if any.
- *
- * @return return the lower bound, or null if unbound
- */
- public String getLowerBound() {
- return lowerBounds;
- }
-
- /**
- * Get the upper bound if any.
- *
- * @return return the upper bound, or null if unbound
- */
- public String getUpperBound() {
- return upperBound;
- }
-
- private void initBounds() {
- if (invalidPattern) {
- return;
- }
- if (patternLength <= 0 || patternTypes[0] != MATCH) {
- // can't use an index
- return;
- }
- int maxMatch = 0;
- StringBuilder buff = new StringBuilder();
- while (maxMatch < patternLength && patternTypes[maxMatch] ==
MATCH) {
- buff.append(patternChars[maxMatch++]);
- }
- String lower = buff.toString();
- if (lower.isEmpty()) {
- return;
- }
- if (maxMatch == patternLength) {
- lowerBounds = upperBound = lower;
- return;
- }
- lowerBounds = lower;
- char next = lower.charAt(lower.length() - 1);
- // search the 'next' unicode character (or at least a character
- // that is higher)
- for (int i = 1; i < 2000; i++) {
- String upper = lower.substring(0, lower.length() - 1) + (char)
(next + i);
- if (upper.compareTo(lower) > 0) {
- upperBound = upper;
- return;
- }
- }
- }
-
- }
-
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java?rev=1501713&r1=1501712&r2=1501713&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java
Wed Jul 10 10:36:52 2013
@@ -22,7 +22,6 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.api.Type.STRINGS;
import java.text.ParseException;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
@@ -32,7 +31,8 @@ import org.apache.jackrabbit.oak.api.Pro
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
-import org.apache.jackrabbit.oak.query.ast.ComparisonImpl.LikePattern;
+import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
+import org.apache.jackrabbit.oak.query.fulltext.FullTextParser;
import org.apache.jackrabbit.oak.query.index.FilterImpl;
import org.apache.jackrabbit.oak.spi.query.PropertyValues;
import org.apache.jackrabbit.oak.spi.query.QueryIndex.FulltextQueryIndex;
@@ -200,282 +200,4 @@ public class FullTextSearchImpl extends
}
}
- /**
- * A parser for fulltext condition literals. The grammar is defined in the
- * <a href="http://www.day.com/specs/jcr/2.0/6_Query.html#6.7.19">
- * JCR 2.0 specification, 6.7.19 FullTextSearch</a>,
- * as follows (a bit simplified):
- * <pre>
- * FullTextSearchLiteral ::= Disjunct {' OR ' Disjunct}
- * Disjunct ::= Term {' ' Term}
- * Term ::= ['-'] SimpleTerm
- * SimpleTerm ::= Word | '"' Word {' ' Word} '"'
- * </pre>
- */
- public static class FullTextParser {
-
- String text;
- int parseIndex;
-
- public static FullTextExpression parse(String text) throws
ParseException {
- FullTextParser p = new FullTextParser();
- p.text = text;
- FullTextExpression e = p.parseOr();
- return e;
- }
-
- FullTextExpression parseOr() throws ParseException {
- FullTextOr or = new FullTextOr();
- or.list.add(parseAnd());
- while (parseIndex < text.length()) {
- if (text.substring(parseIndex).startsWith("OR ")) {
- parseIndex += 3;
- or.list.add(parseAnd());
- } else {
- break;
- }
- }
- return or.simplify();
- }
-
- FullTextExpression parseAnd() throws ParseException {
- FullTextAnd and = new FullTextAnd();
- and.list.add(parseTerm());
- while (parseIndex < text.length()) {
- if (text.substring(parseIndex).startsWith("OR ")) {
- break;
- }
- and.list.add(parseTerm());
- }
- return and.simplify();
- }
-
- FullTextExpression parseTerm() throws ParseException {
- if (parseIndex >= text.length()) {
- throw getSyntaxError("term");
- }
- boolean not = false;
- StringBuilder buff = new StringBuilder();
- char c = text.charAt(parseIndex);
- if (c == '-') {
- if (++parseIndex >= text.length()) {
- throw getSyntaxError("term");
- }
- not = true;
- }
- boolean escaped = false;
- if (c == '\"') {
- parseIndex++;
- while (true) {
- if (parseIndex >= text.length()) {
- throw getSyntaxError("double quote");
- }
- c = text.charAt(parseIndex++);
- if (c == '\\') {
- escaped = true;
- if (parseIndex >= text.length()) {
- throw getSyntaxError("escaped char");
- }
- c = text.charAt(parseIndex++);
- buff.append(c);
- } else if (c == '\"') {
- if (parseIndex < text.length() &&
text.charAt(parseIndex) != ' ') {
- throw getSyntaxError("space");
- }
- parseIndex++;
- break;
- } else {
- buff.append(c);
- }
- }
- } else {
- do {
- c = text.charAt(parseIndex++);
- if (c == '\\') {
- escaped = true;
- if (parseIndex >= text.length()) {
- throw getSyntaxError("escaped char");
- }
- c = text.charAt(parseIndex++);
- buff.append(c);
- } else if (c == ' ') {
- break;
- } else {
- buff.append(c);
- }
- } while (parseIndex < text.length());
- }
- if (buff.length() == 0) {
- throw getSyntaxError("term");
- }
- String text = buff.toString();
- FullTextTerm term = new FullTextTerm(text, not, escaped);
- return term.simplify();
- }
-
- private ParseException getSyntaxError(String expected) {
- int index = Math.max(0, Math.min(parseIndex, text.length() - 1));
- String query = text.substring(0, index) + "(*)" +
text.substring(index).trim();
- if (expected != null) {
- query += "; expected: " + expected;
- }
- return new ParseException("FullText expression: " + query, index);
- }
-
- }
-
- /**
- * The base class for fulltext condition expression.
- */
- public abstract static class FullTextExpression {
- public abstract boolean evaluate(String value);
- abstract FullTextExpression simplify();
- }
-
- /**
- * A fulltext "and" condition.
- */
- static class FullTextAnd extends FullTextExpression {
- ArrayList<FullTextExpression> list = new
ArrayList<FullTextExpression>();
-
- @Override
- public boolean evaluate(String value) {
- for (FullTextExpression e : list) {
- if (!e.evaluate(value)) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- FullTextExpression simplify() {
- return list.size() == 1 ? list.get(0) : this;
- }
-
- @Override
- public String toString() {
- StringBuilder buff = new StringBuilder();
- int i = 0;
- for (FullTextExpression e : list) {
- if (i++ > 0) {
- buff.append(' ');
- }
- buff.append(e.toString());
- }
- return buff.toString();
- }
-
- }
-
- /**
- * A fulltext "or" condition.
- */
- static class FullTextOr extends FullTextExpression {
- ArrayList<FullTextExpression> list = new
ArrayList<FullTextExpression>();
-
- @Override
- public boolean evaluate(String value) {
- for (FullTextExpression e : list) {
- if (e.evaluate(value)) {
- return true;
- }
- }
- return false;
- }
-
- @Override
- FullTextExpression simplify() {
- return list.size() == 1 ? list.get(0).simplify() : this;
- }
-
- @Override
- public String toString() {
- StringBuilder buff = new StringBuilder();
- int i = 0;
- for (FullTextExpression e : list) {
- if (i++ > 0) {
- buff.append(" OR ");
- }
- buff.append(e.toString());
- }
- return buff.toString();
- }
-
- }
-
- /**
- * A fulltext term, or a "not" term.
- */
- static class FullTextTerm extends FullTextExpression {
- private final boolean not;
- private final String text;
- private final String filteredText;
- private final LikePattern like;
-
- FullTextTerm(String text, boolean not, boolean escaped) {
- this.text = text;
- this.not = not;
- // for testFulltextIntercapSQL
- // filter special characters such as '
- // to make tests pass, for example the
- // FulltextQueryTest.testFulltextExcludeSQL,
- // which searches for:
- // "text ''fox jumps'' -other"
- // (please note the two single quotes instead of
- // double quotes before for and after jumps)
- boolean pattern = false;
- if (escaped) {
- filteredText = text;
- } else {
- StringBuilder buff = new StringBuilder();
- for (int i = 0; i < text.length(); i++) {
- char c = text.charAt(i);
- if (c == '*') {
- buff.append('%');
- pattern = true;
- } else if (c == '?') {
- buff.append('_');
- pattern = true;
- } else if (c == '_') {
- buff.append("\\_");
- pattern = true;
- } else if (Character.isLetterOrDigit(c) || "
+-:&".indexOf(c) >= 0) {
- buff.append(c);
- }
- }
- this.filteredText = buff.toString().toLowerCase();
- }
- if (pattern) {
- like = new LikePattern("%" + filteredText + "%");
- } else {
- like = null;
- }
- }
-
- @Override
- public boolean evaluate(String value) {
- // for testFulltextIntercapSQL
- value = value.toLowerCase();
- if (like != null) {
- return like.matches(value);
- }
- if (not) {
- return value.indexOf(filteredText) < 0;
- }
- return value.indexOf(filteredText) >= 0;
- }
-
- @Override
- FullTextExpression simplify() {
- return this;
- }
-
- @Override
- public String toString() {
- return (not ? "-" : "") + "\"" + text.replaceAll("\"", "\\\"") +
"\"";
- }
-
- }
-
}
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextAnd.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextAnd.java?rev=1501713&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextAnd.java
(added)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextAnd.java
Wed Jul 10 10:36:52 2013
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.query.fulltext;
+
+import java.util.ArrayList;
+
+/**
+ * A fulltext "and" condition.
+ */
+public class FullTextAnd extends FullTextExpression {
+
+ public ArrayList<FullTextExpression> list = new
ArrayList<FullTextExpression>();
+
+ @Override
+ public boolean evaluate(String value) {
+ for (FullTextExpression e : list) {
+ if (!e.evaluate(value)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public FullTextExpression simplify() {
+ return list.size() == 1 ? list.get(0) : this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buff = new StringBuilder();
+ int i = 0;
+ for (FullTextExpression e : list) {
+ if (i++ > 0) {
+ buff.append(' ');
+ }
+ if (e.getPrecedence() < getPrecedence()) {
+ buff.append('(');
+ }
+ buff.append(e.toString());
+ if (e.getPrecedence() < getPrecedence()) {
+ buff.append(')');
+ }
+ }
+ return buff.toString();
+ }
+
+ @Override
+ public int getPrecedence() {
+ return PRECEDENCE_AND;
+ }
+
+}
\ No newline at end of file
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextExpression.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextExpression.java?rev=1501713&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextExpression.java
(added)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextExpression.java
Wed Jul 10 10:36:52 2013
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.query.fulltext;
+
+/**
+ * The base class for fulltext condition expressions.
+ */
+public abstract class FullTextExpression {
+
+ public static final int PRECEDENCE_OR = 1, PRECEDENCE_AND = 2,
PRECEDENCE_TERM = 3;
+
+ public abstract int getPrecedence();
+ public abstract boolean evaluate(String value);
+ abstract FullTextExpression simplify();
+
+}
\ No newline at end of file
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextOr.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextOr.java?rev=1501713&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextOr.java
(added)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextOr.java
Wed Jul 10 10:36:52 2013
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.query.fulltext;
+
+import java.util.ArrayList;
+
+/**
+ * A fulltext "or" condition.
+ */
+public class FullTextOr extends FullTextExpression {
+ public ArrayList<FullTextExpression> list = new
ArrayList<FullTextExpression>();
+
+ @Override
+ public boolean evaluate(String value) {
+ for (FullTextExpression e : list) {
+ if (e.evaluate(value)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public FullTextExpression simplify() {
+ return list.size() == 1 ? list.get(0).simplify() : this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buff = new StringBuilder();
+ int i = 0;
+ for (FullTextExpression e : list) {
+ if (i++ > 0) {
+ buff.append(" OR ");
+ }
+ if (e.getPrecedence() < getPrecedence()) {
+ buff.append('(');
+ }
+ buff.append(e.toString());
+ if (e.getPrecedence() < getPrecedence()) {
+ buff.append(')');
+ }
+ }
+ return buff.toString();
+ }
+
+ @Override
+ public int getPrecedence() {
+ return PRECEDENCE_OR;
+ }
+
+}
\ No newline at end of file
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextParser.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextParser.java?rev=1501713&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextParser.java
(added)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextParser.java
Wed Jul 10 10:36:52 2013
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.query.fulltext;
+
+import java.text.ParseException;
+
+
+/**
+ * A parser for fulltext condition literals. The grammar is defined in the
+ * <a href="http://www.day.com/specs/jcr/2.0/6_Query.html#6.7.19">
+ * JCR 2.0 specification, 6.7.19 FullTextSearch</a>,
+ * as follows (a bit simplified):
+ * <pre>
+ * FullTextSearchLiteral ::= Disjunct {' OR ' Disjunct}
+ * Disjunct ::= Term {' ' Term}
+ * Term ::= ['-'] SimpleTerm
+ * SimpleTerm ::= Word | '"' Word {' ' Word} '"'
+ * </pre>
+ */
+public class FullTextParser {
+
+ String text;
+ int parseIndex;
+
+ public static FullTextExpression parse(String text) throws ParseException {
+ FullTextParser p = new FullTextParser();
+ p.text = text;
+ FullTextExpression e = p.parseOr();
+ return e;
+ }
+
+ FullTextExpression parseOr() throws ParseException {
+ FullTextOr or = new FullTextOr();
+ or.list.add(parseAnd());
+ while (parseIndex < text.length()) {
+ if (text.substring(parseIndex).startsWith("OR ")) {
+ parseIndex += 3;
+ or.list.add(parseAnd());
+ } else {
+ break;
+ }
+ }
+ return or.simplify();
+ }
+
+ FullTextExpression parseAnd() throws ParseException {
+ FullTextAnd and = new FullTextAnd();
+ and.list.add(parseTerm());
+ while (parseIndex < text.length()) {
+ if (text.substring(parseIndex).startsWith("OR ")) {
+ break;
+ }
+ and.list.add(parseTerm());
+ }
+ return and.simplify();
+ }
+
+ FullTextExpression parseTerm() throws ParseException {
+ if (parseIndex >= text.length()) {
+ throw getSyntaxError("term");
+ }
+ boolean not = false;
+ StringBuilder buff = new StringBuilder();
+ char c = text.charAt(parseIndex);
+ if (c == '-') {
+ if (++parseIndex >= text.length()) {
+ throw getSyntaxError("term");
+ }
+ not = true;
+ }
+ boolean escaped = false;
+ String boost = null;
+ if (c == '\"') {
+ parseIndex++;
+ while (true) {
+ if (parseIndex >= text.length()) {
+ throw getSyntaxError("double quote");
+ }
+ c = text.charAt(parseIndex++);
+ if (c == '\\') {
+ escaped = true;
+ if (parseIndex >= text.length()) {
+ throw getSyntaxError("escaped char");
+ }
+ c = text.charAt(parseIndex++);
+ buff.append(c);
+ } else if (c == '\"') {
+ if (parseIndex < text.length()) {
+ if (text.charAt(parseIndex) == '^') {
+ boost = "";
+ } else if (text.charAt(parseIndex) != ' ') {
+ throw getSyntaxError("space");
+ }
+ }
+ parseIndex++;
+ break;
+ } else {
+ buff.append(c);
+ }
+ }
+ } else {
+ do {
+ c = text.charAt(parseIndex++);
+ if (c == '\\') {
+ escaped = true;
+ if (parseIndex >= text.length()) {
+ throw getSyntaxError("escaped char");
+ }
+ c = text.charAt(parseIndex++);
+ buff.append(c);
+ } else if (c == '^') {
+ boost = "";
+ break;
+ } else if (c == ' ') {
+ break;
+ } else {
+ buff.append(c);
+ }
+ } while (parseIndex < text.length());
+ }
+ if (boost != null) {
+ StringBuilder b = new StringBuilder();
+ while (parseIndex < text.length()) {
+ c = text.charAt(parseIndex++);
+ if ((c < '0' || c > '9') && c != '.') {
+ break;
+ }
+ b.append(c);
+ }
+ boost = b.toString();
+ }
+ if (buff.length() == 0) {
+ throw getSyntaxError("term");
+ }
+ String text = buff.toString();
+ FullTextTerm term = new FullTextTerm(text, not, escaped, boost);
+ return term.simplify();
+ }
+
+ private ParseException getSyntaxError(String expected) {
+ int index = Math.max(0, Math.min(parseIndex, text.length() - 1));
+ String query = text.substring(0, index) + "(*)" +
text.substring(index).trim();
+ if (expected != null) {
+ query += "; expected: " + expected;
+ }
+ return new ParseException("FullText expression: " + query, index);
+ }
+
+}
\ No newline at end of file
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java?rev=1501713&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java
(added)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/FullTextTerm.java
Wed Jul 10 10:36:52 2013
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.query.fulltext;
+
+/**
+ * A fulltext term, or a "not" term.
+ */
+public class FullTextTerm extends FullTextExpression {
+ private final boolean not;
+ private final String text;
+ private final String filteredText;
+ private final String boost;
+ private final LikePattern like;
+
+ public FullTextTerm(String text, boolean not, boolean escaped, String
boost) {
+ this.text = text;
+ this.not = not;
+ this.boost = boost;
+ // for testFulltextIntercapSQL
+ // filter special characters such as '
+ // to make tests pass, for example the
+ // FulltextQueryTest.testFulltextExcludeSQL,
+ // which searches for:
+ // "text ''fox jumps'' -other"
+ // (please note the two single quotes instead of
+ // double quotes before fox and after jumps)
+ boolean pattern = false;
+ if (escaped) {
+ filteredText = text;
+ } else {
+ StringBuilder buff = new StringBuilder();
+ for (int i = 0; i < text.length(); i++) {
+ char c = text.charAt(i);
+ if (c == '*') {
+ buff.append('%');
+ pattern = true;
+ } else if (c == '?') {
+ buff.append('_');
+ pattern = true;
+ } else if (c == '_') {
+ buff.append("\\_");
+ pattern = true;
+ } else if (Character.isLetterOrDigit(c) || " +-:&".indexOf(c)
>= 0) {
+ buff.append(c);
+ }
+ }
+ this.filteredText = buff.toString().toLowerCase();
+ }
+ if (pattern) {
+ like = new LikePattern("%" + filteredText + "%");
+ } else {
+ like = null;
+ }
+ }
+
+ @Override
+ public boolean evaluate(String value) {
+ // for testFulltextIntercapSQL
+ value = value.toLowerCase();
+ if (like != null) {
+ return like.matches(value);
+ }
+ if (not) {
+ return value.indexOf(filteredText) < 0;
+ }
+ return value.indexOf(filteredText) >= 0;
+ }
+
+ @Override
+ public
+ FullTextExpression simplify() {
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buff = new StringBuilder();
+ if (not) {
+ buff.append('-');
+ }
+ buff.append('\"');
+ for (int i = 0; i < text.length(); i++) {
+ char c = text.charAt(i);
+ if (c == '\\') {
+ buff.append(c);
+ } else if (c == '\"') {
+ buff.append('\\');
+ }
+ buff.append(c);
+ }
+ buff.append('\"');
+ if (boost != null) {
+ buff.append('^').append(boost);
+ }
+ return buff.toString();
+ }
+
+ @Override
+ public int getPrecedence() {
+ return PRECEDENCE_TERM;
+ }
+
+}
\ No newline at end of file
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/LikePattern.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/LikePattern.java?rev=1501713&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/LikePattern.java
(added)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/LikePattern.java
Wed Jul 10 10:36:52 2013
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.query.fulltext;
+
+/**
+ * A pattern matcher for LIKE-style patterns.
+ * <p>
+ * In a pattern, '%' matches any run of characters (including none), '_'
+ * matches exactly one character, and a backslash makes the character that
+ * follows it match literally. A pattern that ends with a backslash that
+ * has nothing left to escape is invalid and never matches.
+ */
+public class LikePattern {
+
+ // TODO LIKE: optimize condition to '=' when no patterns are used, or
+ // 'between x and x+1'
+ // TODO LIKE: what to do for invalid patterns (patterns ending with a
+ // backslash)
+
+ // element types stored in patternTypes: MATCH compares one literal
+ // character, ONE ('_') accepts any single character, ANY ('%') accepts
+ // any run of characters
+ private static final int MATCH = 0, ONE = 1, ANY = 2;
+
+ // the pattern characters kept by initPattern, as a string; stays null
+ // when the pattern is null or invalid
+ private String patternString;
+ // set when the pattern ends with a backslash that escapes nothing
+ private boolean invalidPattern;
+ // patternChars[i] and patternTypes[i] describe pattern element i; only
+ // the first patternLength entries are in use
+ private char[] patternChars;
+ private int[] patternTypes;
+ private int patternLength;
+ // bounds computed by initBounds; null when no bound could be derived
+ private String lowerBounds, upperBound;
+
+ /**
+ * Create a matcher: parses the pattern, then computes the bounds.
+ *
+ * @param pattern the pattern; with null, only the empty string matches
+ */
+ public LikePattern(String pattern) {
+ initPattern(pattern);
+ initBounds();
+ }
+
+ /**
+ * Check whether the whole value matches the pattern.
+ *
+ * @param value the text to test (must not be null)
+ * @return true if it matches; always false for an invalid pattern
+ */
+ public boolean matches(String value) {
+ return !invalidPattern && compareAt(value, 0, 0, value.length(),
patternChars, patternTypes);
+ }
+
+ /**
+ * Check whether the pattern character at index pi is equal to the
+ * character of s at index si.
+ */
+ private static boolean compare(char[] pattern, String s, int pi, int si) {
+ return pattern[pi] == s.charAt(si);
+ }
+
+ /**
+ * Match the pattern elements starting at index pi against s starting at
+ * index si. Calls itself for each candidate position after an ANY
+ * element.
+ *
+ * @param s the text to test
+ * @param pi the index of the first pattern element to process
+ * @param si the index of the first character of s to process
+ * @param sLen the length of s
+ * @param pattern the pattern characters
+ * @param types the pattern element types
+ * @return true if the remaining pattern matches the remaining text
+ */
+ private boolean compareAt(String s, int pi, int si, int sLen, char[]
pattern, int[] types) {
+ for (; pi < patternLength; pi++) {
+ int type = types[pi];
+ switch (type) {
+ case MATCH:
+ // need one more character, and it has to be the literal one
+ if (si >= sLen || !compare(pattern, s, pi, si++)) {
+ return false;
+ }
+ break;
+ case ONE:
+ // need one more character, whatever it is
+ if (si++ >= sLen) {
+ return false;
+ }
+ break;
+ case ANY:
+ // an ANY at the very end accepts whatever text is left
+ if (++pi >= patternLength) {
+ return true;
+ }
+ // try each remaining position where the character equals the
+ // character of the next pattern element (compared literally)
+ while (si < sLen) {
+ if (compare(pattern, s, pi, si) && compareAt(s, pi, si,
sLen, pattern, types)) {
+ return true;
+ }
+ si++;
+ }
+ return false;
+ default:
+ throw new IllegalArgumentException("Internal error: " + type);
+ }
+ }
+ // pattern used up: a match only if the text is used up as well
+ return si == sLen;
+ }
+
+ /**
+ * Parse the pattern into patternChars / patternTypes / patternLength and
+ * set patternString. For a null pattern the arrays are set to null. If
+ * the pattern ends with a backslash that escapes nothing, invalidPattern
+ * is set and parsing stops.
+ *
+ * @param p the pattern, may be null
+ */
+ private void initPattern(String p) {
+ patternLength = 0;
+ if (p == null) {
+ patternTypes = null;
+ patternChars = null;
+ return;
+ }
+ int len = p.length();
+ patternChars = new char[len];
+ patternTypes = new int[len];
+ // true while the most recently stored element that was not a '_' is ANY
+ boolean lastAny = false;
+ for (int i = 0; i < len; i++) {
+ char c = p.charAt(i);
+ int type;
+ if (c == '\\') {
+ if (i >= len - 1) {
+ invalidPattern = true;
+ return;
+ }
+ // the escaped character is stored as a literal, the backslash
+ // itself is dropped
+ c = p.charAt(++i);
+ type = MATCH;
+ lastAny = false;
+ } else if (c == '%') {
+ if (lastAny) {
+ // a further '%' adds nothing, skip it
+ continue;
+ }
+ type = ANY;
+ lastAny = true;
+ } else if (c == '_') {
+ // lastAny is left as it is, so a '%' after "%_" is skipped too
+ type = ONE;
+ } else {
+ type = MATCH;
+ lastAny = false;
+ }
+ patternTypes[patternLength] = type;
+ patternChars[patternLength++] = c;
+ }
+ // move each ANY behind the ONE elements that directly follow it; only
+ // the types are swapped, patternChars keeps the original order
+ for (int i = 0; i < patternLength - 1; i++) {
+ if (patternTypes[i] == ANY && patternTypes[i + 1] == ONE) {
+ patternTypes[i] = ONE;
+ patternTypes[i + 1] = ANY;
+ }
+ }
+ patternString = new String(patternChars, 0, patternLength);
+ }
+
+ /**
+ * Get the pattern characters that were kept while parsing.
+ *
+ * @return the kept characters, or null if the pattern was null or invalid
+ */
+ @Override
+ public String toString() {
+ return patternString;
+ }
+
+ /**
+ * Get the lower bound if any.
+ *
+ * @return return the lower bound, or null if unbound
+ */
+ public String getLowerBound() {
+ return lowerBounds;
+ }
+
+ /**
+ * Get the upper bound if any.
+ *
+ * @return return the upper bound, or null if unbound
+ */
+ public String getUpperBound() {
+ return upperBound;
+ }
+
+ /**
+ * Compute the lower and upper bound from the literal characters at the
+ * start of the pattern. Nothing is set if the pattern is invalid, empty,
+ * or does not start with a literal character. If the whole pattern is
+ * literal, both bounds are that text. Otherwise the lower bound is the
+ * literal prefix and the upper bound is the prefix with its last
+ * character replaced by a higher one.
+ */
+ private void initBounds() {
+ if (invalidPattern) {
+ return;
+ }
+ if (patternLength <= 0 || patternTypes[0] != MATCH) {
+ // can't use an index
+ return;
+ }
+ // collect the leading run of literal characters
+ int maxMatch = 0;
+ StringBuilder buff = new StringBuilder();
+ while (maxMatch < patternLength && patternTypes[maxMatch] == MATCH) {
+ buff.append(patternChars[maxMatch++]);
+ }
+ String lower = buff.toString();
+ if (lower.isEmpty()) {
+ return;
+ }
+ if (maxMatch == patternLength) {
+ // no wildcard at all: both bounds are the pattern text
+ lowerBounds = upperBound = lower;
+ return;
+ }
+ lowerBounds = lower;
+ char next = lower.charAt(lower.length() - 1);
+ // search the 'next' unicode character (or at least a character
+ // that is higher)
+ // if none of the candidates compares higher, upperBound stays null
+ for (int i = 1; i < 2000; i++) {
+ String upper = lower.substring(0, lower.length() - 1) + (char)
(next + i);
+ if (upper.compareTo(lower) > 0) {
+ upperBound = upper;
+ return;
+ }
+ }
+ }
+
+}
\ No newline at end of file
Copied:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
(from r1499285,
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java)
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?p2=jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java&p1=jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java&r1=1499285&r2=1501713&rev=1501713&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
Wed Jul 10 10:36:52 2013
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.jackrabbit.oak.query;
+package org.apache.jackrabbit.oak.query.fulltext;
import java.util.ArrayList;
import java.util.List;
@@ -23,6 +23,7 @@ import org.apache.jackrabbit.oak.api.Pro
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.query.Query;
import org.apache.jackrabbit.oak.query.ast.AndImpl;
import org.apache.jackrabbit.oak.query.ast.ConstraintImpl;
import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java?rev=1501713&r1=1501712&r2=1501713&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/FullTextTest.java
Wed Jul 10 10:36:52 2013
@@ -23,6 +23,8 @@ import static org.junit.Assert.fail;
import java.text.ParseException;
+import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
+import org.apache.jackrabbit.oak.query.fulltext.FullTextParser;
import org.junit.Test;
/**
@@ -32,6 +34,8 @@ public class FullTextTest {
@Test
public void and() throws ParseException {
+ assertEquals("\"hello\" \"world\"", convertPattern("hello world"));
+ assertEquals("\"hello\" \"or\" \"world\"", convertPattern("hello or
world"));
assertFalse(test("hello world", "hello"));
assertFalse(test("hello world", "world"));
assertTrue(test("hello world", "world hello"));
@@ -40,6 +44,7 @@ public class FullTextTest {
@Test
public void or() throws ParseException {
+ assertEquals("\"hello\" OR \"world\"", convertPattern("hello OR
world"));
assertTrue(test("hello OR world", "hello"));
assertTrue(test("hello OR world", "world"));
assertFalse(test("hello OR world", "hi"));
@@ -47,12 +52,15 @@ public class FullTextTest {
@Test
public void not() throws ParseException {
+ assertEquals("\"hello\" -\"world\"", convertPattern("hello -world"));
assertTrue(test("hello -world", "hello"));
assertFalse(test("hello -world", "hello world"));
}
@Test
public void quoted() throws ParseException {
+ assertEquals("\"hello world\"", convertPattern("\"hello world\""));
+ assertEquals("\"hello world\" \"world\"", convertPattern("\"hello
world\" world"));
assertTrue(test("\"hello world\"", "hello world"));
assertFalse(test("\"hello world\"", "world hello"));
assertTrue(test("\"hello-world\"", "hello-world"));
@@ -64,6 +72,7 @@ public class FullTextTest {
@Test
public void escaped() throws ParseException {
+ assertEquals("\"\\\"hello world\\\"\"", convertPattern("\"\\\"hello
world\\\"\""));
assertFalse(test("\\\"hello\\\"", "hello"));
assertTrue(test("\"hello\"", "\"hello\""));
assertTrue(test("\\\"hello\\\"", "\"hello\""));
@@ -72,6 +81,14 @@ public class FullTextTest {
}
+ /**
+ * A "^boost" suffix on a term or quoted phrase is kept in the string form
+ * of the parsed expression, and the boosted term still matches the text.
+ */
@Test
+ public void boost() throws ParseException {
+ assertEquals("\"hello\"^2", convertPattern("hello^2"));
+ assertEquals("\"hello world\"^2", convertPattern("\"hello world\"^2"));
+ assertTrue(test("hello^2", "hello"));
+ assertTrue(test("\"hello\"^0.2", "hello"));
+ }
+
+ @Test
public void invalid() throws ParseException {
testInvalid("", "(*); expected: term");
testInvalid("x OR ", "x OR(*); expected: term");
@@ -94,9 +111,14 @@ public class FullTextTest {
assertEquals(expectedMessage, msg);
}
}
+
+ /**
+ * Parses the given full-text pattern and returns the string form of the
+ * resulting expression.
+ *
+ * @param pattern the full-text search pattern
+ * @return the result of toString() on the parsed expression
+ * @throws ParseException if the parser rejects the pattern
+ */
+ private static String convertPattern(String pattern) throws ParseException
{
+ FullTextExpression e = FullTextParser.parse(pattern);
+ return e.toString();
+ }
private static boolean test(String pattern, String value) throws
ParseException {
- FullTextSearchImpl.FullTextExpression e =
FullTextSearchImpl.FullTextParser.parse(pattern);
+ FullTextExpression e = FullTextParser.parse(pattern);
return e.evaluate(value);
}
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/LikePatternTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/LikePatternTest.java?rev=1501713&r1=1501712&r2=1501713&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/LikePatternTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/ast/LikePatternTest.java
Wed Jul 10 10:36:52 2013
@@ -21,6 +21,8 @@ package org.apache.jackrabbit.oak.query.
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
+
+import org.apache.jackrabbit.oak.query.fulltext.LikePattern;
import org.junit.Test;
/**
@@ -37,7 +39,7 @@ public class LikePatternTest {
}
private static void pattern(String pattern, String match, String noMatch,
String lower, String upper) {
- ComparisonImpl.LikePattern p = new ComparisonImpl.LikePattern(pattern);
+ LikePattern p = new LikePattern(pattern);
if (match != null) {
assertTrue(p.matches(match));
}