Author: ssmiweve
Date: 2008-01-24 20:12:02 +0100 (Thu, 24 Jan 2008)
New Revision: 6035
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
Log:
SEARCH-3967 - Splitting av geo med bindestrek (word-joining hyphens)
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
===================================================================
---
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
2008-01-24 18:27:23 UTC (rev 6034)
+++
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
2008-01-24 19:12:02 UTC (rev 6035)
@@ -17,8 +17,6 @@
*
* Created on 12 January 2006, 12:32
*
- * To change this template, choose Tools | Template Manager
- * and open the template in the editor.
*/
package no.sesat.search.query.parser;
@@ -68,7 +66,10 @@
{'\u2010', '\u2015'}
};
- char[] OPERATOR_CHARACTERS = {'*', '-', '+', '(', ')'};
+ /**
+ * Duplication of the parser's operators. Must be kept up to date!
+ */
+ String[] OPERATORS = {"*", " -", " +", "(", ")"};
/** The Context an QueryParser implementation needs to work off.
* The QueryParser is not responsible for
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
===================================================================
---
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
2008-01-24 18:27:23 UTC (rev 6034)
+++
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
2008-01-24 19:12:02 UTC (rev 6035)
@@ -50,7 +50,6 @@
import org.apache.log4j.Logger;
-import org.apache.commons.lang.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@@ -121,13 +120,14 @@
final StringBuilder operatorRegexpBuilder = new StringBuilder();
- operatorRegexpBuilder.append("[");
+ operatorRegexpBuilder.append('(');
- for (char c : QueryParser.OPERATOR_CHARACTERS) {
- operatorRegexpBuilder.append('\\').append(c);
+ for (String c : QueryParser.OPERATORS) {
+ operatorRegexpBuilder.append('"' + Matcher.quoteReplacement(c) +
"\"|");
}
- operatorRegexpBuilder.append("]");
+ operatorRegexpBuilder.setLength(operatorRegexpBuilder.length() - 1);
+ operatorRegexpBuilder.append(')');
OPERATOR_REGEX = operatorRegexpBuilder.toString();
}
@@ -157,8 +157,8 @@
analysisResult = queryFast(context.getQueryString()
.replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces.
.replaceAll(SKIP_REGEX, "")
- .replaceAll(OPERATOR_REGEX, "")
- .replaceAll("xxKEEPWSxx", " ")); // Hack to keep spaces.
+ .replaceAll("xxKEEPWSxx", " ") // Hack to keep spaces.
+ .replaceAll(OPERATOR_REGEX, " "));
}
// Public --------------------------------------------------------
@@ -174,6 +174,7 @@
* </ul>
*
* @param token can be any of the above
+ * @param query
* @return true if the query contains any of the above
*/
public boolean evaluateToken(final TokenPredicate token, final String
term, final String query) {
@@ -192,7 +193,7 @@
} else {
// HACK since DefaultOperatorClause wraps its children
in parenthesis
- // Also remove any operator characters. (SEARCH-3883)
+ // Also remove any operator characters. (SEARCH-3883 &
SEARCH-3967)
final String hackTerm =
term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, "");
for (TokenMatch occurance :
analysisResult.get(listname)) {
@@ -326,6 +327,7 @@
* Search fast and find out if the given tokens are company, firstname,
lastname etc
* @param query
*/
+ @SuppressWarnings("unchecked")
private Map<String, List<TokenMatch>> queryFast(final String query) throws
VeryFastListQueryException{
LOG.trace("queryFast( " + query + " )");
_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits