Author: ssmiweve
Date: 2008-03-21 00:02:17 +0100 (Fri, 21 Mar 2008)
New Revision: 6264
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
Log:
SEARCH-Sortby: word [space] [hyphen] [space] word
strip out SKIP and OPERATOR characters in *both* query and term.
SEARCH-3742 - Hunt down memory leak (AspectJ)
TokenMatch now immutable and constructed through the flyweight pattern.
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
===================================================================
---
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
2008-03-18 20:37:04 UTC (rev 6263)
+++
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,5 +1,5 @@
/*
- * Copyright (2005-2007) Schibsted Søk AS
+ * Copyright (2005-2008) Schibsted Søk AS
* This file is part of SESAT.
*
* SESAT is free software: you can redistribute it and/or modify
@@ -58,7 +58,7 @@
private static final Collection<TokenPredicate> PREDICATES_APPLICABLE;
static {
- final Collection<TokenPredicate> predicates = new ArrayList();
+ final Collection<TokenPredicate> predicates = new
ArrayList<TokenPredicate>();
// Add all TokenPredicates. Unfortunately we have no way of globally
knowing
// which TokenPredicates can be multi-term (multi-word) matches.
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
===================================================================
---
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
2008-03-18 20:37:04 UTC (rev 6263)
+++
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,4 +1,4 @@
-/* Copyright (2005-2007) Schibsted Søk AS
+/* Copyright (2005-2008) Schibsted Søk AS
* This file is part of SESAT.
*
* SESAT is free software: you can redistribute it and/or modify
@@ -51,19 +51,17 @@
/**
* Duplication of the parser's definition of SKIP. Must be kept uptodate!
+ * It's actually a duplication of the WORD_SEPARATOR (but that is itself a
duplication of SKIP).
*/
char[][] SKIP_CHARACTER_RANGES = {
{' ', ' '},
{'!', '!'},
- {'\u0023', '\''/*\u0027*/},
- {'\u002a', '\u002c'},
- {'\u002e', '\u002f'},
- {'\u003b', '\u0040'},
+ {'\u0023', '\u0040'},
{'\u005b', '\u0060'},
{'\u007b', '\u00bf'},
{'\u00d7', '\u00d7'},
{'\u00f7', '\u00f7'},
- {'\u2010', '\u2015'}
+ {'\u2010', '\u2015'}
};
/**
@@ -72,10 +70,10 @@
String[] OPERATORS = {"*", " -", " +", "(", ")"};
/** The Context an QueryParser implementation needs to work off.
- * The QueryParser is not responsible for
+ * The QueryParser's context is responsible for:
* - holding the user's orginal inputted query string,
* - holding the tokenEvalautorFactory responsible for tokenPredicate to
evaluator mappings,
- * - creation of Clause subtypes.
+ * - creation of Clause subtypes (using the flyweight pattern).
**/
public interface Context extends BaseContext, QueryStringContext,
TokenEvaluationEngineContext {
@@ -186,7 +184,7 @@
* Creator wrapper method for NumberGroupClause objects.
* The methods also allow a chunk of creation logic for the
NumberGroupClause to be moved
* out of the QueryParserImpl.jj file to here.
- *
+ *
* @param term the term this clause represents.
* @param field any field this clause was specified against.
* @return returns a OrOrganisationNumberClauseImplnstance matching
the term, left and right child clauses.
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
===================================================================
---
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
2008-03-18 20:37:04 UTC (rev 6263)
+++
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,5 +1,5 @@
/*
- * Copyright (2005-2007) Schibsted Søk AS
+ * Copyright (2005-2008) Schibsted Søk AS
* This file is part of SESAT.
*
* SESAT is free software: you can redistribute it and/or modify
@@ -17,53 +17,95 @@
*/
package no.sesat.search.query.token;
+import java.lang.ref.Reference;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import no.sesat.commons.ref.ReferenceMap;
/** Used by VeryFastTokenEvaluator for matches against part of the query to a
fast list.
- *
+ *
+ * <b>Immutable</b>
+ *
* @author <a href="mailto:[EMAIL PROTECTED]">Mck</a>
* @version $Id$
**/
-final class TokenMatch implements Comparable {
+final class TokenMatch{
+ // Constants -----------------------------------------------------
+
+ private static final int WEAK_CACHE_INITIAL_CAPACITY = 2000;
+ private static final float WEAK_CACHE_LOAD_FACTOR = 0.5f;
+ private static final int WEAK_CACHE_CONCURRENCY_LEVEL = 16;
+
+ private static final ReferenceMap<Integer,TokenMatch> WEAK_CACHE
+ = new ReferenceMap<Integer,TokenMatch>(
+ ReferenceMap.Type.WEAK,
+ new ConcurrentHashMap<Integer,Reference<TokenMatch>>(
+ WEAK_CACHE_INITIAL_CAPACITY,
+ WEAK_CACHE_LOAD_FACTOR,
+ WEAK_CACHE_CONCURRENCY_LEVEL));
+
+ // Static --------------------------------------------------------
+
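+ /** Hands out an instance given the 'constructing arguments'.
+ * We use the flyweight pattern since instances are immutable.
+ *
+ * @param token the name of the fast list the match belongs to
+ * @param match the matched text, embedded in the regular expression
+ * @param value the value associated with the match
+ * @return the instance cached under the arguments' hash, else a new one
+ */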
+ public static TokenMatch instanceOf(
+ final String token,
+ final String match,
+ final String value) {
+
+ final int hashCode = computeHashCode(token, match, value);
+
+ TokenMatch tm = WEAK_CACHE.get(hashCode);
+
+ if(null == tm){
+ tm = new TokenMatch(token, match, value);
+ WEAK_CACHE.put(hashCode, tm);
+ }
+
+ return tm;
+ }
+
+ private static int computeHashCode(
+ final String token,
+ final String match,
+ final String value) {
+
+ int result = 17;
+ result = 37*result + token.hashCode();
+ result = 37*result + match.hashCode();
+ result = 37*result + value.hashCode();
+ return result;
+ }
+
+ // Attributes ----------------------------------------------------
+
private final String token;
private final String match;
private final String value;
- private final Integer start;
- private final Integer end;
private final Pattern matcher;
- /**
- * Holds value of property _touched.
- */
- private boolean touched = false;
- public TokenMatch(final String token, final String match, final String
value, final int start, final int end) {
+ // Constructors -------------------------------------------------
+
+ private TokenMatch(final String token, final String match, final String
value) {
+
this.token = token;
this.match = match;
this.value = value;
- this.start = Integer.valueOf(start);
- this.end = Integer.valueOf(end);
// (^|\s) or ($|\s) is neccessary to avoid matching fragments of words.
matcher = Pattern.compile("(^|\\s)" + match + "($|\\s)",
RegExpEvaluatorFactory.REG_EXP_OPTIONS);
}
- public int compareTo(final Object o) {
- final TokenMatch other = (TokenMatch) o;
+ // Public --------------------------------------------------------
- return start.compareTo(other.getStart());
- }
-
/**
- * Get the start index.
- *
- * @return the end index.
- */
- public Integer getStart() {
- return start;
- }
-
- /**
* Get the match.
*
* @return the match.
@@ -75,7 +117,7 @@
/**
* Get the regular expression Matcher to use to find a sub-match.
*
- * @param string
+ * @param string
* @return the match.
*/
public Matcher getMatcher(final String string) {
@@ -100,38 +142,23 @@
return value;
}
- /**
- * Get the end.
- *
- * @return the end.
- */
- public Integer getEnd() {
- return end;
+ @Override
+ public String toString() {
+ return "token=\"" + token
+ + "\"; match=\"" + match
+ + "\"; value=" + (value == null ? "null" : "\"" + value + "\"")
+ + "; matcher=" + matcher + ";";
}
- /**
- * Getter for property touched.
- * @return Value of property touched.
- */
- public boolean isTouched() {
- return touched;
+ @Override
+ public boolean equals(Object obj) {
+ return obj instanceof TokenMatch && obj.hashCode() == hashCode();
}
- /**
- * Setter for property touched.
- * @param touched New value of property touched.
- */
- public void setTouched(final boolean touched) {
- this.touched = touched;
+ @Override
+ public int hashCode() {
+ return computeHashCode(token, match, value);
}
- @Override
- public String toString() {
- return "token=\"" + token
- + "\"; match=\"" + match
- + "\"; value=" + (value == null ? "null" : "\"" + value + "\"")
- + "; start=" + start
- + "; end=" + end
- + "; matcher=" + matcher + ";";
- }
+
}
Modified:
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
===================================================================
---
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
2008-03-18 20:37:04 UTC (rev 6263)
+++
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,4 +1,4 @@
-/* Copyright (2005-2007) Schibsted Søk AS
+/* Copyright (2005-2008) Schibsted Søk AS
* This file is part of SESAT.
*
* SESAT is free software: you can redistribute it and/or modify
@@ -61,7 +61,9 @@
/**
* VeryFastTokenEvaluator is part of no.sesat.search.query.
*
+ * @todo sesat-ise: bring out to generic.sesam, make CGI_PATH easily
configurable, and make the cache settings configurable.
*
+ *
* @author Ola Marius Sagli <a href="[EMAIL PROTECTED]">ola at schibstedsok</a>
* @author <a href="mailto:[EMAIL PROTECTED]">Mck Semb Wever</a>
* @version $Id$
@@ -73,7 +75,7 @@
}
// Constants -----------------------------------------------------
-
+
private static final Logger LOG =
Logger.getLogger(VeryFastTokenEvaluator.class);
private static final String ERR_FAILED_INITIALISATION = "Failed reading
configuration files";
private static final String ERR_QUERY_FAILED = "Querying the fast list
failed on ";
@@ -92,24 +94,28 @@
private static final Map<Site,Map<TokenPredicate,String[]>> LIST_NAMES
= new HashMap<Site,Map<TokenPredicate,String[]>>();
private static final ReentrantReadWriteLock LIST_NAMES_LOCK = new
ReentrantReadWriteLock();
-
- private static final GeneralCacheAdministrator CACHE = new
GeneralCacheAdministrator();
- private static final int REFRESH_PERIOD = 60; // one minute
- private static final int CACHE_CAPACITY = 100; // smaller than usual as
each entry can contain up to 600 values!
-
+
+ private static final GeneralCacheAdministrator CACHE_QUERY = new
GeneralCacheAdministrator();
+ private static final int REFRESH_PERIOD = 60;
+ private static final int CACHE_QUERY_CAPACITY = 100; // smaller than usual
as each entry can contain up to 600 values!
+
+ private static final GeneralCacheAdministrator CACHE_MATCHES = new
GeneralCacheAdministrator();
+ private static final int CACHE_MATCHES_CAPACITY = 1000;
+
private static final String OPERATOR_REGEX;
// Attributes ----------------------------------------------------
-
+
private final HTTPClient httpClient;
private final Context context;
private final Map<String, List<TokenMatch>> analysisResult;
// Static --------------------------------------------------------
-
+
static{
- CACHE.setCacheCapacity(CACHE_CAPACITY);
-
+ CACHE_QUERY.setCacheCapacity(CACHE_QUERY_CAPACITY);
+ CACHE_MATCHES.setCacheCapacity(CACHE_MATCHES_CAPACITY);
+
// build our operator regular expression
final StringBuilder operatorRegexpBuilder = new StringBuilder();
@@ -126,10 +132,10 @@
}
// Constructors -------------------------------------------------
-
+
/**
* Search fast and initialize analysis result.
- * @param cxt
+ * @param cxt
*/
VeryFastTokenEvaluator(final Context cxt) throws
VeryFastListQueryException{
@@ -143,19 +149,15 @@
final int port =
Integer.parseInt(props.getProperty(TOKEN_PORT_PROPERTY));
httpClient = HTTPClient.instance(host, port);
-
+
init();
// Remove whitespace (except space itself) and operator characters.
- analysisResult = queryFast(context.getQueryString()
- .replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces.
- .replaceAll(SKIP_REGEX, "")
- .replaceAll("xxKEEPWSxx", " ") // Hack to keep spaces.
- .replaceAll(OPERATOR_REGEX, " "));
+ analysisResult = queryFast(cleanString(context.getQueryString()));
}
// Public --------------------------------------------------------
-
+
/**
* Find out if given token is on or more of the following.
* <li>GEO
@@ -167,14 +169,14 @@
* </ul>
*
* @param token can be any of the above
- * @param query
+ * @param query
* @return true if the query contains any of the above
*/
public boolean evaluateToken(final TokenPredicate token, final String
term, final String query) {
boolean evaluation = false;
final String[] listnames = getListNames(token);
-
+
if(null != listnames){
for(int i = 0; !evaluation && i < listnames.length; ++i){
@@ -186,17 +188,14 @@
} else {
// HACK since DefaultOperatorClause wraps its children
in parenthesis
- // Also remove any operator characters. (SEARCH-3883 &
SEARCH-3967)
- final String hackTerm =
term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, "");
+ final String hackTerm =
cleanString(term.replaceAll("\\(|\\)",""));
for (TokenMatch occurance :
analysisResult.get(listname)) {
final Matcher m = occurance.getMatcher(hackTerm);
evaluation = m.find() && m.start() == 0 && m.end()
== hackTerm.length();
- // keep track of which TokenMatch's we've used.
if (evaluation) {
- occurance.setTouched(true);
break;
}
}
@@ -215,28 +214,26 @@
* get all match values and values for given Fast list .
*
* @param token
- * @param term
+ * @param term
* @return a list of Tokens
*/
public Set<String> getMatchValues(final TokenPredicate token, final String
term) {
-
+
final Set<String> values = new HashSet<String>();
-
+
final String[] listnames = getListNames(token);
if(null != listnames){
for(int i = 0; i < listnames.length; i++){
final String listname = listnames[i];
if (analysisResult.containsKey(listname)) {
-
+
// HACK since DefaultOperatorClause wraps its children in
parenthesis
- // Also remove any operator characters. (SEARCH-3883 &
SEARCH-3967)
- final String hackTerm =
term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, "");
-
+ final String hackTerm =
cleanString(term.replaceAll("\\(|\\)",""));
+
for (TokenMatch occurance : analysisResult.get(listname)) {
-
+
final Matcher m = occurance.getMatcher(hackTerm);
- // keep track of which TokenMatch's we've used.
if (m.find() && m.start() == 0 && m.end() ==
hackTerm.length()) {
values.add(occurance.getValue());
}
@@ -248,20 +245,20 @@
}
/**
- *
+ *
* @param predicate
* @return
*/
public boolean isQueryDependant(final TokenPredicate predicate) {
return predicate.name().startsWith(EXACT_PREFIX.toUpperCase());
}
-
+
// Package protected ---------------------------------------------
// Protected -----------------------------------------------------
// Private -------------------------------------------------------
-
+
private void init() {
try {
@@ -276,17 +273,17 @@
final Site site = cxt.getSite();
final Site parent = site.getParent();
final boolean parentUninitialised;
-
+
try{
LIST_NAMES_LOCK.readLock().lock();
-
+
// initialise the parent site's configuration
parentUninitialised = (null != parent && null ==
LIST_NAMES.get(parent));
-
+
}finally{
LIST_NAMES_LOCK.readLock().unlock();
}
-
+
if(parentUninitialised){
initImpl(ContextWrapper.wrap(
Context.class,
@@ -298,10 +295,10 @@
cxt
));
}
-
+
try{
LIST_NAMES_LOCK.writeLock().lock();
-
+
if(null == LIST_NAMES.get(site)){
// create map entry for this site
@@ -366,14 +363,14 @@
if (query != null && 0 < query.length()) {
try{
- result = (Map<String, List<TokenMatch>>)
CACHE.getFromCache(query, REFRESH_PERIOD);
+ result = (Map<String, List<TokenMatch>>)
CACHE_QUERY.getFromCache(query, REFRESH_PERIOD);
} catch (NeedsRefreshException nre) {
-
+
boolean updatedCache = false;
result = new HashMap<String,List<TokenMatch>>();
String url = null;
-
+
try {
final String token =
URLEncoder.encode(query.replaceAll("\"", ""), "utf-8");
@@ -392,21 +389,21 @@
final String name = trans.getAttribute("NAME");
final String custom = trans.getAttribute("CUSTOM");
final String exactname = 0 <=
name.indexOf(LIST_PREFIX) && 0 < name.indexOf(LIST_SUFFIX)
- ? LIST_PREFIX + EXACT_PREFIX
+ ? LIST_PREFIX + EXACT_PREFIX
+ name.substring(name.indexOf('_') + 1,
name.indexOf("QM"))
+ LIST_SUFFIX
: null;
- if(custom.matches(".+->.*") && usesListName(name,
exactname)){
+ if(custom.matches(".+->.*") && usesListName(name,
exactname)){
final String match = (custom.indexOf("->") >0
? custom.substring(0, custom.indexOf("->"))
: custom)
// remove words made solely of characters
that the parser considers whitespace
.replaceAll("\\b" + SKIP_REGEX + "+\\b", "
");
-
- final String value = custom.indexOf("->") > 0
- ? custom.substring(custom.indexOf("->") +
2)
+
+ final String value = custom.indexOf("->") > 0
+ ? custom.substring(custom.indexOf("->") +
2)
: null;
addMatch(name, match, value,query, result);
@@ -417,7 +414,8 @@
}
}
}
- CACHE.putInCache(query, result);
+ result = Collections.unmodifiableMap(result);
+ CACHE_QUERY.putInCache(query, result);
updatedCache = true;
} catch (UnsupportedEncodingException ignore) {
@@ -432,8 +430,8 @@
result = (Map<String,
List<TokenMatch>>)nre.getCacheContent();
throw new VeryFastListQueryException(ERR_PARSE_FAILED +
url, e1);
}finally{
- if(!updatedCache){
- CACHE.cancelUpdate(query);
+ if(!updatedCache){
+ CACHE_QUERY.cancelUpdate(query);
}
}
}
@@ -458,7 +456,7 @@
qNew);
while (m.find()) {
- final TokenMatch tknMatch = new TokenMatch(name, match, value,
m.start(), m.end());
+ final TokenMatch tknMatch = TokenMatch.instanceOf(name, match,
value);
if (!result.containsKey(name)) {
result.put(name, new ArrayList<TokenMatch>());
@@ -467,27 +465,31 @@
result.get(name).add(tknMatch);
if (result.get(name).size() % 100 == 0) {
- LOG.warn("Pattern: " + pattern.pattern() + " name: " + name +
" query: " + query + " match: " + match + " query2: " + qNew);
+ LOG.warn("Pattern: " + pattern.pattern()
+ + " name: " + name
+ + " query: " + query
+ + " match: " + match
+ + " query2: " + qNew);
}
}
}
private boolean usesListName(final String listname, final String
exactname){
-
+
boolean uses = false;
try{
LIST_NAMES_LOCK.readLock().lock();
Site site = context.getSite();
-
+
while(!uses && null != site){
-
+
// find listnames used for this token predicate
for(String[] listnames : LIST_NAMES.get(site).values()){
uses |= 0 <= Arrays.binarySearch(listnames, listname,
null);
uses |= null != exactname && 0 <=
Arrays.binarySearch(listnames, exactname, null);
if(uses){ break; }
}
-
+
// prepare to go to parent
site = site.getParent();
}
@@ -496,20 +498,21 @@
}
return uses;
}
+
private String[] getListNames(final TokenPredicate token){
-
-
-
+
+
+
String[] listNames = null;
try{
LIST_NAMES_LOCK.readLock().lock();
Site site = context.getSite();
-
+
while(null == listNames && null != site){
-
+
// find listnames used for this token predicate
listNames = LIST_NAMES.get(site).get(token);
-
+
// prepare to go to parent
site = site.getParent();
}
@@ -518,6 +521,18 @@
}
return listNames;
}
-
+
+ private String cleanString(final String string){
+
+ // Strip out SKIP characters we are not interested in.
+ // Also remove any operator characters. (SEARCH-3883 & SEARCH-3967)
+
+ return string
+ .replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces.
+ .replaceAll(SKIP_REGEX, "")
+ .replaceAll("xxKEEPWSxx", " ") // Hack to keep spaces.
+ .replaceAll(OPERATOR_REGEX, " ");
+ }
+
// Inner classes -------------------------------------------------
}
Modified: branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj
===================================================================
--- branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj 2008-03-18
20:37:04 UTC (rev 6263)
+++ branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj 2008-03-20
23:02:17 UTC (rev 6264)
@@ -1,4 +1,4 @@
-/** Copyright (2005-2007) Schibsted Søk AS
+/** Copyright (2005-2008) Schibsted Søk AS
* This file is part of SESAT.
*
* SESAT is free software: you can redistribute it and/or modify
@@ -29,7 +29,7 @@
import java.util.ArrayList;
import no.sesat.search.query.*;
-/** This class is NOT synchronized. You must use a separate instance for each
query.
+/** This class is NOT synchronized. You must use a separate instance for each
query.
*
* @version $Id$
* @author <a href="mailto:[EMAIL PROTECTED]>mick</a>
@@ -65,14 +65,14 @@
PARSER_END(QueryParserImpl)
-/** Skip these characters, they are considered "white space"
+/** Skip these characters, they are considered "white space"
* See http://www.unicode.org/charts/ for unicode charts.
*
* WARNING!! These ranges are duplicated in QueryParser.java
* !! Always update it after any changes here !!
**/
<*>SKIP : {
- " " | "!"
+ " " | "!"
| < [ "\u0023"-"\u0027" ] >
| < [ "\u002a"-"\u002c" ] >
| < [ "\u002e"-"\u002f" ] >
@@ -85,15 +85,15 @@
}
/* These are the the above skip characters */
-/* # % & ' */
-/* * + , */
+/* # % & ' */
+/* * + , */
/* . / */
/* ; < = > ? @ */
/* [ \ ] ^ _ ` */
/* ‐ ‑ ‒ – — ― */
<DEFAULT,EMAIL_DISABLED,PHONE_NUMBER_DISABLED,NUMBER_GROUP_DISABLED,QUOTED_WORD_DISABLED>
-// https?://([-\\w\\.]+)+(:\\d+)?(/~?([\\w/_\\.]*(\\?\\S+)?)?)?
+// https?://([-\\w\\.]+)+(:\\d+)?(/~?([\\w/_\\.]*(\\?\\S+)?)?)?
TOKEN : { <URL:
"http"("s")?"://"<WORD>(":"<INTEGER>)?(<URL_WORD>)*(("?"<URL_WORD>)?)*("#"<URL_WORD>)?>
| <#URL_WORD: (<WORD>|<ENCODED_CHAR>|"-"|"_"|"~"|"/"|"+"|"&")+>
| <#ENCODED_CHAR:
"%"(<DIGIT>|"A"|"a"|"B"|"b"|"C"|"c"|"D"|"d"|"E"|"e"|"F"|"f"){2}>}
@@ -101,7 +101,7 @@
<DEFAULT,URL_DISABLED,PHONE_NUMBER_DISABLED,NUMBER_GROUP_DISABLED,QUOTED_WORD_DISABLED>
TOKEN : { <EMAIL: <WORD>"@"<WORD>"."<WORD>> }
-// Very generic to match any Internation Phone Number !!
+// Very generic to match any Internation Phone Number !!
// TODO deal with optional area codes. eg australian numbers can be written
(02) 4878 9336
// number_group_disabled is a super state to phone_number_disabled
<DEFAULT,URL_DISABLED,EMAIL_DISABLED,QUOTED_WORD_DISABLED>
@@ -117,7 +117,7 @@
<*>TOKEN : {
<AND: ("AND"|"+")>
| <OR: ("OR"|"|")>
- | <NOT: "NOT"|"-">
+ | <NOT: "NOT"|"-">
| <ANDNOT: ("ANDNOT")>
| <INTEGER: ((<DIGIT>)+)>
| <WORD:
(<LETTER>|<DIGIT>|<WORD_SYMBOL_PREFIX>)(<LETTER>|<DIGIT>|<WORD_SYMBOL_MIDDLE>)*("*")?>
@@ -127,8 +127,8 @@
| <#PHONE_SYMBOL: (".")|("-")|("/")>
| <#WORD_SYMBOL_PREFIX: (".")|("<")|("=")|(">")>
| <#WORD_SYMBOL_MIDDLE: (".")|<HYPON>|("_")|("+")>
- | <#WORD_SEPARATOR: [ // just a copy of the SKIP declaration.
- " ", "!",
+ | <#WORD_SEPARATOR: [ // just a copy of the SKIP declaration. see SKIP
comment!
+ " ", "!",
"\u0023"-"\u0040",
"\u005b"-"\u0060",
"\u007b"-"\u00bf",
@@ -173,11 +173,11 @@
]>
}
-Clause parse() : {
+Clause parse() : {
Clause clause;
LOG.info("parsing: "+context.getQueryString());
}{
- (clause = rootPrecedence()) {return clause;}
+ (clause = rootPrecedence()) {return clause;}
}
/** PRECEDENCES **/
@@ -196,16 +196,16 @@
(clause = hiddenDefaultOperation()) { try{return
clause;}finally{exitMethod();} }
|
(clause = looseJoinPrecedence()) { try{return
clause;}finally{exitMethod();} }
-
+
}
Clause looseJoinPrecedence() :{
Clause clause;
enterMethod("looseJoinPrecedence()");
}{
- (clause = orOperation()) { try{return clause;}finally{exitMethod();} }
+ (clause = orOperation()) { try{return clause;}finally{exitMethod();} }
|
- (clause = strongJoinPrecedence()) { try{return
clause;}finally{exitMethod();} }
+ (clause = strongJoinPrecedence()) { try{return
clause;}finally{exitMethod();} }
}
Clause strongJoinPrecedence() :{
@@ -215,8 +215,8 @@
(clause = andOperation()) { try{return clause;}finally{exitMethod();} }
|
(clause = leafPrecedence()) { try{return clause;}finally{exitMethod();} }
-
+
}
Clause leafPrecedence() :{
@@ -226,7 +226,7 @@
(clause = notOperation()) { try{return clause;}finally{exitMethod();} }
|
(clause = andNotOperation()) { try{return clause;}finally{exitMethod();} }
- |
+ |
(clause = leaf()) { try{return clause;}finally{exitMethod();} }
}
@@ -236,7 +236,7 @@
Clause left,right;
enterMethod("hiddenDefaultOperation()");
}{
- ((left = looseJoinPrecedence())(right = noPrecedence()))
+ ((left = looseJoinPrecedence())(right = noPrecedence()))
{ try{return
context.createDefaultOperatorClause(left,right);}finally{exitMethod();} }
}
@@ -246,7 +246,7 @@
enterMethod("andNotOperation()");
}{
(<ANDNOT>(right = noPrecedence())) { try{return
context.createAndNotClause(right);}finally{exitMethod();} }
-}
+}
Clause orOperation() :{
Clause left,right;
@@ -255,7 +255,7 @@
}{
("("(clause = hiddenOrOperation())")") { try{return
clause;}finally{exitMethod();} }
|
- ((left = strongJoinPrecedence())<OR>(right = looseJoinPrecedence()))
+ ((left = strongJoinPrecedence())<OR>(right = looseJoinPrecedence()))
{ try{return
context.createOrClause(left,right);}finally{exitMethod();} }
}
@@ -263,7 +263,7 @@
Clause left,right;
enterMethod("andOperation()");
}{
- ((left = leafPrecedence())<AND>(right = strongJoinPrecedence()))
+ ((left = leafPrecedence())<AND>(right = strongJoinPrecedence()))
{ try{return
context.createAndClause(left,right);}finally{exitMethod();} }
}
@@ -271,10 +271,10 @@
Clause left,right;
enterMethod("hiddenAndOperation()");
}{
- ((left = leafPrecedence())(right = hiddenAndOperation()))
+ ((left = leafPrecedence())(right = hiddenAndOperation()))
{ try{return
context.createAndClause(left,right);}finally{exitMethod();} }
|
- ((left = leafPrecedence())(right = strongJoinPrecedence()))
+ ((left = leafPrecedence())(right = strongJoinPrecedence()))
{ try{return
context.createAndClause(left,right);}finally{exitMethod();} }
}
@@ -282,17 +282,17 @@
Clause left,right;
enterMethod("hiddenOrOperation()");
}{
-
+
// These are the real hidden or operands
- ((left = leafPrecedence())(right = hiddenOrOperation()))
+ ((left = leafPrecedence())(right = hiddenOrOperation()))
{ try{return
context.createOrClause(left,right);}finally{exitMethod();} }
|
- ((left = strongJoinPrecedence())(right = looseJoinPrecedence()))
+ ((left = strongJoinPrecedence())(right = looseJoinPrecedence()))
{ try{return
context.createOrClause(left,right);}finally{exitMethod();} }
|
// Not really an OR clause. Just () wrapped around something else.
Therefore ignore the ()'s
(left = leafPrecedence()) { try{return left;}finally{exitMethod();} }
-
+
}
NotClause notOperation() :{
@@ -313,20 +313,20 @@
// A real field
((field=<WORD>)<FIELD_SUFFIX>)(clause = fieldedLeaf(field)) {try{return
clause;}finally{exitMethod();}}
|
- // An accidential field.
+ // An accidential field.
// XXX This could cause problems as it destroys the construction of a
right-leaning forests.
- (left = fieldedLeaf(null)<FIELD_SUFFIX>)(right = fieldedLeaf(null))
+ (left = fieldedLeaf(null)<FIELD_SUFFIX>)(right = fieldedLeaf(null))
{try{return
context.createDefaultOperatorClause(left,right);}finally{exitMethod();}}
|
(clause = fieldedLeaf(null)) {try{return clause;}finally{exitMethod();}}
}
-
+
Clause fieldedLeaf(final Token field) :{
enterMethod("fieldedLeaf()");
}{
- <PHONE_NUMBER>
- {
+ <PHONE_NUMBER>
+ {
try{
// remove the PHONE_SYMBOL sub-tokens
final String term = token.image.replaceAll("\\.|-|/|
|\\(|\\)","");
@@ -340,9 +340,9 @@
}finally{exitMethod();}
}
- |
+ |
<NUMBER_GROUP>
- {
+ {
try{
// Remove whitespace
final String term = token.image.trim().replaceAll(" ","");
@@ -358,7 +358,7 @@
}
|
<QUOTED_WORD>
- {
+ {
try{
final String f = field == null ? null : field.image;
final PhraseClause phClause =
context.createPhraseClause(token.image, f );
@@ -371,18 +371,18 @@
}finally{exitMethod();}
}
- |
+ |
<WORD>
- {
+ {
try{
final String f = field == null ? null : field.image;
return context.createWordClause(token.image, f);
}finally{exitMethod();}
}
- |
+ |
<INTEGER>
- {
+ {
try{
final String f = field == null ? null : field.image;
return context.createIntegerClause(token.image, f);
@@ -391,16 +391,16 @@
}
|
<URL>
- {
+ {
try{
final String f = field == null ? null : field.image;
return context.createUrlClause(token.image, f);
}finally{exitMethod();}
}
- |
+ |
<EMAIL>
- {
+ {
try{
final String f = field == null ? null : field.image;
return context.createEmailClause(token.image, f);
Modified:
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
===================================================================
---
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
2008-03-18 20:37:04 UTC (rev 6263)
+++
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
2008-03-20 23:02:17 UTC (rev 6264)
@@ -25,52 +25,54 @@
/**
* A simple implementation of a search result item.
- * Is not multi-thread safe.
+ * Is not multi-thread safe.
* Mutates on setter methods.
* Delegates all fields (of all types) to the one map.
+ *
+ * A field named "recordid", when present, is treated as the key to equality
between result items.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Magnus Eklund</a>
* @version <tt>$Id$</tt>
*/
public class BasicResultItem implements ResultItem {
-
+
private static final String URL_KEY = "url";
private static final String TITLE_KEY = "title";
private final HashMap<String,Serializable> fields = new
HashMap<String,Serializable>();
-
+
/**
- *
+ *
*/
public BasicResultItem(){}
-
+
/**
- *
+ *
* @param title
- * @param url
+ * @param url
*/
protected BasicResultItem(final String title, final String url){
-
+
fields.put(TITLE_KEY, StringChopper.chop(title, -1));
fields.put(URL_KEY, StringChopper.chop(url, -1));
}
-
+
/**
- *
- * @param copy
+ *
+ * @param copy
*/
public BasicResultItem(final ResultItem copy){
-
+
for(String fieldName : copy.getFieldNames()){
fields.put(fieldName, copy.getObjectField(fieldName));
}
}
/**
- *
- * @param field
- * @param value
- * @return
+ *
+ * @param field
+ * @param value
+ * @return
*/
public BasicResultItem addField(final String field, final String value) {
@@ -79,9 +81,9 @@
}
/**
- *
- * @param field
- * @return
+ *
+ * @param field
+ * @return
*/
public String getField(final String field) {
@@ -90,9 +92,9 @@
}
/**
- *
- * @param field
- * @return
+ *
+ * @param field
+ * @return
*/
public Serializable getObjectField(final String field) {
@@ -100,21 +102,21 @@
}
/**
- *
- * @param field
- * @param value
- * @return
+ *
+ * @param field
+ * @param value
+ * @return
*/
public BasicResultItem addObjectField(final String field, final
Serializable value) {
-
+
fields.put(field, value);
return this;
}
-
+
/**
- *
- * @param field
- * @return
+ *
+ * @param field
+ * @return
*/
public Integer getInteger(final String field) {
@@ -123,23 +125,23 @@
}
/**
- *
- * @param field
- * @param maxLength
- * @return
+ *
+ * @param field
+ * @param maxLength
+ * @return
*/
public String getField(final String field, final int maxLength) {
-
+
final String fieldValue = (String) fields.get(field);
-
+
return fieldValue != null && fieldValue.trim().length() > 0
? StringChopper.chop(fieldValue, maxLength)
: null;
}
/** Returns a defensive copy of the field names existing in this
resultItem.
- *
- * @return
+ *
+ * @return
*/
public Collection<String> getFieldNames() {
@@ -147,9 +149,9 @@
}
/** Returns a live copy of the field's collection.
- *
- * @param field
- * @return
+ *
+ * @param field
+ * @return
*/
public Collection<String> getMultivaluedField(final String field) {
@@ -157,13 +159,13 @@
}
/**
- *
- * @param field
- * @param value
- * @return
+ *
+ * @param field
+ * @param value
+ * @return
*/
public BasicResultItem addToMultivaluedField(final String field, final
String value) {
-
+
if (! fields.containsKey(field)) {
fields.put(field, new ArrayList<String>());
}
@@ -173,8 +175,9 @@
return this;
}
+ @Override
public boolean equals(final Object obj) {
-
+
boolean result = false;
if( obj instanceof ResultItem ){
final ResultItem other = (ResultItem) obj;
@@ -198,12 +201,12 @@
return result;
}
+ @Override
public int hashCode() {
- // FIXME very specific undocumented stuff here
if (getField("recordid") != null) {
return getField("recordid").hashCode();
-
+
} else {
// there nothing else to this object than the fields map.
return fields.hashCode();
_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits