result

ssmiweve Thu, 20 Mar 2008 16:00:39 -0700

Author: ssmiweve
Date: 2008-03-21 00:02:17 +0100 (Fri, 21 Mar 2008)
New Revision: 6264


Modified:
   
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
   
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
   
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
   
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
   branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj
   
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
Log:
SEARCH-Sortby: word [space] [hyphen] [space] word
 strip out SKIP and OPERATOR characters in *both* query and term.
SEARCH-3742 - Hunt down memory leak (AspectJ)
 TokenMatch now immutable and constructed through the flyweight pattern.


Modified: 
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
===================================================================
--- 
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
   2008-03-18 20:37:04 UTC (rev 6263)
+++ 
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/DefaultOperatorClauseImpl.java
   2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,5 +1,5 @@
 /*
- * Copyright (2005-2007) Schibsted Søk AS
+ * Copyright (2005-2008) Schibsted Søk AS
  * This file is part of SESAT.
  *
  *   SESAT is free software: you can redistribute it and/or modify
@@ -58,7 +58,7 @@
     private static final Collection<TokenPredicate> PREDICATES_APPLICABLE;
 
     static {
-        final Collection<TokenPredicate> predicates = new ArrayList();
+        final Collection<TokenPredicate> predicates = new 
ArrayList<TokenPredicate>();
 
         // Add all TokenPredicates. Unfortunately we have no way of globally 
knowing
         //  which TokenPredicates can be multi-term (multi-word) matches.

Modified: 
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
===================================================================
--- 
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
 2008-03-18 20:37:04 UTC (rev 6263)
+++ 
branches/2.16/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
 2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,4 +1,4 @@
-/* Copyright (2005-2007) Schibsted Søk AS
+/* Copyright (2005-2008) Schibsted Søk AS
  * This file is part of SESAT.
  *
  *   SESAT is free software: you can redistribute it and/or modify
@@ -51,19 +51,17 @@
 
     /**
      * Duplication of the parser's definition of SKIP. Must be kept uptodate!
+     * It's actually a duplication of WORD_SEPARATOR (which is itself a 
duplication of SKIP).
      */
     char[][] SKIP_CHARACTER_RANGES = {
         {' ', ' '},
         {'!', '!'},
-        {'\u0023', '\''/*\u0027*/},
-        {'\u002a', '\u002c'},
-        {'\u002e', '\u002f'},
-        {'\u003b', '\u0040'},
+        {'\u0023', '\u0040'},
         {'\u005b', '\u0060'},
         {'\u007b', '\u00bf'},
         {'\u00d7', '\u00d7'},
         {'\u00f7', '\u00f7'},
-        {'\u2010', '\u2015'}        
+        {'\u2010', '\u2015'}
     };
 
     /**
@@ -72,10 +70,10 @@
     String[] OPERATORS = {"*", " -", " +", "(", ")"};
 
     /** The Context an QueryParser implementation needs to work off.
-     * The QueryParser is not responsible for
+     * The QueryParser's context is responsible for:
      *  - holding the user's orginal inputted query string,
      *  - holding the tokenEvalautorFactory responsible for tokenPredicate to 
evaluator mappings,
-     *  - creation of Clause subtypes.
+     *  - creation of Clause subtypes (using the flyweight pattern).
      **/
     public interface Context extends BaseContext, QueryStringContext, 
TokenEvaluationEngineContext {
 
@@ -186,7 +184,7 @@
          * Creator wrapper method for NumberGroupClause objects.
          * The methods also allow a chunk of creation logic for the 
NumberGroupClause to be moved
          * out of the QueryParserImpl.jj file to here.
-         * 
+         *
          * @param term the term this clause represents.
          * @param field any field this clause was specified against.
          * @return returns a OrOrganisationNumberClauseImplnstance matching 
the term, left and right child clauses.

Modified: 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
===================================================================
--- 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
   2008-03-18 20:37:04 UTC (rev 6263)
+++ 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
   2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,5 +1,5 @@
 /*
- * Copyright (2005-2007) Schibsted Søk AS
+ * Copyright (2005-2008) Schibsted Søk AS
  * This file is part of SESAT.
  *
  *   SESAT is free software: you can redistribute it and/or modify
@@ -17,53 +17,95 @@
  */
 package no.sesat.search.query.token;
 
+import java.lang.ref.Reference;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import no.sesat.commons.ref.ReferenceMap;
 
 /** Used by VeryFastTokenEvaluator for matches against part of the query to a 
fast list.
- * 
+ *
+ * <b>Immutable</b>
+ *
  * @author <a href="mailto:[EMAIL PROTECTED]">Mck</a>
  * @version $Id$
  **/
-final class TokenMatch implements Comparable {
+final class TokenMatch{
 
+    // Constants -----------------------------------------------------
+
+    private static final int WEAK_CACHE_INITIAL_CAPACITY = 2000;
+    private static final float WEAK_CACHE_LOAD_FACTOR = 0.5f;
+    private static final int WEAK_CACHE_CONCURRENCY_LEVEL = 16;
+
+    private static final ReferenceMap<Integer,TokenMatch> WEAK_CACHE
+            = new ReferenceMap<Integer,TokenMatch>(
+                ReferenceMap.Type.WEAK,
+                new ConcurrentHashMap<Integer,Reference<TokenMatch>>(
+                    WEAK_CACHE_INITIAL_CAPACITY,
+                    WEAK_CACHE_LOAD_FACTOR,
+                    WEAK_CACHE_CONCURRENCY_LEVEL));
+
+    // Static --------------------------------------------------------
+
+    /** Hands out an instance given the 'constructing arguments'.
+     * We use the flyweight pattern since instances are immutable.
+     *
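+     * @param token the name of the fast list the match belongs to
+     * @param match the matched text, also used to build the matcher's regular expression
+     * @param value the value associated with the match
+     * @return the instance cached under the arguments' combined hash code, or a newly created one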
+     */
+    public static TokenMatch instanceOf(
+            final String token,
+            final String match,
+            final String value) {
+
+        final int hashCode = computeHashCode(token, match, value);
+
+        TokenMatch tm = WEAK_CACHE.get(hashCode);
+
+        if(null == tm){
+            tm = new TokenMatch(token, match, value);
+            WEAK_CACHE.put(hashCode, tm);
+        }
+
+        return tm;
+    }
+
+    private static int computeHashCode(
+            final String token,
+            final String match,
+            final String value) {
+
+        int result = 17;
+        result = 37*result + token.hashCode();
+        result = 37*result + match.hashCode();
+        result = 37*result + value.hashCode();
+        return result;
+    }
+
+    // Attributes ----------------------------------------------------
+
     private final String token;
     private final String match;
     private final String value;
-    private final Integer start;
-    private final Integer end;
     private final Pattern matcher;
-    /**
-     * Holds value of property _touched.
-     */
-    private boolean touched = false;
 
-    public TokenMatch(final String token, final String match, final String 
value, final int start, final int end) {
+    // Constructors -------------------------------------------------
+
+    private TokenMatch(final String token, final String match, final String 
value) {
+
         this.token = token;
         this.match = match;
         this.value = value;
-        this.start = Integer.valueOf(start);
-        this.end = Integer.valueOf(end);
         // (^|\s) or ($|\s) is neccessary to avoid matching fragments of words.
         matcher = Pattern.compile("(^|\\s)" + match + "($|\\s)", 
RegExpEvaluatorFactory.REG_EXP_OPTIONS);
     }
 
-    public int compareTo(final Object o) {
-        final TokenMatch other = (TokenMatch) o;
+    // Public --------------------------------------------------------
 
-        return start.compareTo(other.getStart());
-    }
-
     /**
-     * Get the start index.
-     *
-     * @return the end index.
-     */
-    public Integer getStart() {
-        return start;
-    }
-
-    /**
      * Get the match.
      *
      * @return the match.
@@ -75,7 +117,7 @@
     /**
      * Get the regular expression Matcher to use to find a sub-match.
      *
-     * @param string 
+     * @param string
      * @return the match.
      */
     public Matcher getMatcher(final String string) {
@@ -100,38 +142,23 @@
         return value;
     }
 
-    /**
-     * Get the end.
-     *
-     * @return the end.
-     */
-    public Integer getEnd() {
-        return end;
+    @Override
+    public String toString() {
+       return "token=\"" + token
+               + "\"; match=\"" + match
+               + "\"; value=" + (value == null ? "null" : "\"" + value + "\"")
+               + "; matcher=" + matcher + ";";
     }
 
-    /**
-     * Getter for property touched.
-     * @return Value of property touched.
-     */
-    public boolean isTouched() {
-        return touched;
+    @Override
+    public boolean equals(Object obj) {
+        return obj instanceof TokenMatch && obj.hashCode() == hashCode();
     }
 
-    /**
-     * Setter for property touched.
-     * @param touched New value of property touched.
-     */
-    public void setTouched(final boolean touched) {
-        this.touched = touched;
+    @Override
+    public int hashCode() {
+        return computeHashCode(token, match, value);
     }
 
-    @Override
-    public String toString() {
-       return "token=\"" + token
-               + "\"; match=\"" + match
-               + "\"; value=" + (value == null ? "null" : "\"" + value + "\"") 
-               + "; start=" + start 
-               + "; end=" + end
-               + "; matcher=" + matcher + ";";     
-    }
+
 }

Modified: 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
===================================================================
--- 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
       2008-03-18 20:37:04 UTC (rev 6263)
+++ 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
       2008-03-20 23:02:17 UTC (rev 6264)
@@ -1,4 +1,4 @@
-/* Copyright (2005-2007) Schibsted Søk AS
+/* Copyright (2005-2008) Schibsted Søk AS
  * This file is part of SESAT.
  *
  *   SESAT is free software: you can redistribute it and/or modify
@@ -61,7 +61,9 @@
 /**
  * VeryFastTokenEvaluator is part of no.sesat.search.query.
  *
+ * @todo sesat-ise. bring out to generic.sesam. make CGI_PATH easily 
configurable. configurable cache settings.
  *
+ *
  * @author Ola Marius Sagli <a href="[EMAIL PROTECTED]">ola at schibstedsok</a>
  * @author <a href="mailto:[EMAIL PROTECTED]">Mck Semb Wever</a>
  * @version $Id$
@@ -73,7 +75,7 @@
     }
 
     // Constants -----------------------------------------------------
-    
+
     private static final Logger LOG = 
Logger.getLogger(VeryFastTokenEvaluator.class);
     private static final String ERR_FAILED_INITIALISATION = "Failed reading 
configuration files";
     private static final String ERR_QUERY_FAILED = "Querying the fast list 
failed on ";
@@ -92,24 +94,28 @@
     private static final Map<Site,Map<TokenPredicate,String[]>> LIST_NAMES
             = new HashMap<Site,Map<TokenPredicate,String[]>>();
     private static final ReentrantReadWriteLock LIST_NAMES_LOCK = new 
ReentrantReadWriteLock();
-    
-    private static final GeneralCacheAdministrator CACHE = new 
GeneralCacheAdministrator();   
-    private static final int REFRESH_PERIOD = 60; // one minute
-    private static final int CACHE_CAPACITY = 100; // smaller than usual as 
each entry can contain up to 600 values!
-    
+
+    private static final GeneralCacheAdministrator CACHE_QUERY = new 
GeneralCacheAdministrator();
+    private static final int REFRESH_PERIOD = 60;
+    private static final int CACHE_QUERY_CAPACITY = 100; // smaller than usual 
as each entry can contain up to 600 values!
+
+    private static final GeneralCacheAdministrator CACHE_MATCHES = new 
GeneralCacheAdministrator();
+    private static final int CACHE_MATCHES_CAPACITY = 1000;
+
     private static final String OPERATOR_REGEX;
 
     // Attributes ----------------------------------------------------
-    
+
     private final HTTPClient httpClient;
     private final Context context;
     private final Map<String, List<TokenMatch>> analysisResult;
 
     // Static --------------------------------------------------------
-    
+
     static{
-        CACHE.setCacheCapacity(CACHE_CAPACITY);
-        
+        CACHE_QUERY.setCacheCapacity(CACHE_QUERY_CAPACITY);
+        CACHE_MATCHES.setCacheCapacity(CACHE_MATCHES_CAPACITY);
+
         // build our operator regular expression
         final StringBuilder operatorRegexpBuilder = new StringBuilder();
 
@@ -126,10 +132,10 @@
     }
 
     // Constructors -------------------------------------------------
-    
+
     /**
      * Search fast and initialize analysis result.
-     * @param cxt 
+     * @param cxt
      */
     VeryFastTokenEvaluator(final Context cxt) throws 
VeryFastListQueryException{
 
@@ -143,19 +149,15 @@
         final int port = 
Integer.parseInt(props.getProperty(TOKEN_PORT_PROPERTY));
 
         httpClient = HTTPClient.instance(host, port);
-        
+
         init();
 
         // Remove whitespace (except space itself) and operator characters.
-        analysisResult = queryFast(context.getQueryString()
-                .replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces.
-                .replaceAll(SKIP_REGEX, "")
-                .replaceAll("xxKEEPWSxx", " ") // Hack to keep spaces.
-                .replaceAll(OPERATOR_REGEX, " ")); 
+        analysisResult = queryFast(cleanString(context.getQueryString()));
     }
 
     // Public --------------------------------------------------------
-    
+
     /**
      * Find out if given token is on or more of the following.
      *      <li>GEO
@@ -167,14 +169,14 @@
      * </ul>
      *
      * @param token  can be any of the above
-     * @param query 
+     * @param query
      * @return true if the query contains any of the above
      */
     public boolean evaluateToken(final TokenPredicate token, final String 
term, final String query) {
 
         boolean evaluation = false;
         final String[] listnames = getListNames(token);
-        
+
         if(null != listnames){
             for(int i = 0; !evaluation && i < listnames.length; ++i){
 
@@ -186,17 +188,14 @@
                     }  else  {
 
                         // HACK since DefaultOperatorClause wraps its children 
in parenthesis
-                        // Also remove any operator characters. (SEARCH-3883 & 
SEARCH-3967)
-                        final String hackTerm = 
term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, "");
+                        final String hackTerm = 
cleanString(term.replaceAll("\\(|\\)",""));
 
                         for (TokenMatch occurance : 
analysisResult.get(listname)) {
 
                             final Matcher m = occurance.getMatcher(hackTerm);
                             evaluation = m.find() && m.start() == 0 && m.end() 
== hackTerm.length();
 
-                            // keep track of which TokenMatch's we've used.
                             if (evaluation) {
-                                occurance.setTouched(true);
                                 break;
                             }
                         }
@@ -215,28 +214,26 @@
      * get all match values and values for given Fast list .
      *
      * @param token
-     * @param term 
+     * @param term
      * @return a list of Tokens
      */
     public Set<String> getMatchValues(final TokenPredicate token, final String 
term) {
-        
+
         final Set<String> values = new HashSet<String>();
-        
+
         final String[] listnames = getListNames(token);
         if(null != listnames){
             for(int i = 0; i < listnames.length; i++){
                 final String listname = listnames[i];
                 if (analysisResult.containsKey(listname)) {
-                    
+
                     // HACK since DefaultOperatorClause wraps its children in 
parenthesis
-                    // Also remove any operator characters. (SEARCH-3883 & 
SEARCH-3967)
-                    final String hackTerm = 
term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, "");
-                    
+                    final String hackTerm = 
cleanString(term.replaceAll("\\(|\\)",""));
+
                     for (TokenMatch occurance : analysisResult.get(listname)) {
-                        
+
                         final Matcher m = occurance.getMatcher(hackTerm);
 
-                        // keep track of which TokenMatch's we've used.
                         if (m.find() && m.start() == 0 && m.end() == 
hackTerm.length()) {
                             values.add(occurance.getValue());
                         }
@@ -248,20 +245,20 @@
     }
 
     /**
-     * 
+     *
      * @param predicate
      * @return
      */
     public boolean isQueryDependant(final TokenPredicate predicate) {
         return predicate.name().startsWith(EXACT_PREFIX.toUpperCase());
     }
-    
+
     // Package protected ---------------------------------------------
 
     // Protected -----------------------------------------------------
 
     // Private -------------------------------------------------------
-    
+
     private void init() {
 
         try {
@@ -276,17 +273,17 @@
         final Site site = cxt.getSite();
         final Site parent = site.getParent();
         final boolean parentUninitialised;
-        
+
         try{
             LIST_NAMES_LOCK.readLock().lock();
-            
+
             // initialise the parent site's configuration
             parentUninitialised = (null != parent && null == 
LIST_NAMES.get(parent));
-            
+
         }finally{
             LIST_NAMES_LOCK.readLock().unlock();
         }
-        
+
         if(parentUninitialised){
             initImpl(ContextWrapper.wrap(
                     Context.class,
@@ -298,10 +295,10 @@
                     cxt
                 ));
         }
-        
+
         try{
             LIST_NAMES_LOCK.writeLock().lock();
-        
+
             if(null == LIST_NAMES.get(site)){
 
                 // create map entry for this site
@@ -366,14 +363,14 @@
         if (query != null && 0 < query.length()) {
 
             try{
-                result = (Map<String, List<TokenMatch>>) 
CACHE.getFromCache(query, REFRESH_PERIOD);
+                result = (Map<String, List<TokenMatch>>) 
CACHE_QUERY.getFromCache(query, REFRESH_PERIOD);
 
             } catch (NeedsRefreshException nre) {
-            
+
                 boolean updatedCache = false;
                 result = new HashMap<String,List<TokenMatch>>();
                 String url = null;
-                
+
                 try {
                     final String token = 
URLEncoder.encode(query.replaceAll("\"", ""), "utf-8");
 
@@ -392,21 +389,21 @@
                         final String name = trans.getAttribute("NAME");
                         final String custom = trans.getAttribute("CUSTOM");
                         final String exactname = 0 <= 
name.indexOf(LIST_PREFIX) && 0 < name.indexOf(LIST_SUFFIX)
-                                ? LIST_PREFIX + EXACT_PREFIX 
+                                ? LIST_PREFIX + EXACT_PREFIX
                                     + name.substring(name.indexOf('_') + 1, 
name.indexOf("QM"))
                                     + LIST_SUFFIX
                                 : null;
 
-                        if(custom.matches(".+->.*") && usesListName(name, 
exactname)){ 
+                        if(custom.matches(".+->.*") && usesListName(name, 
exactname)){
 
                             final String match = (custom.indexOf("->") >0
                                     ? custom.substring(0, custom.indexOf("->"))
                                     : custom)
                                     // remove words made solely of characters 
that the parser considers whitespace
                                     .replaceAll("\\b" + SKIP_REGEX + "+\\b", " 
");
-                            
-                            final String value = custom.indexOf("->") > 0 
-                                    ? custom.substring(custom.indexOf("->") + 
2) 
+
+                            final String value = custom.indexOf("->") > 0
+                                    ? custom.substring(custom.indexOf("->") + 
2)
                                     : null;
 
                             addMatch(name, match, value,query, result);
@@ -417,7 +414,8 @@
                             }
                         }
                     }
-                    CACHE.putInCache(query, result);
+                    result = Collections.unmodifiableMap(result);
+                    CACHE_QUERY.putInCache(query, result);
                     updatedCache = true;
 
                 } catch (UnsupportedEncodingException ignore) {
@@ -432,8 +430,8 @@
                     result = (Map<String, 
List<TokenMatch>>)nre.getCacheContent();
                     throw new VeryFastListQueryException(ERR_PARSE_FAILED + 
url, e1);
                 }finally{
-                    if(!updatedCache){ 
-                        CACHE.cancelUpdate(query);
+                    if(!updatedCache){
+                        CACHE_QUERY.cancelUpdate(query);
                     }
                 }
             }
@@ -458,7 +456,7 @@
                 qNew);
 
         while (m.find()) {
-            final TokenMatch tknMatch = new TokenMatch(name, match, value, 
m.start(), m.end());
+            final TokenMatch tknMatch = TokenMatch.instanceOf(name, match, 
value);
 
             if (!result.containsKey(name)) {
                 result.put(name, new ArrayList<TokenMatch>());
@@ -467,27 +465,31 @@
             result.get(name).add(tknMatch);
 
             if (result.get(name).size() % 100 == 0) {
-                LOG.warn("Pattern: " + pattern.pattern() + " name: " + name + 
" query: " + query + " match: " + match + " query2: " + qNew);
+                LOG.warn("Pattern: " + pattern.pattern()
+                        + " name: " + name
+                        + " query: " + query
+                        + " match: " + match
+                        + " query2: " + qNew);
             }
         }
     }
 
     private boolean usesListName(final String listname, final String 
exactname){
-               
+
         boolean uses = false;
         try{
             LIST_NAMES_LOCK.readLock().lock();
             Site site = context.getSite();
-            
+
             while(!uses && null != site){
-                
+
                 // find listnames used for this token predicate
                 for(String[] listnames : LIST_NAMES.get(site).values()){
                     uses |= 0 <= Arrays.binarySearch(listnames, listname, 
null);
                     uses |= null != exactname && 0 <= 
Arrays.binarySearch(listnames, exactname, null);
                     if(uses){  break; }
                 }
-                                
+
                 // prepare to go to parent
                 site = site.getParent();
             }
@@ -496,20 +498,21 @@
         }
         return uses;
     }
+
     private String[] getListNames(final TokenPredicate token){
-        
-        
-        
+
+
+
         String[] listNames = null;
         try{
             LIST_NAMES_LOCK.readLock().lock();
             Site site = context.getSite();
-            
+
             while(null == listNames && null != site){
-                
+
                 // find listnames used for this token predicate
                 listNames = LIST_NAMES.get(site).get(token);
-                                
+
                 // prepare to go to parent
                 site = site.getParent();
             }
@@ -518,6 +521,18 @@
         }
         return listNames;
     }
-    
+
+    private String cleanString(final String string){
+
+        // Strip out SKIP characters we are not interested in.
+        // Also remove any operator characters. (SEARCH-3883 & SEARCH-3967)
+
+        return string
+                .replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces.
+                .replaceAll(SKIP_REGEX, "")
+                .replaceAll("xxKEEPWSxx", " ") // Hack to keep spaces.
+                .replaceAll(OPERATOR_REGEX, " ");
+    }
+
     // Inner classes -------------------------------------------------
 }

Modified: branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj
===================================================================
--- branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj  2008-03-18 
20:37:04 UTC (rev 6263)
+++ branches/2.16/query-api/src/main/javacc/QueryParserImpl.jj  2008-03-20 
23:02:17 UTC (rev 6264)
@@ -1,4 +1,4 @@
-/** Copyright (2005-2007) Schibsted Søk AS 
+/** Copyright (2005-2008) Schibsted Søk AS
  *   This file is part of SESAT.
  *
  *   SESAT is free software: you can redistribute it and/or modify
@@ -29,7 +29,7 @@
 import java.util.ArrayList;
 import no.sesat.search.query.*;
 
-/** This class is NOT synchronized. You must use a separate instance for each 
query. 
+/** This class is NOT synchronized. You must use a separate instance for each 
query.
  *
  * @version $Id$
  * @author <a href="mailto:[EMAIL PROTECTED]>mick</a>
@@ -65,14 +65,14 @@
 
 PARSER_END(QueryParserImpl)
 
-/** Skip these characters, they are considered "white space" 
+/** Skip these characters, they are considered "white space"
  * See http://www.unicode.org/charts/ for unicode charts.
  *
  * WARNING!! These ranges are duplicated in QueryParser.java
  *             !! Always update it after any changes here !!
 **/
 <*>SKIP : {
-      " " | "!" 
+      " " | "!"
     | < [ "\u0023"-"\u0027" ] >
     | < [ "\u002a"-"\u002c" ] >
     | < [ "\u002e"-"\u002f" ] >
@@ -85,15 +85,15 @@
 }
 
 /* These are the the above skip characters */
-/* # % & ' */  
-/* * + , */    
+/* # % & ' */
+/* * + , */
 /* . / */
 /* ; < = > ? @ */
 /* [ \ ] ^ _ ` */
 /* ‐ ‑ ‒ – — ― */
 
 
<DEFAULT,EMAIL_DISABLED,PHONE_NUMBER_DISABLED,NUMBER_GROUP_DISABLED,QUOTED_WORD_DISABLED>
-// https?://([-\\w\\.]+)+(:\\d+)?(/~?([\\w/_\\.]*(\\?\\S+)?)?)? 
+// https?://([-\\w\\.]+)+(:\\d+)?(/~?([\\w/_\\.]*(\\?\\S+)?)?)?
 TOKEN : { <URL: 
"http"("s")?"://"<WORD>(":"<INTEGER>)?(<URL_WORD>)*(("?"<URL_WORD>)?)*("#"<URL_WORD>)?>
     | <#URL_WORD: (<WORD>|<ENCODED_CHAR>|"-"|"_"|"~"|"/"|"+"|"&")+>
     | <#ENCODED_CHAR: 
"%"(<DIGIT>|"A"|"a"|"B"|"b"|"C"|"c"|"D"|"d"|"E"|"e"|"F"|"f"){2}>}
@@ -101,7 +101,7 @@
 
<DEFAULT,URL_DISABLED,PHONE_NUMBER_DISABLED,NUMBER_GROUP_DISABLED,QUOTED_WORD_DISABLED>
 TOKEN : { <EMAIL: <WORD>"@"<WORD>"."<WORD>> }
 
-// Very generic to match any Internation Phone Number !! 
+// Very generic to match any Internation Phone Number !!
 //   TODO deal with optional area codes. eg australian numbers can be written 
(02) 4878 9336
 // number_group_disabled is a super state to phone_number_disabled
 <DEFAULT,URL_DISABLED,EMAIL_DISABLED,QUOTED_WORD_DISABLED>
@@ -117,7 +117,7 @@
 <*>TOKEN : {
       <AND: ("AND"|"+")>
     | <OR:  ("OR"|"|")>
-    | <NOT: "NOT"|"-">              
+    | <NOT: "NOT"|"-">
     | <ANDNOT: ("ANDNOT")>
     | <INTEGER: ((<DIGIT>)+)>
     | <WORD: 
(<LETTER>|<DIGIT>|<WORD_SYMBOL_PREFIX>)(<LETTER>|<DIGIT>|<WORD_SYMBOL_MIDDLE>)*("*")?>
@@ -127,8 +127,8 @@
     | <#PHONE_SYMBOL: (".")|("-")|("/")>
     | <#WORD_SYMBOL_PREFIX: (".")|("<")|("=")|(">")>
     | <#WORD_SYMBOL_MIDDLE: (".")|<HYPON>|("_")|("+")>
-    | <#WORD_SEPARATOR: [ // just a copy of the SKIP declaration.
-            " ", "!", 
+    | <#WORD_SEPARATOR: [ // just a copy of the SKIP declaration. see SKIP 
comment!
+            " ", "!",
             "\u0023"-"\u0040",
             "\u005b"-"\u0060",
             "\u007b"-"\u00bf",
@@ -173,11 +173,11 @@
           ]>
 }
 
-Clause parse() : { 
+Clause parse() : {
   Clause clause;
   LOG.info("parsing: "+context.getQueryString());
 }{
-        (clause = rootPrecedence()) {return clause;}        
+        (clause = rootPrecedence()) {return clause;}
 }
 
 /** PRECEDENCES **/
@@ -196,16 +196,16 @@
     (clause = hiddenDefaultOperation()) { try{return 
clause;}finally{exitMethod();} }
     |
     (clause = looseJoinPrecedence()) { try{return 
clause;}finally{exitMethod();} }
-    
+
 }
 
 Clause looseJoinPrecedence() :{
     Clause clause;
     enterMethod("looseJoinPrecedence()");
 }{
-    (clause = orOperation()) { try{return clause;}finally{exitMethod();} }  
+    (clause = orOperation()) { try{return clause;}finally{exitMethod();} }
     |
-    (clause = strongJoinPrecedence()) { try{return 
clause;}finally{exitMethod();} } 
+    (clause = strongJoinPrecedence()) { try{return 
clause;}finally{exitMethod();} }
 }
 
 Clause strongJoinPrecedence() :{
@@ -215,8 +215,8 @@
     (clause = andOperation()) { try{return clause;}finally{exitMethod();} }
     |
     (clause = leafPrecedence()) { try{return clause;}finally{exitMethod();} }
-    
 
+
 }
 
 Clause leafPrecedence() :{
@@ -226,7 +226,7 @@
     (clause = notOperation()) { try{return clause;}finally{exitMethod();} }
     |
     (clause = andNotOperation()) { try{return clause;}finally{exitMethod();} }
-    | 
+    |
     (clause = leaf()) { try{return clause;}finally{exitMethod();} }
 }
 
@@ -236,7 +236,7 @@
     Clause left,right;
     enterMethod("hiddenDefaultOperation()");
 }{
-    ((left = looseJoinPrecedence())(right = noPrecedence())) 
+    ((left = looseJoinPrecedence())(right = noPrecedence()))
         { try{return 
context.createDefaultOperatorClause(left,right);}finally{exitMethod();} }
 }
 
@@ -246,7 +246,7 @@
     enterMethod("andNotOperation()");
 }{
     (<ANDNOT>(right = noPrecedence())) { try{return 
context.createAndNotClause(right);}finally{exitMethod();} }
-}  
+}
 
 Clause orOperation() :{
     Clause left,right;
@@ -255,7 +255,7 @@
 }{
     ("("(clause = hiddenOrOperation())")") { try{return 
clause;}finally{exitMethod();} }
     |
-    ((left = strongJoinPrecedence())<OR>(right = looseJoinPrecedence())) 
+    ((left = strongJoinPrecedence())<OR>(right = looseJoinPrecedence()))
         { try{return 
context.createOrClause(left,right);}finally{exitMethod();} }
 }
 
@@ -263,7 +263,7 @@
     Clause left,right;
     enterMethod("andOperation()");
 }{
-    ((left = leafPrecedence())<AND>(right = strongJoinPrecedence())) 
+    ((left = leafPrecedence())<AND>(right = strongJoinPrecedence()))
         { try{return 
context.createAndClause(left,right);}finally{exitMethod();} }
 }
 
@@ -271,10 +271,10 @@
     Clause left,right;
     enterMethod("hiddenAndOperation()");
 }{
-    ((left = leafPrecedence())(right = hiddenAndOperation())) 
+    ((left = leafPrecedence())(right = hiddenAndOperation()))
         { try{return 
context.createAndClause(left,right);}finally{exitMethod();} }
     |
-    ((left = leafPrecedence())(right = strongJoinPrecedence())) 
+    ((left = leafPrecedence())(right = strongJoinPrecedence()))
         { try{return 
context.createAndClause(left,right);}finally{exitMethod();} }
 }
 
@@ -282,17 +282,17 @@
     Clause left,right;
     enterMethod("hiddenOrOperation()");
 }{
-    
+
     // These are the real hidden or operands
-    ((left = leafPrecedence())(right = hiddenOrOperation())) 
+    ((left = leafPrecedence())(right = hiddenOrOperation()))
         { try{return context.createOrClause(left,right);}finally{exitMethod();} }
     |
-    ((left = strongJoinPrecedence())(right = looseJoinPrecedence())) 
+    ((left = strongJoinPrecedence())(right = looseJoinPrecedence()))
         { try{return context.createOrClause(left,right);}finally{exitMethod();} }
     |
     // Not really an OR clause. Just () wrapped around something else. Therefore ignore the ()'s
     (left = leafPrecedence()) { try{return left;}finally{exitMethod();} }
-    
+
 }
 
 NotClause notOperation() :{
@@ -313,20 +313,20 @@
     // A real field
     ((field=<WORD>)<FIELD_SUFFIX>)(clause = fieldedLeaf(field)) {try{return clause;}finally{exitMethod();}}
     |
-    // An accidential field. 
+    // An accidential field.
     // XXX This could cause problems as it destroys the construction of a right-leaning forests.
-    (left = fieldedLeaf(null)<FIELD_SUFFIX>)(right = fieldedLeaf(null)) 
+    (left = fieldedLeaf(null)<FIELD_SUFFIX>)(right = fieldedLeaf(null))
         {try{return context.createDefaultOperatorClause(left,right);}finally{exitMethod();}}
     |
     (clause = fieldedLeaf(null)) {try{return clause;}finally{exitMethod();}}
 }
-    
 
+
 Clause fieldedLeaf(final Token field) :{
     enterMethod("fieldedLeaf()");
 }{
-    <PHONE_NUMBER> 
-        { 
+    <PHONE_NUMBER>
+        {
             try{
                 // remove the PHONE_SYMBOL sub-tokens
                 final String term = token.image.replaceAll("\\.|-|/| |\\(|\\)","");
@@ -340,9 +340,9 @@
 
             }finally{exitMethod();}
         }
-    | 
+    |
     <NUMBER_GROUP>
-        { 
+        {
             try{
                 // Remove whitespace
                 final String term = token.image.trim().replaceAll(" ","");
@@ -358,7 +358,7 @@
         }
     |
     <QUOTED_WORD>
-        { 
+        {
             try{
                 final String f = field == null ? null : field.image;
                 final PhraseClause phClause = context.createPhraseClause(token.image, f );
@@ -371,18 +371,18 @@
 
             }finally{exitMethod();}
         }
-    | 
+    |
     <WORD>
-        { 
+        {
             try{
                 final String f = field == null ? null : field.image;
                 return context.createWordClause(token.image, f);
 
             }finally{exitMethod();}
         }
-    | 
+    |
     <INTEGER>
-        { 
+        {
             try{
                 final String f = field == null ? null : field.image;
                 return context.createIntegerClause(token.image, f);
@@ -391,16 +391,16 @@
         }
     |
     <URL>
-        { 
+        {
             try{
                 final String f = field == null ? null : field.image;
                 return context.createUrlClause(token.image, f);
 
             }finally{exitMethod();}
         }
-    | 
+    |
     <EMAIL>
-        { 
+        {
             try{
                 final String f = field == null ? null : field.image;
                 return context.createEmailClause(token.image, f);

Modified: 
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
===================================================================
--- 
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
  2008-03-18 20:37:04 UTC (rev 6263)
+++ 
branches/2.16/result-spi/src/main/java/no/sesat/search/result/BasicResultItem.java
  2008-03-20 23:02:17 UTC (rev 6264)
@@ -25,52 +25,54 @@
 
 /**
  * A simple implementation of a search result item.
- * Is not multi-thread safe. 
+ * Is not multi-thread safe.
  * Mutates on setter methods.
  * Delegates all fields (of all types) to the one map.
+ * 
+ * Any field "recordid" is considered as a key to equality between result items.
  *
  * @author <a href="mailto:[EMAIL PROTECTED]">Magnus Eklund</a>
  * @version <tt>$Id$</tt>
  */
 public class BasicResultItem implements ResultItem {
-    
+
     private static final String URL_KEY = "url";
     private static final String TITLE_KEY = "title";
 
     private final HashMap<String,Serializable> fields = new HashMap<String,Serializable>();
-    
+
     /**
-     * 
+     *
      */
     public BasicResultItem(){}
-    
+
     /**
-     * 
+     *
      * @param title
-     * @param url 
+     * @param url
      */
     protected BasicResultItem(final String title, final String url){
-        
+
         fields.put(TITLE_KEY, StringChopper.chop(title, -1));
         fields.put(URL_KEY, StringChopper.chop(url, -1));
     }
-    
+
     /**
-     * 
-     * @param copy 
+     *
+     * @param copy
      */
     public BasicResultItem(final ResultItem copy){
-        
+
        for(String fieldName : copy.getFieldNames()){
            fields.put(fieldName, copy.getObjectField(fieldName));
        }
     }
 
     /**
-     * 
-     * @param field 
-     * @param value 
-     * @return 
+     *
+     * @param field
+     * @param value
+     * @return
      */
     public BasicResultItem addField(final String field, final String value) {
 
@@ -79,9 +81,9 @@
     }
 
     /**
-     * 
-     * @param field 
-     * @return 
+     *
+     * @param field
+     * @return
      */
     public String getField(final String field) {
 
@@ -90,9 +92,9 @@
     }
 
     /**
-     * 
-     * @param field 
-     * @return 
+     *
+     * @param field
+     * @return
      */
     public Serializable getObjectField(final String field) {
 
@@ -100,21 +102,21 @@
     }
 
     /**
-     * 
-     * @param field 
-     * @param value 
-     * @return 
+     *
+     * @param field
+     * @param value
+     * @return
      */
     public BasicResultItem addObjectField(final String field, final Serializable value) {
-        
+
         fields.put(field, value);
         return this;
     }
-    
+
     /**
-     * 
-     * @param field 
-     * @return 
+     *
+     * @param field
+     * @return
      */
     public Integer getInteger(final String field) {
 
@@ -123,23 +125,23 @@
     }
 
     /**
-     * 
-     * @param field 
-     * @param maxLength 
-     * @return 
+     *
+     * @param field
+     * @param maxLength
+     * @return
      */
     public String getField(final String field, final int maxLength) {
-        
+
         final String fieldValue = (String) fields.get(field);
-        
+
         return fieldValue != null && fieldValue.trim().length() > 0
                 ? StringChopper.chop(fieldValue, maxLength)
                 : null;
     }
 
     /** Returns a defensive copy of the field names existing in this resultItem.
-     * 
-     * @return 
+     *
+     * @return
      */
     public Collection<String> getFieldNames() {
 
@@ -147,9 +149,9 @@
     }
 
     /** Returns a live copy of the field's collection.
-     * 
-     * @param field 
-     * @return 
+     *
+     * @param field
+     * @return
      */
     public Collection<String> getMultivaluedField(final String field) {
 
@@ -157,13 +159,13 @@
     }
 
     /**
-     * 
-     * @param field 
-     * @param value 
-     * @return 
+     *
+     * @param field
+     * @param value
+     * @return
      */
     public BasicResultItem addToMultivaluedField(final String field, final String value) {
-        
+
         if (! fields.containsKey(field)) {
             fields.put(field, new ArrayList<String>());
         }
@@ -173,8 +175,9 @@
         return this;
     }
 
+    @Override
     public boolean equals(final Object obj) {
-        
+
         boolean result = false;
         if( obj instanceof ResultItem ){
             final ResultItem other = (ResultItem) obj;
@@ -198,12 +201,12 @@
         return result;
     }
 
+    @Override
     public int hashCode() {
 
-        // FIXME very specific undocumented stuff here
         if (getField("recordid") != null) {
             return getField("recordid").hashCode();
-            
+
         } else {
             // there nothing else to this object than the fields map.
             return fields.hashCode();

_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits

[Kernel-commits] r6264 - in branches/2.16: query-api/src/main/java/no/sesat/search/query/parser query-api/src/main/java/no/sesat/search/query/token query-api/src/main/javacc result-spi/src/main/java/no/sesat/search/result

Reply via email to