Author: ssmiweve
Date: 2008-02-14 19:58:45 +0100 (Thu, 14 Feb 2008)
New Revision: 6121

Modified:
   
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
   
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
Log:
SEARCH-4274 - Use Fast lists as dictionaries in SESAT and search commands


Modified: 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
===================================================================
--- 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
   2008-02-14 16:30:08 UTC (rev 6120)
+++ 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java
   2008-02-14 18:58:45 UTC (rev 6121)
@@ -20,13 +20,16 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-/**
+/** Used by VeryFastTokenEvaluator to represent a match of part of the query 
against a fast list.
+ * 
+ * @author <a href="mailto:[EMAIL PROTECTED]">Mck</a>
  * @version $Id$
  **/
-public final class TokenMatch implements Comparable {
+final class TokenMatch implements Comparable {
 
     private final String token;
     private final String match;
+    private final String value;
     private final Integer start;
     private final Integer end;
     private final Pattern matcher;
@@ -35,9 +38,10 @@
      */
     private boolean touched = false;
 
-    public TokenMatch(final String token, final String match, final int start, 
final int end) {
+    public TokenMatch(final String token, final String match, final String 
value, final int start, final int end) {
         this.token = token;
         this.match = match;
+        this.value = value;
         this.start = Integer.valueOf(start);
         this.end = Integer.valueOf(end);
         // (^|\s) or ($|\s) is neccessary to avoid matching fragments of words.
@@ -71,6 +75,7 @@
     /**
      * Get the regular expression Matcher to use to find a sub-match.
      *
+     * @param string 
      * @return the match.
      */
     public Matcher getMatcher(final String string) {
@@ -87,6 +92,15 @@
     }
 
     /**
+     * Get the Fast value.
+     *
+     * @return the value.
+     */
+    public String getValue() {
+        return value;
+    }
+
+    /**
      * Get the end.
      *
      * @return the end.
@@ -110,4 +124,14 @@
     public void setTouched(final boolean touched) {
         this.touched = touched;
     }
+
+    @Override
+    public String toString() {
+       return "token=\"" + token
+               + "\"; match=\"" + match
+               + "\"; value=" + (value == null ? "null" : "\"" + value + "\"") 
+               + "; start=" + start 
+               + "; end=" + end
+               + "; matcher=" + matcher + ";";     
+    }
 }

Modified: 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
===================================================================
--- 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
       2008-02-14 16:30:08 UTC (rev 6120)
+++ 
branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java
       2008-02-14 18:58:45 UTC (rev 6121)
@@ -29,6 +29,8 @@
 import java.util.Map;
 import java.util.Properties;
 import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -136,7 +138,7 @@
     
     /**
      * Search fast and initialize analysis result.
-     * @param query
+     * @param cxt 
      */
     VeryFastTokenEvaluator(final Context cxt) throws 
VeryFastListQueryException{
 
@@ -217,7 +219,44 @@
         return evaluation;
     }
 
-    /** 
+
+    /**
+     * Get the values of all matches of the term against the given Fast list.
+     *
+     * @param token
+     * @param term 
+     * @return an unmodifiable set of the matching values
+     */
+    public Set<String> getMatchValues(final TokenPredicate token, final String 
term) {
+        
+        final Set<String> values = new HashSet<String>();
+        
+        final String[] listnames = getListNames(token);
+        if(null != listnames){
+            for(int i = 0; i < listnames.length; i++){
+                final String listname = listnames[i];
+                if (analysisResult.containsKey(listname)) {
+                    
+                    // HACK since DefaultOperatorClause wraps its children in 
parentheses
+                    // Also remove any operator characters. (SEARCH-3883 & 
SEARCH-3967)
+                    final String hackTerm = 
term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, "");
+                    
+                    for (TokenMatch occurance : analysisResult.get(listname)) {
+                        
+                        final Matcher m = occurance.getMatcher(hackTerm);
+
+                        // only collect values from matches that span the whole term.
+                        if (m.find() && m.start() == 0 && m.end() == 
hackTerm.length()) {
+                            values.add(occurance.getValue());
+                        }
+                    }
+                }
+            }
+        }
+        return Collections.unmodifiableSet(values);
+    }
+
+    /**
      * 
      * @param predicate
      * @return
@@ -367,19 +406,23 @@
                                     + LIST_SUFFIX
                                 : null;
 
-                        if(custom.endsWith("->") && usesListName(name, 
exactname)){
+                        if(custom.matches(".+->.*") && usesListName(name, 
exactname)){ 
 
                             final String match = (custom.indexOf("->") >0
                                     ? custom.substring(0, custom.indexOf("->"))
                                     : custom)
                                     // remove words made solely of characters 
that the parser considers whitespace
                                     .replaceAll("\\b" + SKIP_REGEX + "+\\b", " 
");
+                            
+                            final String value = custom.indexOf("->") > 0 
+                                    ? custom.substring(custom.indexOf("->") + 
2) 
+                                    : null;
 
-                            addMatch(name, match, query, result);
+                            addMatch(name, match, value,query, result);
 
                             if (match.equalsIgnoreCase(query.trim())) {
 
-                                addMatch(exactname, match, query, result);
+                                addMatch(exactname, match, value, query, 
result);
                             }
                         }
                     }
@@ -410,11 +453,12 @@
     }
 
     private static void addMatch(
-            final String name, 
-            final String match, 
+            final String name,
+            final String match,
+            final String value,
             final String query,
             final Map<String, List<TokenMatch>> result) {
-        
+
         final String expr = "\\b" + match + "\\b";
         final Pattern pattern = Pattern.compile(expr, 
RegExpEvaluatorFactory.REG_EXP_OPTIONS);
         final String qNew = query.replaceAll("\\b" + SKIP_REGEX + "+\\b", " ");
@@ -423,9 +467,8 @@
                 qNew);
 
         while (m.find()) {
+            final TokenMatch tknMatch = new TokenMatch(name, match, value, 
m.start(), m.end());
 
-            final TokenMatch tknMatch = new TokenMatch(name, match, m.start(), 
m.end());
-            
             if (!result.containsKey(name)) {
                 result.put(name, new ArrayList<TokenMatch>());
             }

_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits

Reply via email to