Author: ssmiweve Date: 2008-02-14 19:58:45 +0100 (Thu, 14 Feb 2008) New Revision: 6121
Modified: branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java Log: SEARCH-4274 - Use Fast lists as dictionaries in SESAT and search commands Modified: branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java =================================================================== --- branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java 2008-02-14 16:30:08 UTC (rev 6120) +++ branches/2.16/query-api/src/main/java/no/sesat/search/query/token/TokenMatch.java 2008-02-14 18:58:45 UTC (rev 6121) @@ -20,13 +20,16 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -/** +/** Used by VeryFastTokenEvaluator for matches against part of the query to a fast list. + * + * @author <a href="mailto:[EMAIL PROTECTED]">Mck</a> * @version $Id$ **/ -public final class TokenMatch implements Comparable { +final class TokenMatch implements Comparable { private final String token; private final String match; + private final String value; private final Integer start; private final Integer end; private final Pattern matcher; @@ -35,9 +38,10 @@ */ private boolean touched = false; - public TokenMatch(final String token, final String match, final int start, final int end) { + public TokenMatch(final String token, final String match, final String value, final int start, final int end) { this.token = token; this.match = match; + this.value = value; this.start = Integer.valueOf(start); this.end = Integer.valueOf(end); // (^|\s) or ($|\s) is neccessary to avoid matching fragments of words. @@ -71,6 +75,7 @@ /** * Get the regular expression Matcher to use to find a sub-match. * + * @param string * @return the match. */ public Matcher getMatcher(final String string) { @@ -87,6 +92,15 @@ } /** + * Get the Fast value. + * + * @return the value. 
+ */ + public String getValue() { + return value; + } + + /** * Get the end. * * @return the end. @@ -110,4 +124,14 @@ public void setTouched(final boolean touched) { this.touched = touched; } + + @Override + public String toString() { + return "token=\"" + token + + "\"; match=\"" + match + + "\"; value=" + (value == null ? "null" : "\"" + value + "\"") + + "; start=" + start + + "; end=" + end + + "; matcher=" + matcher + ";"; + } } Modified: branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java =================================================================== --- branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java 2008-02-14 16:30:08 UTC (rev 6120) +++ branches/2.16/query-api/src/main/java/no/sesat/search/query/token/VeryFastTokenEvaluator.java 2008-02-14 18:58:45 UTC (rev 6121) @@ -29,6 +29,8 @@ import java.util.Map; import java.util.Properties; import java.util.Collections; +import java.util.HashSet; +import java.util.Set; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -136,7 +138,7 @@ /** * Search fast and initialize analysis result. - * @param query + * @param cxt */ VeryFastTokenEvaluator(final Context cxt) throws VeryFastListQueryException{ @@ -217,7 +219,44 @@ return evaluation; } - /** + + /** + * get all match values and values for given Fast list . + * + * @param token + * @param term + * @return a list of Tokens + */ + public Set<String> getMatchValues(final TokenPredicate token, final String term) { + + final Set<String> values = new HashSet<String>(); + + final String[] listnames = getListNames(token); + if(null != listnames){ + for(int i = 0; i < listnames.length; i++){ + final String listname = listnames[i]; + if (analysisResult.containsKey(listname)) { + + // HACK since DefaultOperatorClause wraps its children in parenthesis + // Also remove any operator characters. 
(SEARCH-3883 & SEARCH-3967) + final String hackTerm = term.replaceAll("\\(|\\)","").replaceAll(OPERATOR_REGEX, ""); + + for (TokenMatch occurance : analysisResult.get(listname)) { + + final Matcher m = occurance.getMatcher(hackTerm); + + // keep track of which TokenMatch's we've used. + if (m.find() && m.start() == 0 && m.end() == hackTerm.length()) { + values.add(occurance.getValue()); + } + } + } + } + } + return Collections.unmodifiableSet(values); + } + + /** * * @param predicate * @return @@ -367,19 +406,23 @@ + LIST_SUFFIX : null; - if(custom.endsWith("->") && usesListName(name, exactname)){ + if(custom.matches(".+->.*") && usesListName(name, exactname)){ final String match = (custom.indexOf("->") >0 ? custom.substring(0, custom.indexOf("->")) : custom) // remove words made solely of characters that the parser considers whitespace .replaceAll("\\b" + SKIP_REGEX + "+\\b", " "); + + final String value = custom.indexOf("->") > 0 + ? custom.substring(custom.indexOf("->") + 2) + : null; - addMatch(name, match, query, result); + addMatch(name, match, value,query, result); if (match.equalsIgnoreCase(query.trim())) { - addMatch(exactname, match, query, result); + addMatch(exactname, match, value, query, result); } } } @@ -410,11 +453,12 @@ } private static void addMatch( - final String name, - final String match, + final String name, + final String match, + final String value, final String query, final Map<String, List<TokenMatch>> result) { - + final String expr = "\\b" + match + "\\b"; final Pattern pattern = Pattern.compile(expr, RegExpEvaluatorFactory.REG_EXP_OPTIONS); final String qNew = query.replaceAll("\\b" + SKIP_REGEX + "+\\b", " "); @@ -423,9 +467,8 @@ qNew); while (m.find()) { + final TokenMatch tknMatch = new TokenMatch(name, match, value, m.start(), m.end()); - final TokenMatch tknMatch = new TokenMatch(name, match, m.start(), m.end()); - if (!result.containsKey(name)) { result.put(name, new ArrayList<TokenMatch>()); } 
_______________________________________________ Kernel-commits mailing list [email protected] http://sesat.no/mailman/listinfo/kernel-commits
