Author: ssmiweve Date: 2007-06-25 22:21:13 +0200 (Mon, 25 Jun 2007) New Revision: 5425
Modified: branches/2.14/query-api/src/main/java/no/schibstedsok/searchportal/query/token/VeryFastTokenEvaluator.java Log: SEARCH-2910 - Match in no_keyword_reserved doesn't result in query with boundary match Modified: branches/2.14/query-api/src/main/java/no/schibstedsok/searchportal/query/token/VeryFastTokenEvaluator.java =================================================================== --- branches/2.14/query-api/src/main/java/no/schibstedsok/searchportal/query/token/VeryFastTokenEvaluator.java 2007-06-25 15:26:07 UTC (rev 5424) +++ branches/2.14/query-api/src/main/java/no/schibstedsok/searchportal/query/token/VeryFastTokenEvaluator.java 2007-06-25 20:21:13 UTC (rev 5425) @@ -2,11 +2,13 @@ package no.schibstedsok.searchportal.query.token; +import com.opensymphony.oscache.base.NeedsRefreshException; +import com.opensymphony.oscache.general.GeneralCacheAdministrator; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; -import java.util.Collections; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -39,7 +41,6 @@ /** * VeryFastTokenEvaluator is part of no.schibstedsok.searchportal.query. * - * TODO would make sense to split this class into an Evaluator and a factory, similar to RegExpEvaluator. * * @author Ola Marius Sagli <a href="[EMAIL PROTECTED]">ola at schibstedsok</a> * @author <a href="mailto:[EMAIL PROTECTED]">Michael Semb Wever</a> @@ -57,10 +58,8 @@ private static final String ERR_FAILED_INITIALISATION = "Failed reading configuration files"; private static final String ERR_QUERY_FAILED = "Querying the fast list failed on "; private static final String ERR_PARSE_FAILED = "XML parsing of fast list response failed on "; - private static final String DEBUG_LISTNAME_FOUND_1 = "List for "; - private static final String DEBUG_LISTNAME_FOUND_2 = " is "; - /** TODO comment me. 
**/ + /** The configuration file from the skin that specifies token predicate to list mappings. **/ public static final String VERYFAST_EVALUATOR_XMLFILE = "VeryFastEvaluators.xml"; private static final String TOKEN_HOST_PROPERTY = "tokenevaluator.host"; private static final String TOKEN_PORT_PROPERTY = "tokenevaluator.port"; @@ -73,16 +72,22 @@ private static final Map<Site,Map<TokenPredicate,String[]>> LIST_NAMES = new HashMap<Site,Map<TokenPredicate,String[]>>(); private static final ReentrantReadWriteLock LIST_NAMES_LOCK = new ReentrantReadWriteLock(); + + private static final GeneralCacheAdministrator CACHE = new GeneralCacheAdministrator(); + private static final int REFRESH_PERIOD = 60; // one minute + private static final int CACHE_CAPACITY = 1000; // Attributes ---------------------------------------------------- - private volatile boolean init = false; - private final HTTPClient httpClient; private final Context context; - private final Map<String, List<TokenMatch>> analysisResult = new HashMap<String,List<TokenMatch>>(); + private final Map<String, List<TokenMatch>> analysisResult; // Static -------------------------------------------------------- + + static{ + CACHE.setCacheCapacity(CACHE_CAPACITY); + } // Constructors ------------------------------------------------- @@ -103,7 +108,8 @@ httpClient = HTTPClient.instance(host, port); - queryFast(context.getQueryString()); + init(); + analysisResult = queryFast(context.getQueryString()); } // Public -------------------------------------------------------- @@ -124,7 +130,7 @@ public boolean evaluateToken(final TokenPredicate token, final String term, final String query) { boolean evaluation = false; - final String[] listnames = getFastListNames(token); + final String[] listnames = getListNames(token); if(null != listnames){ for(int i = 0; !evaluation && i < listnames.length; ++i){ @@ -176,13 +182,10 @@ try{ LIST_NAMES_LOCK.writeLock().lock(); - if (!init) { - try { - initImpl(context); - } catch 
(ParserConfigurationException ex) { - LOG.error(ERR_FAILED_INITIALISATION, ex); - } - init = true; + try { + initImpl(context); + } catch (ParserConfigurationException ex) { + LOG.error(ERR_FAILED_INITIALISATION, ex); } }finally{ LIST_NAMES_LOCK.writeLock().unlock(); @@ -233,10 +236,20 @@ final TokenPredicate token = TokenPredicate.valueOf(tokenName); - final String[] l = list.getAttribute("list-name").split(","); + final String[] listNameArr = list.getAttribute("list-name").split(","); LOG.info(" ->lists: " + list.getAttribute("list-name")); - listNames.put(token, l); + // update each listname to the format the fast query matching servers use + if(null != listNameArr){ + for(int j = 0; j < listNameArr.length; ++j){ + listNameArr[j] = REAL_TOKEN_PREFIX + listNameArr[j] + REAL_TOKEN_SUFFIX; + } + + // put the listnames in + Arrays.sort(listNameArr, null); + listNames.put(token, listNameArr); + } + } } @@ -248,62 +261,87 @@ * Search fast and find out if the given tokens are company, firstname, lastname etc * @param query */ - private void queryFast(final String query) throws VeryFastListQueryException{ + private Map<String, List<TokenMatch>> queryFast(final String query) throws VeryFastListQueryException{ LOG.trace("queryFast( " + query + " )"); + Map<String, List<TokenMatch>> result = null; - if (query == null || query.equals("")) { - return; - } + if (query != null && 0 < query.length()) { - String url = null; - try { - final String token = URLEncoder.encode(query.replaceAll("\"", ""), "utf-8"); + try{ + result = (Map<String, List<TokenMatch>>) CACHE.getFromCache(query, REFRESH_PERIOD); - url = CGI_PATH + token; + } catch (NeedsRefreshException nre) { + + boolean updatedCache = false; + result = new HashMap<String,List<TokenMatch>>(); + String url = null; + + try { + final String token = URLEncoder.encode(query.replaceAll("\"", ""), "utf-8"); - final Document doc = httpClient.getXmlDocument(url); + url = CGI_PATH + token; - NodeList l = 
doc.getElementsByTagName("QUERYTRANSFORMS"); - final Element e = (Element) l.item(0); + final Document doc = httpClient.getXmlDocument(url); - l = e.getElementsByTagName("QUERYTRANSFORM"); + NodeList l = doc.getElementsByTagName("QUERYTRANSFORMS"); + final Element e = (Element) l.item(0); - for (int i = 0; i < l.getLength(); ++i) { + l = e.getElementsByTagName("QUERYTRANSFORM"); - final Element trans = (Element) l.item(i); - final String name = trans.getAttribute("NAME"); - final String custom = trans.getAttribute("CUSTOM"); - - if(custom.endsWith("->")){ - - final String match = custom.indexOf("->") >0 - ? custom.substring(0, custom.indexOf("->")) - : custom; + for (int i = 0; i < l.getLength(); ++i) { - addMatch(name, match, query); + final Element trans = (Element) l.item(i); + final String name = trans.getAttribute("NAME"); + final String custom = trans.getAttribute("CUSTOM"); - if (match.equalsIgnoreCase(query.trim())) { + if(custom.endsWith("->") && usesListName(name)){ - final String key = name.substring(name.indexOf('_') + 1, name.indexOf("QM")); + final String match = custom.indexOf("->") >0 + ? 
custom.substring(0, custom.indexOf("->")) + : custom; - addMatch(REAL_TOKEN_PREFIX + EXACT_PREFIX + key + REAL_TOKEN_SUFFIX, match, query); + addMatch(name, match, query, result); + + if (match.equalsIgnoreCase(query.trim())) { + + final String fullname = REAL_TOKEN_PREFIX + EXACT_PREFIX + + name.substring(name.indexOf('_') + 1, name.indexOf("QM")) + + REAL_TOKEN_SUFFIX; + + addMatch(fullname, match, query, result); + } + } } + CACHE.putInCache(query, result); + updatedCache = true; + + } catch (UnsupportedEncodingException ignore) { + LOG.warn(ERR_FAILED_TO_ENCODE + query); + result = (Map<String, List<TokenMatch>>)nre.getCacheContent(); + } catch (IOException e1) { + LOG.error(ERR_QUERY_FAILED + url, e1); + result = (Map<String, List<TokenMatch>>)nre.getCacheContent(); + throw new VeryFastListQueryException(ERR_QUERY_FAILED + url, e1); + } catch (SAXException e1) { + LOG.error(ERR_PARSE_FAILED + url, e1); + result = (Map<String, List<TokenMatch>>)nre.getCacheContent(); + throw new VeryFastListQueryException(ERR_PARSE_FAILED + url, e1); + }finally{ + if(!updatedCache){ + CACHE.cancelUpdate(query); + } } } - - } catch (UnsupportedEncodingException ignore) { - LOG.warn(ERR_FAILED_TO_ENCODE + query); - } catch (IOException e1) { - LOG.error(ERR_QUERY_FAILED + url, e1); - throw new VeryFastListQueryException(ERR_QUERY_FAILED + url, e1); - } catch (SAXException e1) { - LOG.error(ERR_PARSE_FAILED + url, e1); - throw new VeryFastListQueryException(ERR_PARSE_FAILED + url, e1); } + return result; } - private void addMatch(final String name, final String match, final String query) { + private static void addMatch( + final String name, + final String match, + final String query, + final Map<String, List<TokenMatch>> result) { final String expr = "\\b" + match + "\\b"; final Pattern pattern = Pattern.compile(expr, RegExpEvaluatorFactory.REG_EXP_OPTIONS); @@ -313,37 +351,53 @@ final TokenMatch tknMatch = new TokenMatch(name, match, m.start(), m.end()); - // XXX will store 
match on every countries different lists supplied in the qm result. Restrict to skin. - if (!analysisResult.containsKey(name)) { - analysisResult.put(name, new ArrayList()); + if (!result.containsKey(name)) { + result.put(name, new ArrayList<TokenMatch>()); } - analysisResult.get(name).add(tknMatch); + result.get(name).add(tknMatch); } } - private String[] getFastListNames(final TokenPredicate token){ + private boolean usesListName(final String listname){ + + boolean uses = false; + try{ + LIST_NAMES_LOCK.readLock().lock(); + Site site = context.getSite(); + + while(!uses && null != site){ + + // find listnames used for this token predicate + for(String[] listnames : LIST_NAMES.get(site).values()){ + if(0 <= Arrays.binarySearch(listnames, listname, null)){ + uses = true; + break; + } + } + + // prepare to go to parent + site = site.getParent(); + } + }finally{ + LIST_NAMES_LOCK.readLock().unlock(); + } + return uses; + } + private String[] getListNames(final TokenPredicate token){ - init(); + String[] listNames = null; try{ LIST_NAMES_LOCK.readLock().lock(); Site site = context.getSite(); - while(null != site){ + while(null == listNames && null != site){ // find listnames used for this token predicate - listNames = getFastListNamesImpl(token, site); - - // update each listname to the format the fast query matching servers use - if(null != listNames){ - for(int i = 0; i < listNames.length; ++i){ - listNames[i] = REAL_TOKEN_PREFIX + listNames[i] + REAL_TOKEN_SUFFIX; - } - break; - } - + listNames = LIST_NAMES.get(site).get(token); + // prepare to go to parent site = site.getParent(); } @@ -352,12 +406,6 @@ } return listNames; } - - private static String[] getFastListNamesImpl(final TokenPredicate token, final Site site){ - - final Map<TokenPredicate,String[]> listNames = LIST_NAMES.get(site); - return listNames.get(token); - } // Inner classes ------------------------------------------------- } _______________________________________________ Kernel-commits mailing 
list [email protected] http://sesat.no/mailman/listinfo/kernel-commits
